mirror of
https://github.com/archlinuxarm/PKGBUILDs.git
synced 2024-11-18 22:54:00 +00:00
26961 lines
1 MiB
26961 lines
1 MiB
From b68f15b7a57f7df52d955f7f75f9df3b78041764 Mon Sep 17 00:00:00 2001
|
|
Message-Id: <b68f15b7a57f7df52d955f7f75f9df3b78041764.1553026141.git.jan.steffens@gmail.com>
|
|
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
|
Date: Tue, 19 Mar 2019 20:45:22 +0100
|
|
Subject: [PATCH] bz 1468911
|
|
|
|
https://bugzilla.mozilla.org/show_bug.cgi?id=1521249
|
|
---
|
|
.cargo/config.in | 5 +
|
|
Cargo.lock | 53 +-
|
|
Cargo.toml | 1 +
|
|
third_party/rust/cfg-if/.cargo-checksum.json | 2 +-
|
|
third_party/rust/cfg-if/Cargo.toml | 28 +-
|
|
third_party/rust/cfg-if/README.md | 18 +-
|
|
third_party/rust/cfg-if/src/lib.rs | 53 +-
|
|
.../rust/encoding_rs/.cargo-checksum.json | 2 +-
|
|
third_party/rust/encoding_rs/Cargo.toml | 12 +-
|
|
third_party/rust/encoding_rs/README.md | 52 +-
|
|
third_party/rust/encoding_rs/build.rs | 8 +
|
|
third_party/rust/encoding_rs/src/handles.rs | 2 +-
|
|
third_party/rust/encoding_rs/src/lib.rs | 7 +-
|
|
third_party/rust/encoding_rs/src/mem.rs | 24 +-
|
|
.../rust/encoding_rs/src/simd_funcs.rs | 93 +-
|
|
.../rust/encoding_rs/src/x_user_defined.rs | 7 +-
|
|
third_party/rust/packed_simd/.appveyor.yml | 59 +
|
|
.../rust/packed_simd/.cargo-checksum.json | 1 +
|
|
third_party/rust/packed_simd/.travis.yml | 308 ++++
|
|
third_party/rust/packed_simd/Cargo.toml | 42 +
|
|
.../rust/{simd => packed_simd}/LICENSE-APACHE | 0
|
|
.../rust/{simd => packed_simd}/LICENSE-MIT | 4 +-
|
|
third_party/rust/packed_simd/bors.toml | 3 +
|
|
third_party/rust/packed_simd/build.rs | 8 +
|
|
third_party/rust/packed_simd/ci/all.sh | 71 +
|
|
.../packed_simd/ci/android-install-ndk.sh | 37 +
|
|
.../packed_simd/ci/android-install-sdk.sh | 60 +
|
|
.../rust/packed_simd/ci/android-sysimage.sh | 56 +
|
|
third_party/rust/packed_simd/ci/benchmark.sh | 32 +
|
|
.../ci/deploy_and_run_on_ios_simulator.rs | 176 +++
|
|
.../docker/aarch64-linux-android/Dockerfile | 47 +
|
|
.../aarch64-unknown-linux-gnu/Dockerfile | 14 +
|
|
.../docker/arm-linux-androideabi/Dockerfile | 47 +
|
|
.../arm-unknown-linux-gnueabi/Dockerfile | 15 +
|
|
.../arm-unknown-linux-gnueabihf/Dockerfile | 13 +
|
|
.../armv7-unknown-linux-gnueabihf/Dockerfile | 13 +
|
|
.../docker/i586-unknown-linux-gnu/Dockerfile | 7 +
|
|
.../docker/i686-unknown-linux-gnu/Dockerfile | 7 +
|
|
.../docker/mips-unknown-linux-gnu/Dockerfile | 13 +
|
|
.../mips64-unknown-linux-gnuabi64/Dockerfile | 10 +
|
|
.../Dockerfile | 10 +
|
|
.../mipsel-unknown-linux-musl/Dockerfile | 25 +
|
|
.../powerpc-unknown-linux-gnu/Dockerfile | 12 +
|
|
.../powerpc64-unknown-linux-gnu/Dockerfile | 17 +
|
|
.../powerpc64le-unknown-linux-gnu/Dockerfile | 11 +
|
|
.../docker/s390x-unknown-linux-gnu/Dockerfile | 20 +
|
|
.../sparc64-unknown-linux-gnu/Dockerfile | 18 +
|
|
.../thumbv7neon-linux-androideabi/Dockerfile | 47 +
|
|
.../Dockerfile | 13 +
|
|
.../docker/wasm32-unknown-unknown/Dockerfile | 37 +
|
|
.../ci/docker/x86_64-linux-android/Dockerfile | 29 +
|
|
.../Dockerfile | 16 +
|
|
.../x86_64-unknown-linux-gnu/Dockerfile | 10 +
|
|
third_party/rust/packed_simd/ci/dox.sh | 24 +
|
|
.../rust/packed_simd/ci/linux-s390x.sh | 18 +
|
|
.../rust/packed_simd/ci/linux-sparc64.sh | 17 +
|
|
third_party/rust/packed_simd/ci/lld-shim.rs | 11 +
|
|
.../rust/packed_simd/ci/max_line_width.sh | 17 +
|
|
third_party/rust/packed_simd/ci/run-docker.sh | 38 +
|
|
third_party/rust/packed_simd/ci/run.sh | 96 ++
|
|
.../rust/packed_simd/ci/run_examples.sh | 51 +
|
|
.../rust/packed_simd/ci/runtest-android.rs | 45 +
|
|
.../rust/packed_simd/ci/setup_benchmarks.sh | 10 +
|
|
.../rust/packed_simd/ci/test-runner-linux | 24 +
|
|
third_party/rust/packed_simd/contributing.md | 67 +
|
|
.../rust/packed_simd/perf-guide/.gitignore | 1 +
|
|
.../rust/packed_simd/perf-guide/book.toml | 12 +
|
|
.../packed_simd/perf-guide/src/SUMMARY.md | 21 +
|
|
.../rust/packed_simd/perf-guide/src/ascii.css | 4 +
|
|
.../perf-guide/src/bound_checks.md | 22 +
|
|
.../perf-guide/src/float-math/approx.md | 8 +
|
|
.../perf-guide/src/float-math/fma.md | 6 +
|
|
.../perf-guide/src/float-math/fp.md | 3 +
|
|
.../perf-guide/src/float-math/svml.md | 7 +
|
|
.../perf-guide/src/introduction.md | 26 +
|
|
.../packed_simd/perf-guide/src/prof/linux.md | 107 ++
|
|
.../packed_simd/perf-guide/src/prof/mca.md | 100 ++
|
|
.../perf-guide/src/prof/profiling.md | 14 +
|
|
.../src/target-feature/attribute.md | 5 +
|
|
.../perf-guide/src/target-feature/features.md | 13 +
|
|
.../perf-guide/src/target-feature/inlining.md | 5 +
|
|
.../perf-guide/src/target-feature/practice.md | 31 +
|
|
.../perf-guide/src/target-feature/runtime.md | 5 +
|
|
.../src/target-feature/rustflags.md | 77 +
|
|
.../perf-guide/src/vert-hor-ops.md | 76 +
|
|
third_party/rust/packed_simd/readme.md | 182 +++
|
|
third_party/rust/packed_simd/rustfmt.toml | 7 +
|
|
third_party/rust/packed_simd/src/api.rs | 301 ++++
|
|
.../rust/packed_simd/src/api/bit_manip.rs | 128 ++
|
|
third_party/rust/packed_simd/src/api/cast.rs | 108 ++
|
|
.../rust/packed_simd/src/api/cast/macros.rs | 82 +
|
|
.../rust/packed_simd/src/api/cast/v128.rs | 79 +
|
|
.../rust/packed_simd/src/api/cast/v16.rs | 17 +
|
|
.../rust/packed_simd/src/api/cast/v256.rs | 81 +
|
|
.../rust/packed_simd/src/api/cast/v32.rs | 30 +
|
|
.../rust/packed_simd/src/api/cast/v512.rs | 68 +
|
|
.../rust/packed_simd/src/api/cast/v64.rs | 47 +
|
|
third_party/rust/packed_simd/src/api/cmp.rs | 16 +
|
|
.../rust/packed_simd/src/api/cmp/eq.rs | 27 +
|
|
.../rust/packed_simd/src/api/cmp/ord.rs | 43 +
|
|
.../packed_simd/src/api/cmp/partial_eq.rs | 67 +
|
|
.../packed_simd/src/api/cmp/partial_ord.rs | 234 +++
|
|
.../rust/packed_simd/src/api/cmp/vertical.rs | 114 ++
|
|
.../rust/packed_simd/src/api/default.rs | 28 +
|
|
third_party/rust/packed_simd/src/api/fmt.rs | 12 +
|
|
.../rust/packed_simd/src/api/fmt/binary.rs | 56 +
|
|
.../rust/packed_simd/src/api/fmt/debug.rs | 62 +
|
|
.../rust/packed_simd/src/api/fmt/lower_hex.rs | 56 +
|
|
.../rust/packed_simd/src/api/fmt/octal.rs | 56 +
|
|
.../rust/packed_simd/src/api/fmt/upper_hex.rs | 56 +
|
|
third_party/rust/packed_simd/src/api/from.rs | 7 +
|
|
.../packed_simd/src/api/from/from_array.rs | 121 ++
|
|
.../packed_simd/src/api/from/from_vector.rs | 67 +
|
|
third_party/rust/packed_simd/src/api/hash.rs | 47 +
|
|
.../rust/packed_simd/src/api/into_bits.rs | 59 +
|
|
.../src/api/into_bits/arch_specific.rs | 190 +++
|
|
.../packed_simd/src/api/into_bits/macros.rs | 74 +
|
|
.../packed_simd/src/api/into_bits/v128.rs | 28 +
|
|
.../rust/packed_simd/src/api/into_bits/v16.rs | 9 +
|
|
.../packed_simd/src/api/into_bits/v256.rs | 27 +
|
|
.../rust/packed_simd/src/api/into_bits/v32.rs | 13 +
|
|
.../packed_simd/src/api/into_bits/v512.rs | 27 +
|
|
.../rust/packed_simd/src/api/into_bits/v64.rs | 18 +
|
|
third_party/rust/packed_simd/src/api/math.rs | 4 +
|
|
.../rust/packed_simd/src/api/math/float.rs | 61 +
|
|
.../packed_simd/src/api/math/float/abs.rs | 31 +
|
|
.../packed_simd/src/api/math/float/consts.rs | 86 +
|
|
.../packed_simd/src/api/math/float/cos.rs | 44 +
|
|
.../packed_simd/src/api/math/float/exp.rs | 33 +
|
|
.../rust/packed_simd/src/api/math/float/ln.rs | 33 +
|
|
.../packed_simd/src/api/math/float/mul_add.rs | 44 +
|
|
.../src/api/math/float/mul_adde.rs | 48 +
|
|
.../packed_simd/src/api/math/float/powf.rs | 36 +
|
|
.../packed_simd/src/api/math/float/recpre.rs | 36 +
|
|
.../packed_simd/src/api/math/float/rsqrte.rs | 40 +
|
|
.../packed_simd/src/api/math/float/sin.rs | 50 +
|
|
.../packed_simd/src/api/math/float/sqrt.rs | 35 +
|
|
.../packed_simd/src/api/math/float/sqrte.rs | 44 +
|
|
.../rust/packed_simd/src/api/minimal.rs | 6 +
|
|
.../rust/packed_simd/src/api/minimal/iuf.rs | 167 ++
|
|
.../rust/packed_simd/src/api/minimal/mask.rs | 174 +++
|
|
.../rust/packed_simd/src/api/minimal/ptr.rs | 1385 +++++++++++++++++
|
|
third_party/rust/packed_simd/src/api/ops.rs | 32 +
|
|
.../src/api/ops/scalar_arithmetic.rs | 203 +++
|
|
.../packed_simd/src/api/ops/scalar_bitwise.rs | 162 ++
|
|
.../src/api/ops/scalar_mask_bitwise.rs | 140 ++
|
|
.../packed_simd/src/api/ops/scalar_shifts.rs | 107 ++
|
|
.../src/api/ops/vector_arithmetic.rs | 148 ++
|
|
.../packed_simd/src/api/ops/vector_bitwise.rs | 129 ++
|
|
.../src/api/ops/vector_float_min_max.rs | 69 +
|
|
.../src/api/ops/vector_int_min_max.rs | 57 +
|
|
.../src/api/ops/vector_mask_bitwise.rs | 116 ++
|
|
.../packed_simd/src/api/ops/vector_neg.rs | 43 +
|
|
.../packed_simd/src/api/ops/vector_rotates.rs | 90 ++
|
|
.../packed_simd/src/api/ops/vector_shifts.rs | 107 ++
|
|
third_party/rust/packed_simd/src/api/ptr.rs | 4 +
|
|
.../packed_simd/src/api/ptr/gather_scatter.rs | 241 +++
|
|
.../rust/packed_simd/src/api/reductions.rs | 12 +
|
|
.../packed_simd/src/api/reductions/bitwise.rs | 151 ++
|
|
.../src/api/reductions/float_arithmetic.rs | 312 ++++
|
|
.../src/api/reductions/integer_arithmetic.rs | 197 +++
|
|
.../packed_simd/src/api/reductions/mask.rs | 89 ++
|
|
.../packed_simd/src/api/reductions/min_max.rs | 377 +++++
|
|
.../rust/packed_simd/src/api/select.rs | 75 +
|
|
.../rust/packed_simd/src/api/shuffle.rs | 190 +++
|
|
.../rust/packed_simd/src/api/shuffle1_dyn.rs | 159 ++
|
|
third_party/rust/packed_simd/src/api/slice.rs | 7 +
|
|
.../packed_simd/src/api/slice/from_slice.rs | 216 +++
|
|
.../src/api/slice/write_to_slice.rs | 211 +++
|
|
.../rust/packed_simd/src/api/swap_bytes.rs | 192 +++
|
|
third_party/rust/packed_simd/src/codegen.rs | 59 +
|
|
.../rust/packed_simd/src/codegen/bit_manip.rs | 354 +++++
|
|
.../rust/packed_simd/src/codegen/llvm.rs | 99 ++
|
|
.../rust/packed_simd/src/codegen/math.rs | 3 +
|
|
.../packed_simd/src/codegen/math/float.rs | 18 +
|
|
.../packed_simd/src/codegen/math/float/abs.rs | 103 ++
|
|
.../packed_simd/src/codegen/math/float/cos.rs | 103 ++
|
|
.../src/codegen/math/float/cos_pi.rs | 87 ++
|
|
.../packed_simd/src/codegen/math/float/exp.rs | 112 ++
|
|
.../packed_simd/src/codegen/math/float/ln.rs | 112 ++
|
|
.../src/codegen/math/float/macros.rs | 559 +++++++
|
|
.../src/codegen/math/float/mul_add.rs | 109 ++
|
|
.../src/codegen/math/float/mul_adde.rs | 66 +
|
|
.../src/codegen/math/float/powf.rs | 112 ++
|
|
.../packed_simd/src/codegen/math/float/sin.rs | 103 ++
|
|
.../src/codegen/math/float/sin_cos_pi.rs | 195 +++
|
|
.../src/codegen/math/float/sin_pi.rs | 87 ++
|
|
.../src/codegen/math/float/sqrt.rs | 103 ++
|
|
.../src/codegen/math/float/sqrte.rs | 67 +
|
|
.../src/codegen/pointer_sized_int.rs | 28 +
|
|
.../packed_simd/src/codegen/reductions.rs | 1 +
|
|
.../src/codegen/reductions/mask.rs | 69 +
|
|
.../src/codegen/reductions/mask/aarch64.rs | 71 +
|
|
.../src/codegen/reductions/mask/arm.rs | 54 +
|
|
.../src/codegen/reductions/mask/fallback.rs | 6 +
|
|
.../codegen/reductions/mask/fallback_impl.rs | 237 +++
|
|
.../src/codegen/reductions/mask/x86.rs | 194 +++
|
|
.../src/codegen/reductions/mask/x86/avx.rs | 101 ++
|
|
.../src/codegen/reductions/mask/x86/avx2.rs | 35 +
|
|
.../src/codegen/reductions/mask/x86/sse.rs | 68 +
|
|
.../src/codegen/reductions/mask/x86/sse2.rs | 70 +
|
|
.../rust/packed_simd/src/codegen/shuffle.rs | 302 ++++
|
|
.../packed_simd/src/codegen/shuffle1_dyn.rs | 432 +++++
|
|
.../packed_simd/src/codegen/swap_bytes.rs | 189 +++
|
|
.../rust/packed_simd/src/codegen/v128.rs | 46 +
|
|
.../rust/packed_simd/src/codegen/v16.rs | 7 +
|
|
.../rust/packed_simd/src/codegen/v256.rs | 78 +
|
|
.../rust/packed_simd/src/codegen/v32.rs | 11 +
|
|
.../rust/packed_simd/src/codegen/v512.rs | 145 ++
|
|
.../rust/packed_simd/src/codegen/v64.rs | 21 +
|
|
.../rust/packed_simd/src/codegen/vPtr.rs | 33 +
|
|
.../rust/packed_simd/src/codegen/vSize.rs | 43 +
|
|
third_party/rust/packed_simd/src/lib.rs | 327 ++++
|
|
third_party/rust/packed_simd/src/masks.rs | 128 ++
|
|
third_party/rust/packed_simd/src/sealed.rs | 41 +
|
|
third_party/rust/packed_simd/src/testing.rs | 8 +
|
|
.../rust/packed_simd/src/testing/macros.rs | 44 +
|
|
.../rust/packed_simd/src/testing/utils.rs | 135 ++
|
|
third_party/rust/packed_simd/src/v128.rs | 80 +
|
|
third_party/rust/packed_simd/src/v16.rs | 16 +
|
|
third_party/rust/packed_simd/src/v256.rs | 86 +
|
|
third_party/rust/packed_simd/src/v32.rs | 29 +
|
|
third_party/rust/packed_simd/src/v512.rs | 99 ++
|
|
third_party/rust/packed_simd/src/v64.rs | 66 +
|
|
third_party/rust/packed_simd/src/vPtr.rs | 34 +
|
|
third_party/rust/packed_simd/src/vSize.rs | 53 +
|
|
.../rust/packed_simd/tests/endianness.rs | 262 ++++
|
|
third_party/rust/simd/.cargo-checksum.json | 1 -
|
|
third_party/rust/simd/Cargo.toml | 37 -
|
|
third_party/rust/simd/README.md | 11 -
|
|
third_party/rust/simd/benches/mandelbrot.rs | 117 --
|
|
third_party/rust/simd/benches/matrix.rs | 485 ------
|
|
third_party/rust/simd/build.rs | 3 -
|
|
third_party/rust/simd/examples/axpy.rs | 65 -
|
|
third_party/rust/simd/examples/convert.rs | 38 -
|
|
third_party/rust/simd/examples/dot-product.rs | 60 -
|
|
.../simd/examples/fannkuch-redux-nosimd.rs | 156 --
|
|
.../rust/simd/examples/fannkuch-redux.rs | 233 ---
|
|
third_party/rust/simd/examples/mandelbrot.rs | 125 --
|
|
.../rust/simd/examples/matrix-inverse.rs | 281 ----
|
|
.../rust/simd/examples/nbody-nosimd.rs | 156 --
|
|
third_party/rust/simd/examples/nbody.rs | 170 --
|
|
third_party/rust/simd/examples/ops.rs | 10 -
|
|
.../simd/examples/spectral-norm-nosimd.rs | 106 --
|
|
.../rust/simd/examples/spectral-norm.rs | 74 -
|
|
third_party/rust/simd/src/aarch64/mod.rs | 3 -
|
|
third_party/rust/simd/src/aarch64/neon.rs | 681 --------
|
|
third_party/rust/simd/src/arm/mod.rs | 4 -
|
|
third_party/rust/simd/src/arm/neon.rs | 622 --------
|
|
third_party/rust/simd/src/common.rs | 520 -------
|
|
third_party/rust/simd/src/lib.rs | 804 ----------
|
|
third_party/rust/simd/src/sixty_four.rs | 228 ---
|
|
third_party/rust/simd/src/v256.rs | 436 ------
|
|
third_party/rust/simd/src/x86/avx.rs | 290 ----
|
|
third_party/rust/simd/src/x86/avx2.rs | 65 -
|
|
third_party/rust/simd/src/x86/mod.rs | 16 -
|
|
third_party/rust/simd/src/x86/sse2.rs | 359 -----
|
|
third_party/rust/simd/src/x86/sse3.rs | 57 -
|
|
third_party/rust/simd/src/x86/sse4_1.rs | 155 --
|
|
third_party/rust/simd/src/x86/sse4_2.rs | 19 -
|
|
third_party/rust/simd/src/x86/ssse3.rs | 172 --
|
|
toolkit/moz.configure | 11 +-
|
|
262 files changed, 17410 insertions(+), 6733 deletions(-)
|
|
create mode 100644 third_party/rust/packed_simd/.appveyor.yml
|
|
create mode 100644 third_party/rust/packed_simd/.cargo-checksum.json
|
|
create mode 100644 third_party/rust/packed_simd/.travis.yml
|
|
create mode 100644 third_party/rust/packed_simd/Cargo.toml
|
|
rename third_party/rust/{simd => packed_simd}/LICENSE-APACHE (100%)
|
|
rename third_party/rust/{simd => packed_simd}/LICENSE-MIT (93%)
|
|
create mode 100644 third_party/rust/packed_simd/bors.toml
|
|
create mode 100644 third_party/rust/packed_simd/build.rs
|
|
create mode 100644 third_party/rust/packed_simd/ci/all.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/android-install-ndk.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/android-install-sdk.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/android-sysimage.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/benchmark.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
|
|
create mode 100644 third_party/rust/packed_simd/ci/dox.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/linux-s390x.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/linux-sparc64.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/lld-shim.rs
|
|
create mode 100644 third_party/rust/packed_simd/ci/max_line_width.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/run-docker.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/run.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/run_examples.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/runtest-android.rs
|
|
create mode 100644 third_party/rust/packed_simd/ci/setup_benchmarks.sh
|
|
create mode 100644 third_party/rust/packed_simd/ci/test-runner-linux
|
|
create mode 100644 third_party/rust/packed_simd/contributing.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/.gitignore
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/book.toml
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/SUMMARY.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/ascii.css
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/bound_checks.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/approx.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fma.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fp.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/svml.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/introduction.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/linux.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/mca.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/profiling.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/features.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md
|
|
create mode 100644 third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md
|
|
create mode 100644 third_party/rust/packed_simd/readme.md
|
|
create mode 100644 third_party/rust/packed_simd/rustfmt.toml
|
|
create mode 100644 third_party/rust/packed_simd/src/api.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/bit_manip.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast/macros.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast/v128.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast/v16.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast/v256.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast/v32.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast/v512.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cast/v64.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cmp.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cmp/eq.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cmp/ord.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_eq.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_ord.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/cmp/vertical.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/default.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/fmt.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/fmt/binary.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/fmt/debug.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/fmt/lower_hex.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/fmt/octal.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/fmt/upper_hex.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/from.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/from/from_array.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/from/from_vector.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/hash.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/macros.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v128.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v16.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v256.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v32.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v512.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v64.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/abs.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/consts.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/cos.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/exp.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/ln.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_add.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_adde.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/powf.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/recpre.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/rsqrte.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/sin.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrt.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrte.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/minimal.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/minimal/iuf.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/minimal/mask.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/minimal/ptr.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_neg.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_rotates.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_shifts.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ptr.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/reductions.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/reductions/bitwise.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/reductions/mask.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/reductions/min_max.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/select.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/shuffle.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/shuffle1_dyn.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/slice.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/slice/from_slice.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/slice/write_to_slice.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/api/swap_bytes.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/bit_manip.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/llvm.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/abs.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/exp.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/ln.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/macros.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/powf.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/swap_bytes.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/v128.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/v16.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/v256.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/v32.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/v512.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/v64.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/vPtr.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/codegen/vSize.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/lib.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/masks.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/sealed.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/testing.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/testing/macros.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/testing/utils.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/v128.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/v16.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/v256.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/v32.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/v512.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/v64.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/vPtr.rs
|
|
create mode 100644 third_party/rust/packed_simd/src/vSize.rs
|
|
create mode 100644 third_party/rust/packed_simd/tests/endianness.rs
|
|
delete mode 100644 third_party/rust/simd/.cargo-checksum.json
|
|
delete mode 100644 third_party/rust/simd/Cargo.toml
|
|
delete mode 100644 third_party/rust/simd/README.md
|
|
delete mode 100755 third_party/rust/simd/benches/mandelbrot.rs
|
|
delete mode 100755 third_party/rust/simd/benches/matrix.rs
|
|
delete mode 100644 third_party/rust/simd/build.rs
|
|
delete mode 100755 third_party/rust/simd/examples/axpy.rs
|
|
delete mode 100644 third_party/rust/simd/examples/convert.rs
|
|
delete mode 100755 third_party/rust/simd/examples/dot-product.rs
|
|
delete mode 100644 third_party/rust/simd/examples/fannkuch-redux-nosimd.rs
|
|
delete mode 100755 third_party/rust/simd/examples/fannkuch-redux.rs
|
|
delete mode 100755 third_party/rust/simd/examples/mandelbrot.rs
|
|
delete mode 100644 third_party/rust/simd/examples/matrix-inverse.rs
|
|
delete mode 100644 third_party/rust/simd/examples/nbody-nosimd.rs
|
|
delete mode 100755 third_party/rust/simd/examples/nbody.rs
|
|
delete mode 100644 third_party/rust/simd/examples/ops.rs
|
|
delete mode 100644 third_party/rust/simd/examples/spectral-norm-nosimd.rs
|
|
delete mode 100755 third_party/rust/simd/examples/spectral-norm.rs
|
|
delete mode 100644 third_party/rust/simd/src/aarch64/mod.rs
|
|
delete mode 100644 third_party/rust/simd/src/aarch64/neon.rs
|
|
delete mode 100644 third_party/rust/simd/src/arm/mod.rs
|
|
delete mode 100644 third_party/rust/simd/src/arm/neon.rs
|
|
delete mode 100644 third_party/rust/simd/src/common.rs
|
|
delete mode 100644 third_party/rust/simd/src/lib.rs
|
|
delete mode 100644 third_party/rust/simd/src/sixty_four.rs
|
|
delete mode 100644 third_party/rust/simd/src/v256.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/avx.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/avx2.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/mod.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/sse2.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/sse3.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/sse4_1.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/sse4_2.rs
|
|
delete mode 100644 third_party/rust/simd/src/x86/ssse3.rs
|
|
|
|
diff --git a/.cargo/config.in b/.cargo/config.in
|
|
index 94f5732891fb..57ae36311e52 100644
|
|
--- a/.cargo/config.in
|
|
+++ b/.cargo/config.in
|
|
@@ -17,6 +17,11 @@ git = "https://github.com/froydnj/winapi-rs"
|
|
branch = "aarch64"
|
|
replace-with = "vendored-sources"
|
|
|
|
+[source."https://github.com/rust-lang-nursery/packed_simd"]
|
|
+git = "https://github.com/hsivonen/packed_simd"
|
|
+branch = "rust_1_32"
|
|
+replace-with = "vendored-sources"
|
|
+
|
|
[source.vendored-sources]
|
|
directory = '@top_srcdir@/third_party/rust'
|
|
|
|
diff --git a/Cargo.lock b/Cargo.lock
|
|
index 8896cfeddb28..a048a5522ab0 100644
|
|
--- a/Cargo.lock
|
|
+++ b/Cargo.lock
|
|
@@ -141,7 +141,7 @@ version = "0.3.9"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
"backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"rustc-demangle 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)",
|
|
@@ -218,7 +218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
@@ -372,7 +372,7 @@ dependencies = [
|
|
|
|
[[package]]
|
|
name = "cfg-if"
|
|
-version = "0.1.2"
|
|
+version = "0.1.6"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
|
[[package]]
|
|
@@ -575,41 +575,41 @@ version = "0.3.1"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
"arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
[[package]]
|
|
name = "crossbeam-epoch"
|
|
version = "0.4.3"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
"arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
[[package]]
|
|
name = "crossbeam-utils"
|
|
version = "0.2.2"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
[[package]]
|
|
name = "crossbeam-utils"
|
|
version = "0.3.2"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
[[package]]
|
|
@@ -848,25 +848,25 @@ name = "encoding_c"
|
|
version = "0.9.0"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
[[package]]
|
|
name = "encoding_glue"
|
|
version = "0.1.0"
|
|
dependencies = [
|
|
- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"nserror 0.1.0",
|
|
"nsstring 0.1.0",
|
|
]
|
|
|
|
[[package]]
|
|
name = "encoding_rs"
|
|
-version = "0.8.14"
|
|
+version = "0.8.16"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
- "simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)",
|
|
]
|
|
|
|
[[package]]
|
|
@@ -1477,7 +1477,7 @@ name = "log"
|
|
version = "0.4.6"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
[[package]]
|
|
@@ -1719,7 +1719,7 @@ name = "net2"
|
|
version = "0.2.32"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)",
|
|
]
|
|
@@ -1773,7 +1773,7 @@ name = "nsstring"
|
|
version = "0.1.0"
|
|
dependencies = [
|
|
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
[[package]]
|
|
@@ -1859,6 +1859,14 @@ dependencies = [
|
|
"stable_deref_trait 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
+[[package]]
|
|
+name = "packed_simd"
|
|
+version = "0.3.3"
|
|
+source = "git+https://github.com/hsivonen/packed_simd?branch=rust_1_32#3541e3818fdc7c2a24f87e3459151a4ce955a67a"
|
|
+dependencies = [
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+]
|
|
+
|
|
[[package]]
|
|
name = "parking_lot"
|
|
version = "0.6.3"
|
|
@@ -2354,11 +2362,6 @@ dependencies = [
|
|
"opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
-[[package]]
|
|
-name = "simd"
|
|
-version = "0.2.3"
|
|
-source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
-
|
|
[[package]]
|
|
name = "siphasher"
|
|
version = "0.2.1"
|
|
@@ -2936,7 +2939,7 @@ name = "uuid"
|
|
version = "0.6.5"
|
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
dependencies = [
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
]
|
|
|
|
@@ -3017,7 +3020,7 @@ dependencies = [
|
|
"bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
+ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"core-foundation 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"core-graphics 0.17.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
"core-text 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
@@ -3253,7 +3256,7 @@ dependencies = [
|
|
"checksum cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427"
|
|
"checksum cc 1.0.23 (registry+https://github.com/rust-lang/crates.io-index)" = "c37f0efaa4b9b001fa6f02d4b644dee4af97d3414df07c51e3e4f015f3a3e131"
|
|
"checksum cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8fc0086be9ca82f7fc89fc873435531cb898b86e850005850de1f820e2db6e9b"
|
|
-"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"
|
|
+"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
|
|
"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878"
|
|
"checksum clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)" = "481e42017c1416b1c0856ece45658ecbb7c93d8a93455f7e5fa77f3b35455557"
|
|
"checksum clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f16b89cbb9ee36d87483dc939fe9f1e13c05898d56d7b230a0d4dff033a536"
|
|
@@ -3303,7 +3306,7 @@ dependencies = [
|
|
"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
|
|
"checksum ena 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "88dc8393b3c7352f94092497f6b52019643e493b6b890eb417cdb7c46117e621"
|
|
"checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
|
|
-"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"
|
|
+"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"
|
|
"checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
|
|
"checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3"
|
|
"checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea"
|
|
@@ -3388,6 +3391,7 @@ dependencies = [
|
|
"checksum ordered-float 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2f0015e9e8e28ee20c581cfbfe47c650cedeb9ed0721090e0b7ebb10b9cdbcc2"
|
|
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
|
|
"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13"
|
|
+"checksum packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)" = "<none>"
|
|
"checksum parking_lot 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "69376b761943787ebd5cc85a5bc95958651a22609c5c1c2b65de21786baec72b"
|
|
"checksum parking_lot_core 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa"
|
|
"checksum peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
|
@@ -3441,7 +3445,6 @@ dependencies = [
|
|
"checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae"
|
|
"checksum sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0"
|
|
"checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d"
|
|
-"checksum simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84"
|
|
"checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84"
|
|
"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
|
|
"checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d"
|
|
diff --git a/Cargo.toml b/Cargo.toml
|
|
index d64cbc77b53d..25859a20ecc3 100644
|
|
--- a/Cargo.toml
|
|
+++ b/Cargo.toml
|
|
@@ -59,3 +59,4 @@ codegen-units = 1
|
|
libudev-sys = { path = "dom/webauthn/libudev-sys" }
|
|
serde_derive = { git = "https://github.com/servo/serde", branch = "deserialize_from_enums9" }
|
|
winapi = { git = "https://github.com/froydnj/winapi-rs", branch = "aarch64" }
|
|
+packed_simd = { git = "https://github.com/hsivonen/packed_simd", branch = "rust_1_32" }
|
|
diff --git a/third_party/rust/cfg-if/.cargo-checksum.json b/third_party/rust/cfg-if/.cargo-checksum.json
|
|
index 89b14a227216..b744a21d9fd1 100644
|
|
--- a/third_party/rust/cfg-if/.cargo-checksum.json
|
|
+++ b/third_party/rust/cfg-if/.cargo-checksum.json
|
|
@@ -1 +1 @@
|
|
-{"files":{"Cargo.toml":"764b9ce160653e841430da3919ff968b957ff811f7da42c8483c8bfc2f06be25","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"3fa9368c60bc701dea294fbacae0469188c4be1de79f82e972bb9b321776cd52","src/lib.rs":"6915169e3ca05f28e1cb0e052379d74f2496400de1240b74c56e55c2674a6560","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"}
|
|
\ No newline at end of file
|
|
+{"files":{"Cargo.toml":"090d983ec20ad09e59f6b7679b48b9b54e9c0841cf2922b81cba485edcd40876","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"1cd0ebc3b30a9c9eddb0fda5515b5a52ec2b85a087328f0ee9f4d68cbb28afc2","src/lib.rs":"f02d6e295109365cf54884e5282a3e7d1e1f62857c700f23cd013e94a56bd803","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"}
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/cfg-if/Cargo.toml b/third_party/rust/cfg-if/Cargo.toml
|
|
index 7afa063d1ef5..84c4fc7835ab 100644
|
|
--- a/third_party/rust/cfg-if/Cargo.toml
|
|
+++ b/third_party/rust/cfg-if/Cargo.toml
|
|
@@ -1,14 +1,24 @@
|
|
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
|
+#
|
|
+# When uploading crates to the registry Cargo will automatically
|
|
+# "normalize" Cargo.toml files for maximal compatibility
|
|
+# with all versions of Cargo and also rewrite `path` dependencies
|
|
+# to registry (e.g. crates.io) dependencies
|
|
+#
|
|
+# If you believe there's an error in this file please file an
|
|
+# issue against the rust-lang/cargo repository. If you're
|
|
+# editing this file be aware that the upstream Cargo.toml
|
|
+# will likely look very different (and much more reasonable)
|
|
+
|
|
[package]
|
|
name = "cfg-if"
|
|
-version = "0.1.2"
|
|
+version = "0.1.6"
|
|
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
|
-license = "MIT/Apache-2.0"
|
|
+description = "A macro to ergonomically define an item depending on a large number of #[cfg]\nparameters. Structured like an if-else chain, the first matching branch is the\nitem that gets emitted.\n"
|
|
+homepage = "https://github.com/alexcrichton/cfg-if"
|
|
+documentation = "https://docs.rs/cfg-if"
|
|
readme = "README.md"
|
|
+license = "MIT/Apache-2.0"
|
|
repository = "https://github.com/alexcrichton/cfg-if"
|
|
-homepage = "https://github.com/alexcrichton/cfg-if"
|
|
-documentation = "http://alexcrichton.com/cfg-if"
|
|
-description = """
|
|
-A macro to ergonomically define an item depending on a large number of #[cfg]
|
|
-parameters. Structured like an if-else chain, the first matching branch is the
|
|
-item that gets emitted.
|
|
-"""
|
|
+[badges.travis-ci]
|
|
+repository = "alexcrichton/cfg-if"
|
|
diff --git a/third_party/rust/cfg-if/README.md b/third_party/rust/cfg-if/README.md
|
|
index e9859dadb609..344a946c0487 100644
|
|
--- a/third_party/rust/cfg-if/README.md
|
|
+++ b/third_party/rust/cfg-if/README.md
|
|
@@ -2,7 +2,7 @@
|
|
|
|
[![Build Status](https://travis-ci.org/alexcrichton/cfg-if.svg?branch=master)](https://travis-ci.org/alexcrichton/cfg-if)
|
|
|
|
-[Documentation](http://alexcrichton.com/cfg-if)
|
|
+[Documentation](https://docs.rs/cfg-if)
|
|
|
|
A macro to ergonomically define an item depending on a large number of #[cfg]
|
|
parameters. Structured like an if-else chain, the first matching branch is the
|
|
@@ -36,9 +36,17 @@ fn main() {
|
|
|
|
# License
|
|
|
|
-`cfg-if` is primarily distributed under the terms of both the MIT license and
|
|
-the Apache License (Version 2.0), with portions covered by various BSD-like
|
|
-licenses.
|
|
+This project is licensed under either of
|
|
|
|
-See LICENSE-APACHE, and LICENSE-MIT for details.
|
|
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
|
|
+ http://www.apache.org/licenses/LICENSE-2.0)
|
|
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
|
|
+ http://opensource.org/licenses/MIT)
|
|
|
|
+at your option.
|
|
+
|
|
+### Contribution
|
|
+
|
|
+Unless you explicitly state otherwise, any contribution intentionally submitted
|
|
+for inclusion in Serde by you, as defined in the Apache-2.0 license, shall be
|
|
+dual licensed as above, without any additional terms or conditions.
|
|
diff --git a/third_party/rust/cfg-if/src/lib.rs b/third_party/rust/cfg-if/src/lib.rs
|
|
index 563cda81f42d..ff144f69f862 100644
|
|
--- a/third_party/rust/cfg-if/src/lib.rs
|
|
+++ b/third_party/rust/cfg-if/src/lib.rs
|
|
@@ -1,81 +1,90 @@
|
|
-//! A macro for defining #[cfg] if-else statements.
|
|
+//! A macro for defining `#[cfg]` if-else statements.
|
|
//!
|
|
//! The macro provided by this crate, `cfg_if`, is similar to the `if/elif` C
|
|
//! preprocessor macro by allowing definition of a cascade of `#[cfg]` cases,
|
|
//! emitting the implementation which matches first.
|
|
//!
|
|
-//! This allows you to conveniently provide a long list #[cfg]'d blocks of code
|
|
+//! This allows you to conveniently provide a long list `#[cfg]`'d blocks of code
|
|
//! without having to rewrite each clause multiple times.
|
|
//!
|
|
//! # Example
|
|
//!
|
|
//! ```
|
|
//! #[macro_use]
|
|
//! extern crate cfg_if;
|
|
//!
|
|
//! cfg_if! {
|
|
//! if #[cfg(unix)] {
|
|
//! fn foo() { /* unix specific functionality */ }
|
|
//! } else if #[cfg(target_pointer_width = "32")] {
|
|
//! fn foo() { /* non-unix, 32-bit functionality */ }
|
|
//! } else {
|
|
//! fn foo() { /* fallback implementation */ }
|
|
//! }
|
|
//! }
|
|
//!
|
|
//! # fn main() {}
|
|
//! ```
|
|
|
|
#![no_std]
|
|
|
|
-#![doc(html_root_url = "http://alexcrichton.com/cfg-if")]
|
|
+#![doc(html_root_url = "https://docs.rs/cfg-if")]
|
|
#![deny(missing_docs)]
|
|
#![cfg_attr(test, deny(warnings))]
|
|
|
|
-#[macro_export]
|
|
+#[macro_export(local_inner_macros)]
|
|
macro_rules! cfg_if {
|
|
+ // match if/else chains with a final `else`
|
|
($(
|
|
if #[cfg($($meta:meta),*)] { $($it:item)* }
|
|
) else * else {
|
|
$($it2:item)*
|
|
}) => {
|
|
- __cfg_if_items! {
|
|
+ cfg_if! {
|
|
+ @__items
|
|
() ;
|
|
$( ( ($($meta),*) ($($it)*) ), )*
|
|
( () ($($it2)*) ),
|
|
}
|
|
};
|
|
+
|
|
+ // match if/else chains lacking a final `else`
|
|
(
|
|
if #[cfg($($i_met:meta),*)] { $($i_it:item)* }
|
|
$(
|
|
else if #[cfg($($e_met:meta),*)] { $($e_it:item)* }
|
|
)*
|
|
) => {
|
|
- __cfg_if_items! {
|
|
+ cfg_if! {
|
|
+ @__items
|
|
() ;
|
|
( ($($i_met),*) ($($i_it)*) ),
|
|
$( ( ($($e_met),*) ($($e_it)*) ), )*
|
|
( () () ),
|
|
}
|
|
- }
|
|
-}
|
|
+ };
|
|
|
|
-#[macro_export]
|
|
-#[doc(hidden)]
|
|
-macro_rules! __cfg_if_items {
|
|
- (($($not:meta,)*) ; ) => {};
|
|
- (($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => {
|
|
- __cfg_if_apply! { cfg(all($($m,)* not(any($($not),*)))), $($it)* }
|
|
- __cfg_if_items! { ($($not,)* $($m,)*) ; $($rest)* }
|
|
- }
|
|
-}
|
|
+ // Internal and recursive macro to emit all the items
|
|
+ //
|
|
+ // Collects all the negated cfgs in a list at the beginning and after the
|
|
+ // semicolon is all the remaining items
|
|
+ (@__items ($($not:meta,)*) ; ) => {};
|
|
+ (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => {
|
|
+ // Emit all items within one block, applying an approprate #[cfg]. The
|
|
+ // #[cfg] will require all `$m` matchers specified and must also negate
|
|
+ // all previous matchers.
|
|
+ cfg_if! { @__apply cfg(all($($m,)* not(any($($not),*)))), $($it)* }
|
|
|
|
-#[macro_export]
|
|
-#[doc(hidden)]
|
|
-macro_rules! __cfg_if_apply {
|
|
- ($m:meta, $($it:item)*) => {
|
|
+ // Recurse to emit all other items in `$rest`, and when we do so add all
|
|
+ // our `$m` matchers to the list of `$not` matchers as future emissions
|
|
+ // will have to negate everything we just matched as well.
|
|
+ cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* }
|
|
+ };
|
|
+
|
|
+ // Internal macro to Apply a cfg attribute to a list of items
|
|
+ (@__apply $m:meta, $($it:item)*) => {
|
|
$(#[$m] $it)*
|
|
- }
|
|
+ };
|
|
}
|
|
|
|
#[cfg(test)]
|
|
diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json
|
|
index c063d4c27534..7c1901dce515 100644
|
|
--- a/third_party/rust/encoding_rs/.cargo-checksum.json
|
|
+++ b/third_party/rust/encoding_rs/.cargo-checksum.json
|
|
@@ -1 +1 @@
|
|
-{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"f4c9b33981fe222ef322d640f5ef680828d75dcd534b8aa2bfdd576598deea64","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"ad140c9178067c8bdba8ae43ddffd0506d70d49474731247a050ff99a3ff7832","build.rs":"f5defca2c68b73e8723f489a9279af4fbe9724abc6e9abf58d32542e8a459e26","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd7
7776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.
txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"71aa7de1c5236a34ea0a8bb85332987751d2466b756fca6b3f6ac0da765cf91e","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e786de9e92e5652bc200266cf318753eea869e8971857cc0caa65a3cfe687545","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"f412f60f2d4afb7e32ffba94dc5f93716e6ae9f065799ca17bb1f1b2145f6ee4","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"565ceeffe81173b85700c55c396ab72068751ef809bea8e1cb1e6c7919f5a905","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_d
ata/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab0
0ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"ab26ea900c8f7b7a4d1172872b7ca4bc573bc60b7b1979c93aafdfb86b2c2235"},"package":"a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"}
|
|
\ No newline at end of file
|
|
+{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"fd56e8d662553f0cc559f8ef7097effefbc815ac3485799b37dee9df08ec803c","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"8ae2a3548dee23c19e20564a90e2fd0dfa600cf4c2dfcc538f3455f4462d7133","build.rs":"82747097b0bb8999cdaf689a9e46195f6df5d691ee90bcde8a7b79f16bd976f0","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd7
7776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.
txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"0646bd091892ff7a76f34efccda4e5ddabe1e624e890baa9fdc9d48011d2d38b","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e2917fb9f605662ec4705d8c0b3c179f2264697a761191c3ec8101748cf717dc","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"5498de31e816f51348b8d298d4fc9568da6b0b9363146f87ca5503131d33397f","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"857e61c1bda9d65286c23a6c3910d6814680bbc3064bf0ff92de5bc4f3edb6f3","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_d
ata/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab0
0ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"da51def859b870ced29cb87987f02d27b220eac0f222876cb72a1dc616f9d8ec"},"package":"0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"}
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/encoding_rs/Cargo.toml b/third_party/rust/encoding_rs/Cargo.toml
|
|
index 65fc8e8dffcd..e29f19fb9afe 100644
|
|
--- a/third_party/rust/encoding_rs/Cargo.toml
|
|
+++ b/third_party/rust/encoding_rs/Cargo.toml
|
|
@@ -12,47 +12,47 @@
|
|
|
|
[package]
|
|
name = "encoding_rs"
|
|
-version = "0.8.14"
|
|
+version = "0.8.16"
|
|
authors = ["Henri Sivonen <hsivonen@hsivonen.fi>"]
|
|
description = "A Gecko-oriented implementation of the Encoding Standard"
|
|
homepage = "https://docs.rs/encoding_rs/"
|
|
documentation = "https://docs.rs/encoding_rs/"
|
|
readme = "README.md"
|
|
keywords = ["encoding", "web", "unicode", "charset"]
|
|
categories = ["text-processing", "encoding", "web-programming", "internationalization"]
|
|
license = "MIT/Apache-2.0"
|
|
repository = "https://github.com/hsivonen/encoding_rs"
|
|
[profile.release]
|
|
lto = true
|
|
[dependencies.cfg-if]
|
|
version = "0.1.0"
|
|
|
|
+[dependencies.packed_simd]
|
|
+version = "0.3.3"
|
|
+optional = true
|
|
+
|
|
[dependencies.serde]
|
|
version = "1.0"
|
|
optional = true
|
|
-
|
|
-[dependencies.simd]
|
|
-version = "0.2.3"
|
|
-optional = true
|
|
[dev-dependencies.bincode]
|
|
version = "0.8"
|
|
|
|
[dev-dependencies.serde_derive]
|
|
version = "1.0"
|
|
|
|
[dev-dependencies.serde_json]
|
|
version = "1.0"
|
|
|
|
[features]
|
|
fast-big5-hanzi-encode = []
|
|
fast-gb-hanzi-encode = []
|
|
fast-hangul-encode = []
|
|
fast-hanja-encode = []
|
|
fast-kanji-encode = []
|
|
fast-legacy-encode = ["fast-hangul-encode", "fast-hanja-encode", "fast-kanji-encode", "fast-gb-hanzi-encode", "fast-big5-hanzi-encode"]
|
|
less-slow-big5-hanzi-encode = []
|
|
less-slow-gb-hanzi-encode = []
|
|
less-slow-kanji-encode = []
|
|
-simd-accel = ["simd"]
|
|
+simd-accel = ["packed_simd", "packed_simd/into_bits"]
|
|
[badges.travis-ci]
|
|
repository = "hsivonen/encoding_rs"
|
|
diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md
|
|
index 3446efd0bb43..8a72b515450e 100644
|
|
--- a/third_party/rust/encoding_rs/README.md
|
|
+++ b/third_party/rust/encoding_rs/README.md
|
|
@@ -126,17 +126,39 @@ There are currently these optional cargo features:
|
|
|
|
### `simd-accel`
|
|
|
|
-Enables SSE2 acceleration on x86 and x86_64 and NEON acceleration on Aarch64
|
|
-and ARMv7. _Enabling this cargo feature is recommended when building for x86,
|
|
-x86_64, ARMv7 or Aarch64._ The intention is for the functionality enabled by
|
|
-this feature to become the normal on-by-default behavior once
|
|
-[portable SIMD](https://github.com/rust-lang/rfcs/pull/2366) becames part of
|
|
-stable Rust.
|
|
-
|
|
-Enabling this feature breaks the build unless the target is x86 with SSE2
|
|
-(Rust's default 32-bit x86 target, `i686`, has SSE2, but Linux distros may
|
|
-use an x86 target without SSE2, i.e. `i586` in `rustup` terms), ARMv7 or
|
|
-thumbv7 with NEON (`-C target_feature=+neon`), x86_64 or Aarch64.
|
|
+Enables SIMD acceleration using the nightly-dependent `packed_simd` crate.
|
|
+
|
|
+This is an opt-in feature, because enabling this feature _opts out_ of Rust's
|
|
+guarantees of future compilers compiling old code (aka. "stability story").
|
|
+
|
|
+Currently, this has not been tested to be an improvement except for these
|
|
+targets:
|
|
+
|
|
+* x86_64
|
|
+* i686
|
|
+* aarch64
|
|
+* thumbv7neon
|
|
+
|
|
+If you use nightly Rust, you use targets whose first component is one of the
|
|
+above, and you are prepared _to have to revise your configuration when updating
|
|
+Rust_, you should enable this feature. Otherwise, please _do not_ enable this
|
|
+feature.
|
|
+
|
|
+_Note!_ If you are compiling for a target that does not have 128-bit SIMD
|
|
+enabled as part of the target definition and you are enabling 128-bit SIMD
|
|
+using `-C target_feature`, you need to enable the `core_arch` Cargo feature
|
|
+for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of
|
|
+using the standard-library copy of `core::arch`, because the `core::arch`
|
|
+module of the pre-compiled standard library has been compiled with the
|
|
+assumption that the CPU doesn't have 128-bit SIMD. At present this applies
|
|
+mainly to 32-bit ARM targets whose first component does not include the
|
|
+substring `neon`.
|
|
+
|
|
+The encoding_rs side of things has not been properly set up for POWER,
|
|
+PowerPC, MIPS, etc., SIMD at this time, so even if you were to follow
|
|
+the advice from the previous paragraph, you probably shouldn't use
|
|
+the `simd-accel` option on the less mainstream architectures at this
|
|
+time.
|
|
|
|
Used by Firefox.
|
|
|
|
@@ -382,6 +404,14 @@ To regenerate the generated code:
|
|
|
|
## Release Notes
|
|
|
|
+### 0.8.16
|
|
+
|
|
+* Switch from the `simd` crate to `packed_simd`.
|
|
+
|
|
+### 0.8.15
|
|
+
|
|
+* Adjust documentation for `simd-accel` (README-only release).
|
|
+
|
|
### 0.8.14
|
|
|
|
* Made UTF-16 to UTF-8 encode conversion fill the output buffer as
|
|
diff --git a/third_party/rust/encoding_rs/build.rs b/third_party/rust/encoding_rs/build.rs
|
|
index 1b7adf780010..e687878081f7 100644
|
|
--- a/third_party/rust/encoding_rs/build.rs
|
|
+++ b/third_party/rust/encoding_rs/build.rs
|
|
@@ -1,4 +1,12 @@
|
|
fn main() {
|
|
+ // This does not enable `RUSTC_BOOTSTRAP=1` for `packed_simd`.
|
|
+ // You still need to knowingly have a setup that makes
|
|
+ // `packed_simd` compile. Therefore, having this file on
|
|
+ // crates.io is harmless in terms of users of `encoding_rs`
|
|
+ // accidentally depending on nightly features. Having this
|
|
+ // here means that if you knowingly want this, you only
|
|
+ // need to maintain a fork of `packed_simd` without _also_
|
|
+ // having to maintain a fork of `encoding_rs`.
|
|
#[cfg(feature = "simd-accel")]
|
|
println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1");
|
|
}
|
|
diff --git a/third_party/rust/encoding_rs/src/handles.rs b/third_party/rust/encoding_rs/src/handles.rs
|
|
index d75b65d75ce3..08da62d20051 100644
|
|
--- a/third_party/rust/encoding_rs/src/handles.rs
|
|
+++ b/third_party/rust/encoding_rs/src/handles.rs
|
|
@@ -34,7 +34,7 @@ use simd_funcs::*;
|
|
all(target_endian = "little", target_feature = "neon")
|
|
)
|
|
))]
|
|
-use simd::u16x8;
|
|
+use packed_simd::u16x8;
|
|
|
|
use super::DecoderResult;
|
|
use super::EncoderResult;
|
|
diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs
|
|
index 912c349a0e25..23069375d6f8 100644
|
|
--- a/third_party/rust/encoding_rs/src/lib.rs
|
|
+++ b/third_party/rust/encoding_rs/src/lib.rs
|
|
@@ -11,7 +11,7 @@
|
|
feature = "cargo-clippy",
|
|
allow(doc_markdown, inline_always, new_ret_no_self)
|
|
)]
|
|
-#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.14")]
|
|
+#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")]
|
|
|
|
//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
|
|
//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
|
|
@@ -665,20 +665,21 @@
|
|
//! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes)
|
|
//! for discussion about the UTF-16 family.
|
|
|
|
-#![cfg_attr(feature = "simd-accel", feature(platform_intrinsics, core_intrinsics))]
|
|
+#![cfg_attr(feature = "simd-accel", feature(stdsimd, core_intrinsics))]
|
|
|
|
#[macro_use]
|
|
extern crate cfg_if;
|
|
|
|
#[cfg(all(
|
|
feature = "simd-accel",
|
|
any(
|
|
target_feature = "sse2",
|
|
all(target_endian = "little", target_arch = "aarch64"),
|
|
all(target_endian = "little", target_feature = "neon")
|
|
)
|
|
))]
|
|
-extern crate simd;
|
|
+#[macro_use(shuffle)]
|
|
+extern crate packed_simd;
|
|
|
|
#[cfg(feature = "serde")]
|
|
extern crate serde;
|
|
diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs
|
|
index 6cd1a4448056..c5ee605c1b13 100644
|
|
--- a/third_party/rust/encoding_rs/src/mem.rs
|
|
+++ b/third_party/rust/encoding_rs/src/mem.rs
|
|
@@ -228,8 +228,8 @@ macro_rules! by_unit_check_simd {
|
|
cfg_if! {
|
|
if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] {
|
|
use simd_funcs::*;
|
|
- use simd::u8x16;
|
|
- use simd::u16x8;
|
|
+ use packed_simd::u8x16;
|
|
+ use packed_simd::u16x8;
|
|
|
|
const SIMD_ALIGNMENT: usize = 16;
|
|
|
|
@@ -631,47 +631,42 @@ cfg_if! {
|
|
///
|
|
/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
|
|
/// is not guaranteed to fail fast.)
|
|
-#[inline]
|
|
pub fn is_ascii(buffer: &[u8]) -> bool {
|
|
is_ascii_impl(buffer)
|
|
}
|
|
|
|
/// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing
|
|
/// only ASCII characters).
|
|
///
|
|
/// May read the entire buffer even if it isn't all-ASCII. (I.e. the function
|
|
/// is not guaranteed to fail fast.)
|
|
-#[inline]
|
|
pub fn is_basic_latin(buffer: &[u16]) -> bool {
|
|
is_basic_latin_impl(buffer)
|
|
}
|
|
|
|
/// Checks whether the buffer is valid UTF-8 representing only code points
|
|
/// less than or equal to U+00FF.
|
|
///
|
|
/// Fails fast. (I.e. returns before having read the whole buffer if UTF-8
|
|
/// invalidity or code points above U+00FF are discovered.
|
|
-#[inline]
|
|
pub fn is_utf8_latin1(buffer: &[u8]) -> bool {
|
|
is_utf8_latin1_impl(buffer).is_none()
|
|
}
|
|
|
|
/// Checks whether the buffer represents only code point less than or equal
|
|
/// to U+00FF.
|
|
///
|
|
/// Fails fast. (I.e. returns before having read the whole buffer if code
|
|
/// points above U+00FF are discovered.
|
|
-#[inline]
|
|
pub fn is_str_latin1(buffer: &str) -> bool {
|
|
is_str_latin1_impl(buffer).is_none()
|
|
}
|
|
|
|
/// Checks whether the buffer represents only code point less than or equal
|
|
/// to U+00FF.
|
|
///
|
|
/// May read the entire buffer even if it isn't all-Latin1. (I.e. the function
|
|
/// is not guaranteed to fail fast.)
|
|
-#[inline]
|
|
pub fn is_utf16_latin1(buffer: &[u16]) -> bool {
|
|
is_utf16_latin1_impl(buffer)
|
|
}
|
|
@@ -1283,7 +1278,6 @@ pub fn is_str_bidi(buffer: &str) -> bool {
|
|
/// high surrogate that could be the high half of an RTL character.
|
|
/// Returns `false` if the input contains neither RTL characters nor
|
|
/// unpaired high surrogates that could be higher halves of RTL characters.
|
|
-#[inline]
|
|
pub fn is_utf16_bidi(buffer: &[u16]) -> bool {
|
|
is_utf16_bidi_impl(buffer)
|
|
}
|
|
@@ -1416,67 +1410,63 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool {
|
|
/// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`.
|
|
/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return
|
|
/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
|
|
-#[inline]
|
|
pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi {
|
|
if let Some(offset) = is_utf8_latin1_impl(buffer) {
|
|
if is_utf8_bidi(&buffer[offset..]) {
|
|
Latin1Bidi::Bidi
|
|
} else {
|
|
Latin1Bidi::LeftToRight
|
|
}
|
|
} else {
|
|
Latin1Bidi::Latin1
|
|
}
|
|
}
|
|
|
|
/// Checks whether a valid UTF-8 buffer contains code points
|
|
/// that trigger right-to-left processing or is all-Latin1.
|
|
///
|
|
/// Possibly more efficient than performing the checks separately.
|
|
///
|
|
/// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`.
|
|
/// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return
|
|
/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
|
|
-#[inline]
|
|
pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi {
|
|
// The transition from the latin1 check to the bidi check isn't
|
|
// optimal but not tweaking it to perfection today.
|
|
if let Some(offset) = is_str_latin1_impl(buffer) {
|
|
if is_str_bidi(&buffer[offset..]) {
|
|
Latin1Bidi::Bidi
|
|
} else {
|
|
Latin1Bidi::LeftToRight
|
|
}
|
|
} else {
|
|
Latin1Bidi::Latin1
|
|
}
|
|
}
|
|
|
|
/// Checks whether a potentially invalid UTF-16 buffer contains code points
|
|
/// that trigger right-to-left processing or is all-Latin1.
|
|
///
|
|
/// Possibly more efficient than performing the checks separately.
|
|
///
|
|
/// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`.
|
|
/// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return
|
|
/// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`.
|
|
-#[inline]
|
|
pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi {
|
|
check_utf16_for_latin1_and_bidi_impl(buffer)
|
|
}
|
|
|
|
/// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced
|
|
/// with the REPLACEMENT CHARACTER.
|
|
///
|
|
/// The length of the destination buffer must be at least the length of the
|
|
/// source buffer _plus one_.
|
|
///
|
|
/// Returns the number of `u16`s written.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// Panics if the destination buffer is shorter than stated above.
|
|
-#[inline]
|
|
pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize {
|
|
// TODO: Can the requirement for dst to be at least one unit longer
|
|
// be eliminated?
|
|
@@ -1516,7 +1506,6 @@ pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize {
|
|
/// # Panics
|
|
///
|
|
/// Panics if the destination buffer is shorter than stated above.
|
|
-#[inline]
|
|
pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize {
|
|
assert!(
|
|
dst.len() >= src.len(),
|
|
@@ -1683,7 +1672,6 @@ pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize {
|
|
/// not allocating memory for the worst case up front. Specifically,
|
|
/// if the input starts with or ends with an unpaired surrogate, those are
|
|
/// replaced with the REPLACEMENT CHARACTER.
|
|
-#[inline]
|
|
pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) {
|
|
let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
|
|
let (read, written) = convert_utf16_to_utf8_partial(src, bytes);
|
|
@@ -1727,7 +1715,6 @@ pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize {
|
|
/// # Panics
|
|
///
|
|
/// Panics if the destination buffer is shorter than stated above.
|
|
-#[inline]
|
|
pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) {
|
|
assert!(
|
|
dst.len() >= src.len(),
|
|
@@ -1755,7 +1742,6 @@ pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) {
|
|
/// indicated by the return value, so using a `&mut str` interpreted as
|
|
/// `&mut [u8]` as the destination is not safe. If you want to convert into
|
|
/// a `&mut str`, use `convert_utf16_to_str()` instead of this function.
|
|
-#[inline]
|
|
pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) {
|
|
let src_len = src.len();
|
|
let src_ptr = src.as_ptr();
|
|
@@ -1894,7 +1880,6 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize {
|
|
///
|
|
/// If debug assertions are enabled (and not fuzzing) and the input is
|
|
/// not in the range U+0000 to U+00FF, inclusive.
|
|
-#[inline]
|
|
pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
|
|
assert!(
|
|
dst.len() >= src.len(),
|
|
@@ -1957,7 +1942,6 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize {
|
|
///
|
|
/// (Probably in future versions if debug assertions are enabled (and not
|
|
/// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.)
|
|
-#[inline]
|
|
pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) {
|
|
assert!(
|
|
dst.len() >= src.len(),
|
|
@@ -2030,7 +2014,6 @@ pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> {
|
|
|
|
/// Returns the index of the first unpaired surrogate or, if the input is
|
|
/// valid UTF-16 in its entirety, the length of the input.
|
|
-#[inline]
|
|
pub fn utf16_valid_up_to(buffer: &[u16]) -> usize {
|
|
utf16_valid_up_to_impl(buffer)
|
|
}
|
|
@@ -2060,61 +2043,58 @@ pub fn ensure_utf16_validity(buffer: &mut [u16]) {
|
|
/// # Panics
|
|
///
|
|
/// Panics if the destination buffer is shorter than stated above.
|
|
-#[inline]
|
|
pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize {
|
|
assert!(
|
|
dst.len() >= src.len(),
|
|
"Destination must not be shorter than the source."
|
|
);
|
|
if let Some((_, consumed)) =
|
|
unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
|
|
{
|
|
consumed
|
|
} else {
|
|
src.len()
|
|
}
|
|
}
|
|
|
|
/// Copies ASCII from source to destination zero-extending it to UTF-16 up to
|
|
/// the first non-ASCII byte (or the end of the input if it is ASCII in its
|
|
/// entirety).
|
|
///
|
|
/// The length of the destination buffer must be at least the length of the
|
|
/// source buffer.
|
|
///
|
|
/// Returns the number of `u16`s written.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// Panics if the destination buffer is shorter than stated above.
|
|
-#[inline]
|
|
pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize {
|
|
assert!(
|
|
dst.len() >= src.len(),
|
|
"Destination must not be shorter than the source."
|
|
);
|
|
if let Some((_, consumed)) =
|
|
unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) }
|
|
{
|
|
consumed
|
|
} else {
|
|
src.len()
|
|
}
|
|
}
|
|
|
|
/// Copies Basic Latin from source to destination narrowing it to ASCII up to
|
|
/// the first non-Basic Latin code unit (or the end of the input if it is
|
|
/// Basic Latin in its entirety).
|
|
///
|
|
/// The length of the destination buffer must be at least the length of the
|
|
/// source buffer.
|
|
///
|
|
/// Returns the number of bytes written.
|
|
///
|
|
/// # Panics
|
|
///
|
|
/// Panics if the destination buffer is shorter than stated above.
|
|
-#[inline]
|
|
pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize {
|
|
assert!(
|
|
dst.len() >= src.len(),
|
|
diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs
|
|
index 0cc05baf784d..4e19b0e8a07e 100644
|
|
--- a/third_party/rust/encoding_rs/src/simd_funcs.rs
|
|
+++ b/third_party/rust/encoding_rs/src/simd_funcs.rs
|
|
@@ -7,9 +7,9 @@
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
-use simd::u16x8;
|
|
-use simd::u8x16;
|
|
-use simd::Simd;
|
|
+use packed_simd::u16x8;
|
|
+use packed_simd::u8x16;
|
|
+use packed_simd::FromBits;
|
|
|
|
// TODO: Migrate unaligned access to stdlib code if/when the RFC
|
|
// https://github.com/rust-lang/rfcs/pull/1725 is implemented.
|
|
@@ -62,81 +62,79 @@ pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) {
|
|
*(ptr as *mut u16x8) = s;
|
|
}
|
|
|
|
-extern "platform-intrinsic" {
|
|
- fn simd_shuffle16<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 16]) -> U;
|
|
+cfg_if! {
|
|
+ if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] {
|
|
+ use std::arch::x86_64::__m128i;
|
|
+ use std::arch::x86_64::_mm_movemask_epi8;
|
|
+ use std::arch::x86_64::_mm_packus_epi16;
|
|
+ } else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] {
|
|
+ use std::arch::x86::__m128i;
|
|
+ use std::arch::x86::_mm_movemask_epi8;
|
|
+ use std::arch::x86::_mm_packus_epi16;
|
|
+ } else if #[cfg(target_arch = "aarch64")]{
|
|
+ use std::arch::aarch64::uint8x16_t;
|
|
+ use std::arch::aarch64::uint16x8_t;
|
|
+ use std::arch::aarch64::vmaxvq_u8;
|
|
+ use std::arch::aarch64::vmaxvq_u16;
|
|
+ } else {
|
|
+
|
|
+ }
|
|
}
|
|
|
|
// #[inline(always)]
|
|
// fn simd_byte_swap_u8(s: u8x16) -> u8x16 {
|
|
// unsafe {
|
|
-// simd_shuffle16(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
|
+// shuffle!(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
|
|
// }
|
|
// }
|
|
|
|
// #[inline(always)]
|
|
// pub fn simd_byte_swap(s: u16x8) -> u16x8 {
|
|
// to_u16_lanes(simd_byte_swap_u8(to_u8_lanes(s)))
|
|
// }
|
|
|
|
#[inline(always)]
|
|
pub fn simd_byte_swap(s: u16x8) -> u16x8 {
|
|
let left = s << 8;
|
|
let right = s >> 8;
|
|
left | right
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn to_u16_lanes(s: u8x16) -> u16x8 {
|
|
- unsafe { ::std::mem::transmute(s) }
|
|
+ u16x8::from_bits(s)
|
|
}
|
|
|
|
-// #[inline(always)]
|
|
-// pub fn to_u8_lanes(s: u16x8) -> u8x16 {
|
|
-// unsafe { ::std::mem::transmute(s) }
|
|
-// }
|
|
-
|
|
cfg_if! {
|
|
if #[cfg(target_feature = "sse2")] {
|
|
|
|
- use simd::i16x8;
|
|
- use simd::i8x16;
|
|
- extern "platform-intrinsic" {
|
|
- fn x86_mm_movemask_epi8(x: i8x16) -> i32;
|
|
- }
|
|
-
|
|
// Expose low-level mask instead of higher-level conclusion,
|
|
// because the non-ASCII case would perform less well otherwise.
|
|
#[inline(always)]
|
|
pub fn mask_ascii(s: u8x16) -> i32 {
|
|
unsafe {
|
|
- let signed: i8x16 = ::std::mem::transmute_copy(&s);
|
|
- x86_mm_movemask_epi8(signed)
|
|
+ _mm_movemask_epi8(__m128i::from_bits(s))
|
|
}
|
|
}
|
|
|
|
} else {
|
|
|
|
}
|
|
}
|
|
|
|
cfg_if! {
|
|
if #[cfg(target_feature = "sse2")] {
|
|
#[inline(always)]
|
|
pub fn simd_is_ascii(s: u8x16) -> bool {
|
|
unsafe {
|
|
- let signed: i8x16 = ::std::mem::transmute_copy(&s);
|
|
- x86_mm_movemask_epi8(signed) == 0
|
|
+ _mm_movemask_epi8(__m128i::from_bits(s)) == 0
|
|
}
|
|
}
|
|
} else if #[cfg(target_arch = "aarch64")]{
|
|
- extern "platform-intrinsic" {
|
|
- fn aarch64_vmaxvq_u8(x: u8x16) -> u8;
|
|
- }
|
|
-
|
|
#[inline(always)]
|
|
pub fn simd_is_ascii(s: u8x16) -> bool {
|
|
unsafe {
|
|
- aarch64_vmaxvq_u8(s) < 0x80
|
|
+ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0x80
|
|
}
|
|
}
|
|
} else {
|
|
@@ -164,35 +162,31 @@ cfg_if! {
|
|
#[inline(always)]
|
|
pub fn simd_is_str_latin1(s: u8x16) -> bool {
|
|
unsafe {
|
|
- aarch64_vmaxvq_u8(s) < 0xC4
|
|
+ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0xC4
|
|
}
|
|
}
|
|
} else {
|
|
#[inline(always)]
|
|
pub fn simd_is_str_latin1(s: u8x16) -> bool {
|
|
let above_str_latin1 = u8x16::splat(0xC4);
|
|
s.lt(above_str_latin1).all()
|
|
}
|
|
}
|
|
}
|
|
|
|
cfg_if! {
|
|
if #[cfg(target_arch = "aarch64")]{
|
|
- extern "platform-intrinsic" {
|
|
- fn aarch64_vmaxvq_u16(x: u16x8) -> u16;
|
|
- }
|
|
-
|
|
#[inline(always)]
|
|
pub fn simd_is_basic_latin(s: u16x8) -> bool {
|
|
unsafe {
|
|
- aarch64_vmaxvq_u16(s) < 0x80
|
|
+ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x80
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn simd_is_latin1(s: u16x8) -> bool {
|
|
unsafe {
|
|
- aarch64_vmaxvq_u16(s) < 0x100
|
|
+ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x100
|
|
}
|
|
}
|
|
} else {
|
|
@@ -225,7 +219,7 @@ cfg_if! {
|
|
macro_rules! aarch64_return_false_if_below_hebrew {
|
|
($s:ident) => ({
|
|
unsafe {
|
|
- if aarch64_vmaxvq_u16($s) < 0x0590 {
|
|
+ if vmaxvq_u16(uint16x8_t::from_bits($s)) < 0x0590 {
|
|
return false;
|
|
}
|
|
}
|
|
@@ -292,47 +286,38 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool {
|
|
#[inline(always)]
|
|
pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) {
|
|
unsafe {
|
|
- let first: u8x16 = simd_shuffle16(
|
|
+ let first: u8x16 = shuffle!(
|
|
s,
|
|
u8x16::splat(0),
|
|
- [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
|
|
+ [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]
|
|
);
|
|
- let second: u8x16 = simd_shuffle16(
|
|
+ let second: u8x16 = shuffle!(
|
|
s,
|
|
u8x16::splat(0),
|
|
- [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
|
|
+ [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]
|
|
);
|
|
- (
|
|
- ::std::mem::transmute_copy(&first),
|
|
- ::std::mem::transmute_copy(&second),
|
|
- )
|
|
+ (u16x8::from_bits(first), u16x8::from_bits(second))
|
|
}
|
|
}
|
|
|
|
cfg_if! {
|
|
if #[cfg(target_feature = "sse2")] {
|
|
- extern "platform-intrinsic" {
|
|
- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16;
|
|
- }
|
|
-
|
|
#[inline(always)]
|
|
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
|
|
unsafe {
|
|
- let first: i16x8 = ::std::mem::transmute_copy(&a);
|
|
- let second: i16x8 = ::std::mem::transmute_copy(&b);
|
|
- x86_mm_packus_epi16(first, second)
|
|
+ u8x16::from_bits(_mm_packus_epi16(__m128i::from_bits(a), __m128i::from_bits(b)))
|
|
}
|
|
}
|
|
} else {
|
|
#[inline(always)]
|
|
pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 {
|
|
unsafe {
|
|
- let first: u8x16 = ::std::mem::transmute_copy(&a);
|
|
- let second: u8x16 = ::std::mem::transmute_copy(&b);
|
|
- simd_shuffle16(
|
|
+ let first = u8x16::from_bits(a);
|
|
+ let second = u8x16::from_bits(b);
|
|
+ shuffle!(
|
|
first,
|
|
second,
|
|
- [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30],
|
|
+ [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
|
|
)
|
|
}
|
|
}
|
|
diff --git a/third_party/rust/encoding_rs/src/x_user_defined.rs b/third_party/rust/encoding_rs/src/x_user_defined.rs
|
|
index 6d0d613fa093..2d2076987984 100644
|
|
--- a/third_party/rust/encoding_rs/src/x_user_defined.rs
|
|
+++ b/third_party/rust/encoding_rs/src/x_user_defined.rs
|
|
@@ -14,15 +14,12 @@ use variant::*;
|
|
cfg_if! {
|
|
if #[cfg(feature = "simd-accel")] {
|
|
use simd_funcs::*;
|
|
- use simd::u16x8;
|
|
+ use packed_simd::u16x8;
|
|
|
|
#[inline(always)]
|
|
fn shift_upper(unpacked: u16x8) -> u16x8 {
|
|
let highest_ascii = u16x8::splat(0x7F);
|
|
- let offset = u16x8::splat(0xF700);
|
|
- let mask = unpacked.gt(highest_ascii).to_repr().to_u16();
|
|
- unpacked + (offset & mask)
|
|
- }
|
|
+ unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
|
|
} else {
|
|
}
|
|
}
|
|
diff --git a/third_party/rust/packed_simd/.appveyor.yml b/third_party/rust/packed_simd/.appveyor.yml
|
|
new file mode 100644
|
|
index 000000000000..0388cee0a07b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/.appveyor.yml
|
|
@@ -0,0 +1,59 @@
|
|
+matrix:
|
|
+ allow_failures:
|
|
+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72
|
|
+ - TARGET: i686-pc-windows-msvc
|
|
+ - TARGET: i686-pc-windows-gnu
|
|
+ - TARGET: x86_64-pc-windows-gnu
|
|
+ fast_finish: true
|
|
+
|
|
+environment:
|
|
+ matrix:
|
|
+ - TARGET: x86_64-pc-windows-msvc
|
|
+ MSYSTEM: MINGW64
|
|
+ NOVERIFY: "1"
|
|
+ - TARGET: x86_64-pc-windows-msvc
|
|
+ MSYSTEM: MINGW64
|
|
+ RUSTFLAGS: "-C target-feature=+sse4.2"
|
|
+ NOVERIFY: "1"
|
|
+ - TARGET: x86_64-pc-windows-msvc
|
|
+ MSYSTEM: MINGW64
|
|
+ RUSTFLAGS: "-C target-feature=+avx"
|
|
+ NOVERIFY: "1"
|
|
+ - TARGET: x86_64-pc-windows-msvc
|
|
+ MSYSTEM: MINGW64
|
|
+ RUSTFLAGS: "-C target-feature=+avx2"
|
|
+ NOVERIFY: "1"
|
|
+
|
|
+ - TARGET: i686-pc-windows-msvc
|
|
+ MSYSTEM: MINGW32
|
|
+ NOVERIFY: "1"
|
|
+ - TARGET: i686-pc-windows-msvc
|
|
+ MSYSTEM: MINGW32
|
|
+ RUSTFLAGS: "-C target-feature=+sse4.2"
|
|
+ NOVERIFY: "1"
|
|
+ - TARGET: i686-pc-windows-msvc
|
|
+ MSYSTEM: MINGW32
|
|
+ RUSTFLAGS: "-C target-feature=+avx"
|
|
+ NOVERIFY: "1"
|
|
+ - TARGET: i686-pc-windows-msvc
|
|
+ MSYSTEM: MINGW32
|
|
+ RUSTFLAGS: "-C target-feature=+avx2"
|
|
+ NOVERIFY: "1"
|
|
+
|
|
+ - TARGET: x86_64-pc-windows-gnu
|
|
+ MSYSTEM: MINGW64
|
|
+
|
|
+ - TARGET: i686-pc-windows-gnu
|
|
+ MSYSTEM: MINGW32
|
|
+ - TARGET: x86_64-pc-windows-gnu
|
|
+ MSYSTEM: MINGW64
|
|
+install:
|
|
+ - ps: if (ls -r . -fi "*.rs" | sls "`t") { throw "Found tab character" }
|
|
+ - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe"
|
|
+ - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null
|
|
+ - ps: $env:PATH="$env:PATH;C:\rust\bin"
|
|
+ - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
|
|
+ - rustc -vV
|
|
+ - cargo -vV
|
|
+build: false
|
|
+test_script: bash -c "ci/run.sh"
|
|
diff --git a/third_party/rust/packed_simd/.cargo-checksum.json b/third_party/rust/packed_simd/.cargo-checksum.json
|
|
new file mode 100644
|
|
index 000000000000..01afcc1efdac
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/.cargo-checksum.json
|
|
@@ -0,0 +1 @@
|
|
+{"files":{".appveyor.yml":"f1ed01850e0d725f9498f52a1a63ddf40702ad6e0bf5b2d7c4c04d76e96794a3",".travis.yml":"e9258d9a54fdaf4cbc12405fe5993ac4497eb2b29021691dbc91b19cb9b52227","Cargo.toml":"089941ba3c89ea111cbea3cc3abdcdcf2b9d0ae0db268d7269ee38226db950e5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","bors.toml":"dee881dc69b9b7834e4eba5d95c3ed5a416d4628815a167d6a22d4cb4fb064b8","build.rs":"f3baefc5e5bb9b250e762a1466371b922fd7ee4243c217b2d014307603c2f57a","ci/all.sh":"a23d14e10cb26a0eb719e389c30eb955fa53cddcd436890646df09af640bd2eb","ci/android-install-ndk.sh":"0f1746108cc30bf9b9ba45bcde7b19fc1a8bdf5b0258035b4eb8dc69b75efac4","ci/android-install-sdk.sh":"3490432022c5c8f5a115c084f7a9aca1626f96c0c87ffb62019228c4346b47e4","ci/android-sysimage.sh":"ebf4e5daa1f0fe1b2092b79f0f3f161c4c4275cb744e52352c4d81ab451e4c5a","ci/benchmark.sh":"b61d19ef6b90deba8fb79dee74c8b062d94844676293da346da87bb78a9a49a4","ci/deploy_and_run_on_ios_simulator.rs":"ec8ecf82d92072676aa47f0d1a3d021b60a7ae3531153ef12d2ff4541fc294dc","ci/docker/aarch64-linux-android/Dockerfile":"ace2e7d33c87bc0f6d3962a4a3408c04557646f7f51ab99cfbf574906796b016","ci/docker/aarch64-unknown-linux-gnu/Dockerfile":"1ecdac757101d951794fb2ab0deaa278199cf25f2e08a15c7d40ff31a8556184","ci/docker/arm-linux-androideabi/Dockerfile":"370e55d3330a413a3ccf677b3afb3e0ef9018a5fab263faa97ae8ac017fc2286","ci/docker/arm-unknown-linux-gnueabi/Dockerfile":"e25d88f6c0c94aada3d2e3f08243f755feb7e869dc5dc505b3799719cb1af591","ci/docker/arm-unknown-linux-gnueabihf/Dockerfile":"f126f4c7bae8c11ab8b16df06ad997863f0838825a9c08c9899a3eedb6d570bd","ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile":"b647545c158ee480a4c581dbdc1f57833aef056c8d498acc04b573e842bf803c","ci/docker/i586-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/i686-unknown-linux-gnu/Dockerfile":"0d492759017307cc
f74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/mips-unknown-linux-gnu/Dockerfile":"323776469bb7b160385f3621d66e3ee14c75242f8180f916e65af048a29d4ea0","ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile":"c647f6948a9a43b0be695cbed4eac752120d0faf28e5e69c718cb10406921dab","ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile":"77bfd00cc8639509be381b394f077e39b45a00158ad61b4e1656714c714665d1","ci/docker/mipsel-unknown-linux-musl/Dockerfile":"ec5bea6c98a3b626731fdb95f9ff2d1182639c76e8fb16d3271d0fc884901524","ci/docker/powerpc-unknown-linux-gnu/Dockerfile":"4f2b662de66e83d1354f650b7077692309637f786c2ea5516c31b5c2ee10af2d","ci/docker/powerpc64-unknown-linux-gnu/Dockerfile":"a9595402b772bc365982e22a0096a8988825d90b09b5faa97ab192e76072f71d","ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile":"df3c381c157439695ae8cd10ab71664702c061e3b4ab22906a5ad6c2680acfed","ci/docker/s390x-unknown-linux-gnu/Dockerfile":"93fb44df3d7fd31ead158570667c97b5076a05c3d968af4a84bc13819a8f2db8","ci/docker/sparc64-unknown-linux-gnu/Dockerfile":"da1c39a3ff1fe22e41395fa7c8934e90b4c1788e551b9aec6e38bfd94effc437","ci/docker/thumbv7neon-linux-androideabi/Dockerfile":"c2decd5591bd7a09378901bef629cd944acf052eb55e4f35b79eb9cb4d62246a","ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile":"75c0c56161c7382b439de74c00de1c0e3dc9d59560cd6720976a751034b78714","ci/docker/wasm32-unknown-unknown/Dockerfile":"3e5f294bc1e004aa599086c2af49d6f3e7459fa250f5fbdd60cf67d53db78758","ci/docker/x86_64-linux-android/Dockerfile":"685040273cf350d5509e580ac451555efa19790c8723ca2af066adadc6880ad2","ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile":"44b6203d9290bfdc53d81219f0937e1110847a23dd982ec8c4de388354f01536","ci/docker/x86_64-unknown-linux-gnu/Dockerfile":"d253c86803b22da428fa9cc671a05f18d3318eca7733b8dccb4f7be1ddf524c5","ci/dox.sh":"5b61711be47a4e3dde0ddd15ba73d256ea95fd75af3897732c24db1dc7e66366","ci/linux-s390x.sh":"d6b732d7795b4ba131326aff893bca6228a7d2eb0e9402f135705413dbbe0dce","ci/linux-sparc64.sh
":"c92966838b1ab7ad3b7a344833ee726aba6b647cf5952e56f0ad1ba420b13325","ci/lld-shim.rs":"3d7f71ec23a49e2b67f694a0168786f9a954dda15f5a138815d966643fd3fcc3","ci/max_line_width.sh":"0a1518bba4c9ecaa55694cb2e9930d0e19c265baabf73143f17f9cf285aaa5bb","ci/run-docker.sh":"92e036390ad9b0d16f109579df1b5ced2e72e9afea40c7d011400ebd3a2a90de","ci/run.sh":"63259e22a96ba539f53c06b1b39f53e3a78a71171652e7afc170836110ccd913","ci/run_examples.sh":"d1a23c6c35374a0678ba5114b9b8fefd8be0a79e774872a8bf0898d1baca18d0","ci/runtest-android.rs":"145a8e9799a5223975061fe7e586ade5669ee4877a7d7a4cf6b4ab48e8e36c7c","ci/setup_benchmarks.sh":"73fb981a8fdb1dcd54409d3c0fbbfb8f77a3ceabf8626a6b9bf9d21d6bc8ce72","ci/test-runner-linux":"c8aa6025cff5306f4f31d0c61dc5f9d4dd5a1d189ab613ef8d4c367c694d9ccd","contributing.md":"2cc8c9c560ae17867e69b06d09b758dbf7bc39eb774ada50a743724b10acc0a2","perf-guide/.gitignore":"fe82c7da551079d832cf74200b0b359b4df9828cb4a0416fa7384f07a2ae6a13","perf-guide/book.toml":"115a98284126c6b180178b44713314cc494f08a71662ee2ce15cf67f17a51064","perf-guide/src/SUMMARY.md":"3e03bffc991fdc2050f3d51842d72d9d21ea6abab56a3baf3b2d5973a78b89e1","perf-guide/src/ascii.css":"29afb08833b2fe2250f0412e1fa1161a2432a0820a14953c87124407417c741a","perf-guide/src/bound_checks.md":"5e4991ff58a183ef0cd9fdc1feb4cd12d083b44bdf87393bbb0927808ef3ce7d","perf-guide/src/float-math/approx.md":"8c09032fa2d795a0c5db1775826c850d28eb2627846d0965c60ee72de63735ad","perf-guide/src/float-math/fma.md":"311076ba4b741d604a82e74b83a8d7e8c318fcbd7f64c4392d1cf5af95c60243","perf-guide/src/float-math/fp.md":"04153e775ab6e4f0d7837bcc515230d327b04edfa34c84ce9c9e10ebaeef2be8","perf-guide/src/float-math/svml.md":"0798873b8eedaeda5fed62dc91645b57c20775a02d3cd74d8bd06958f1516506","perf-guide/src/introduction.md":"9f5a19e9e6751f25d2daad39891a0cc600974527ec4c8305843f9618910671bd","perf-guide/src/prof/linux.md":"447731eb5de7d69166728fdbc5ecb0c0c9db678ea493b45a592d67dd002184c0","perf-guide/src/prof/mca.md":"f56d54f3d20e7aa4d32052186e8237b03d659
71eb5d112802b442570ff11d344","perf-guide/src/prof/profiling.md":"8a650c0fd6ede0964789bb6577557eeef1d8226a896788602ce61528e260e43c","perf-guide/src/target-feature/attribute.md":"615f88dca0a707b6c416fa605435dd6e1fb5361cc639429cbf68cd87624bd78b","perf-guide/src/target-feature/features.md":"17077760ff24c006b606dd21889c53d87228f4311f3ba3a574f9afdeacd86165","perf-guide/src/target-feature/inlining.md":"7ed1d7068d8173a00d84c16cfe5871cd68b9f04f8d0cca2d01ebc84957ebf2f6","perf-guide/src/target-feature/practice.md":"c4b371842e0086df178488fec97f20def8f0c62ee588bcd25fd948b9b1fa227e","perf-guide/src/target-feature/runtime.md":"835425f5ee597fb3e51d36e725a81ebee29f4561231d19563cd4da81dbb1cfcb","perf-guide/src/target-feature/rustflags.md":"ab49712e9293a65d74d540ba4784fcb57ff1119ec05a575d895c071f1a620f64","perf-guide/src/vert-hor-ops.md":"c6211c0ee91e60552ec592d89d9d957eedc21dee3cbd89e1ad6765ea06a27471","readme.md":"585a8f0e16877fb9abb00cd17a175fcb9d7857840c6c61209f1827ffab095070","rustfmt.toml":"de6101d0670bad65fb3b337d56957d2a024e017e5ab146ec784d77312daaf8ff","src/api.rs":"331a3a4abb19cee2df5f2df4ad7c3e88b45e62cf23fdacfc9bbaa633dc5cf788","src/api/bit_manip.rs":"e68290ee679cc5abc9c73afbe635c1035f8cbfe849e5c751a1680e459244c39e","src/api/cast.rs":"03b94a3d316ac7b7be7068810044911e965e889a0ace7bae762749ca74a92747","src/api/cast/macros.rs":"b0a14d0c83ad2ebb7a275180f6d9e3f2bc312ba57a7d3d6c39fad4e0f20f9408","src/api/cast/v128.rs":"63e28c6a3edf1a7a635f51b8d3c6adbb1d46f884d92a196b3d4a6e743d809416","src/api/cast/v16.rs":"2a584eeb57fd47baad6f3533764301b04aaaac23702b7a8db12598ac02899262","src/api/cast/v256.rs":"b91c15ed8d1536ecd97b4eb79ff9d5aba0552cd9b6f0ea6435b05f2273e23b3a","src/api/cast/v32.rs":"62ec89fcce7fa7f28497ee5770adc8f81d2d3a6b2925b02f7dc06504c40e8f38","src/api/cast/v512.rs":"d855cb943ae7106e9599ef38e30a3afb1c6bd5433178baca54cb128fd9a7d143","src/api/cast/v64.rs":"fe0f7dfaf4fc0c0c1a78c96fcfcdfdc2a1e2845843b11aa797a0c6fb52a8f774","src/api/cmp.rs":"357c3a2a09c6d4611c32dd7fa95be2fae933d51
3e229026ec9b44451a77b884e","src/api/cmp/eq.rs":"60f70f355bae4cb5b17db53204cacc3890f70670611c17df638d4c04f7cc8075","src/api/cmp/ord.rs":"589f7234761c294fa5df8f525bc4acd5a47cdb602207d524a0d4e19804cd9695","src/api/cmp/partial_eq.rs":"3ed23d2a930b0f9750c3a5309da766b03dc4f9c4d375b42ad3c50fe732693d15","src/api/cmp/partial_ord.rs":"e16b11805c94048acd058c93994b5bc74bb187f8d7e3b86a87df60e1601467f9","src/api/cmp/vertical.rs":"de3d62f38eba817299aa16f1e1939954c9a447e316509397465c2830852ba053","src/api/default.rs":"b61f92fc0e33a2633b3375eb405beba480da071cde03df4d437d8a6058afcd97","src/api/fmt.rs":"67fb804bb86b6cd77cf8cd492b5733ce437071b66fe3297278b8a6552c325dda","src/api/fmt/binary.rs":"35cb5c266197d6224d598fb3d286e5fe48ef0c01ed356c2ff6fe9ba946f96a92","src/api/fmt/debug.rs":"aa18eea443bf353fea3db8b1a025132bbcaf91e747ecfa43b8d9fce9af395a0c","src/api/fmt/lower_hex.rs":"69d5be366631af309f214e8031c8c20267fcc27a695eac6f45c6bc1df72a67e6","src/api/fmt/octal.rs":"9eb11ba3d990213f3c7f1ec25edba7ce997cb1320e16d308c83498ba6b9bfbd9","src/api/fmt/upper_hex.rs":"a4637d085b7bb20e759ce58e08435b510a563ba3dd468af2b03560fdc5511562","src/api/from.rs":"2e599d8329cb05eaf06224cc441355c4b7b51254fc19256619333be8c149d444","src/api/from/from_array.rs":"4151593c7bba7455821fffa5b59867005a77c95d32f1f0cc3fd87294000157d9","src/api/from/from_vector.rs":"9764371aa9e6005aace74dea14f59e5611a095b7cf42707940924749282c52f0","src/api/hash.rs":"562cfa3f1d8eb9a733c035a3665a599c2f1e341ee820d8fbdd102a4398a441bc","src/api/into_bits.rs":"82297f0697d67b5a015e904e7e6e7b2a7066ba825bc54b94b4ff3e22d7a1eefb","src/api/into_bits/arch_specific.rs":"1f925390b0ce7132587d95f2419c6e2ad3e1a9d17eb1d9c120a1c1c4bdf4277e","src/api/into_bits/macros.rs":"d762406de25aedff88d460dec7a80dc8e825a2a419d53218ce007efa6a1d3e04","src/api/into_bits/v128.rs":"ecdc5893664c71d7ab1ff3697c3fbe490d20d8748b9b76881d05e7625e40d74c","src/api/into_bits/v16.rs":"5459ec7dad1ad7bd30dc7e48374580b993abf23701d9c3cb22203fa0a9aabb6d","src/api/into_bits/v256.rs":"90ea351da03
80ead1bf0f63b620afd40d01d638d09f7e7be31840bd2c1d9c663","src/api/into_bits/v32.rs":"ee1dc5a430050e16f51154b5fe85b1536f5feddf2ea23dd1d3859b67c4afc6fc","src/api/into_bits/v512.rs":"f72098ed1c9a23944f3d01abaf5e0f2d0e81d35a06fdadd2183e896d41b59867","src/api/into_bits/v64.rs":"6394462facdfe7827349c742b7801f1291e75a720dfb8c0b52100df46f371c98","src/api/math.rs":"8b2a2fc651917a850539f993aa0b9e5bf4da67b11685285b8de8cdca311719ec","src/api/math/float.rs":"61d2794d68262a1090ae473bd30793b5f65cf732f32a6694a3af2ce5d9225616","src/api/math/float/abs.rs":"5b6b2701e2e11135b7ce58a05052ea8120e10e4702c95d046b9d21b827b26bf8","src/api/math/float/consts.rs":"78acba000d3fa527111300b6327c1932de9c4c1e02d4174e1a5615c01463d38c","src/api/math/float/cos.rs":"4c2dd7173728ef189314f1576c9486e03be21b7da98843b2f9011282a7979e31","src/api/math/float/exp.rs":"7c6d5f1e304f498a01cfa23b92380c815d7da0ad94eae3483783bc377d287eef","src/api/math/float/ln.rs":"54c7583f3df793b39ff57534fade27b41bb992439e5dc178252f5ca3190a3e54","src/api/math/float/mul_add.rs":"62cac77660d20159276d4c9ef066eb90c81cbddb808e8e157182c607625ad2eb","src/api/math/float/mul_adde.rs":"bae056ee9f3a70df39ec3c3b2f6437c65303888a7b843ef1a5bcf1f5aca0e602","src/api/math/float/powf.rs":"9ddb938984b36d39d82a82f862f80df8f7fb013f1d222d45698d41d88472f568","src/api/math/float/recpre.rs":"589225794ff1dbf31158dff660e6d4509ecc8befbb57c633900dea5ac0b840d6","src/api/math/float/rsqrte.rs":"a32abdcc318d7ccc8448231f54d75b884b7cbeb03a7d595713ab6243036f4dbf","src/api/math/float/sin.rs":"cbd3622b7df74f19691743001c8cf747a201f8977ad90542fee915f37dcd1e49","src/api/math/float/sqrt.rs":"0c66d5d63fb08e4d99c6b82a8828e41173aff1ac9fa1a2764a11fac217ccf2ac","src/api/math/float/sqrte.rs":"731e1c9f321b662accdd27dacb3aac2e8043b7aecb2f2161dde733bd9f025362","src/api/minimal.rs":"1f22bcc528555444e76de569ec0ae2029b9ae9d04805efeafa93369c8098036b","src/api/minimal/iuf.rs":"c501a6696950cf5e521765f178de548af64fdfb6e10d026616d09fab93ca2d17","src/api/minimal/mask.rs":"42e415f536c5193d0218f5a
754b34b87fd7c971bff068009f958712166ff056d","src/api/minimal/ptr.rs":"a9ee482d1dd1c956fb8f3f179e6e620b1de4e9d713961461d4c6923a4ef2e67c","src/api/ops.rs":"3e273b277a0f3019d42c3c59ca94a5afd4885d5ae6d2182e5089bbeec9de42ee","src/api/ops/scalar_arithmetic.rs":"d2d5ad897a59dd0787544f927e0e7ca4072c3e58b0f4a2324083312b0d5a21d7","src/api/ops/scalar_bitwise.rs":"482204e459ca6be79568e1c9f70adbe2d2151412ddf122fb2161be8ebb51c40c","src/api/ops/scalar_mask_bitwise.rs":"c250f52042e37b22d57256c80d4604104cfd2fbe2a2e127c676267270ca5d350","src/api/ops/scalar_shifts.rs":"987f8fdebeedc16e3d77c1b732e7826ef70633c541d16dfa290845d5c6289150","src/api/ops/vector_arithmetic.rs":"ddca15d09ddeef502c2ed66117a62300ca65d87e959e8b622d767bdf1c307910","src/api/ops/vector_bitwise.rs":"b3968f7005b649edcc22a54e2379b14d5ee19045f2e784029805781ae043b5ee","src/api/ops/vector_float_min_max.rs":"f5155dce75219f4ba11275b1f295d2fdcddd49d174a6f1fb2ace7ea42813ce41","src/api/ops/vector_int_min_max.rs":"a378789c6ff9b32a51fbd0a97ffd36ed102cd1fe6a067d2b02017c1df342def6","src/api/ops/vector_mask_bitwise.rs":"5052d18517d765415d40327e6e8e55a312daaca0a5e2aec959bfa54b1675f9c8","src/api/ops/vector_neg.rs":"5c62f6b0221983cdbd23cd0a3af3672e6ba1255f0dfe8b19aae6fbd6503e231b","src/api/ops/vector_rotates.rs":"03cbe8a400fd7c688e4ee771a990a6754f2031b1a59b19ae81158b21471167e5","src/api/ops/vector_shifts.rs":"9bf69d0087268f61009e39aea52e03a90f378910206b6a28e8393178b6a5d0e0","src/api/ptr.rs":"8a793251bed6130dcfb2f1519ceaa18b751bbb15875928d0fb6deb5a5e07523a","src/api/ptr/gather_scatter.rs":"9ddd960365e050674b25b2fd3116e24d94669b4375d74e71c03e3f1469576066","src/api/reductions.rs":"ae5baca81352ecd44526d6c30c0a1feeda475ec73ddd3c3ec6b14e944e5448ee","src/api/reductions/bitwise.rs":"8bf910ae226188bd15fc7e125f058cd2566b6186fcd0cd8fd020f352c39ce139","src/api/reductions/float_arithmetic.rs":"e58c8c87806a95df2b2b5b48ac5991036df024096d9d7c171a480fe9282896a4","src/api/reductions/integer_arithmetic.rs":"47471da1c5f859489680bb5d34ced3d3aa20081c16053a3a
f121a4496fcb57bf","src/api/reductions/mask.rs":"db83327a950e33a317f37fd33ca4e20c347fb415975ec024f3e23da8509425af","src/api/reductions/min_max.rs":"f27be3aa28e1c1f46de7890198db6e12f00c207085e89ef2de7e57ee443cdb98","src/api/select.rs":"a98e2ccf9fc6bdeed32d337c8675bc96c2fbe2cc34fbf149ad6047fb8e749774","src/api/shuffle.rs":"da58200790868c09659819322a489929a5b6e56c596ed07e6a44293ea02e7d09","src/api/shuffle1_dyn.rs":"bfea5a91905b31444e9ef7ca6eddb7a9606b7e22d3f71bb842eb2795a0346620","src/api/slice.rs":"ee87484e8af329547b9a5d4f2a69e8bed6ea10bbd96270d706083843d4eea2ac","src/api/slice/from_slice.rs":"4d4fe8a329c885fcb4fbcbedf99efb15a95296fe6b3f595056cc37037450d5ac","src/api/slice/write_to_slice.rs":"f5b23b2c4b91cfb26b713a9013a6c0da7f45eaefb79ba06dcbc27f3f23bda679","src/api/swap_bytes.rs":"4a6792a2e49a77475e1b237592b4b2804dbddb79c474331acd0dd71b36934259","src/codegen.rs":"c6eebc3d3665420aa6a2f317977e3c41a4f43e0550ac630cdbe8e4bbed5e2031","src/codegen/bit_manip.rs":"5559e095105a80003e0de35af1d19b0c65c9ab04eb743c7e01c5442d882eb34e","src/codegen/llvm.rs":"d1299c189abb17a6133f047574cffc7a6db4c1be37cb7d4785491cb5e8f8cf54","src/codegen/math.rs":"35f96e37a78fcf0cdb02146b7f27a45108fe06a37fc2a54d8851ce131a326178","src/codegen/math/float.rs":"dd86c0449e576c83b719700962ac017c332987fac08d91f2b7a2b1b883598170","src/codegen/math/float/abs.rs":"f56e2b4b8055ea861c1f5cbc6b6e1d8e7e5af163b62c13574ddee4e09513bfbc","src/codegen/math/float/cos.rs":"ef3b511a24d23045b310315e80348a9b7fedb576fc2de52d74290616a0abeb2a","src/codegen/math/float/cos_pi.rs":"4e7631a5d73dac21531e09ef1802d1180f8997509c2c8fa9f67f322194263a97","src/codegen/math/float/exp.rs":"61b691598c41b5622f24e4320c1bdd08701e612a516438bdddcc728fc3405c8c","src/codegen/math/float/ln.rs":"46b718b1ba8c9d99e1ad40f53d20dfde08a3063ca7bd2a9fdd6698e060da687e","src/codegen/math/float/macros.rs":"dd42135fff13f9aca4fd3a1a4e14c7e6c31aadc6d817d63b0d2fb9e62e062744","src/codegen/math/float/mul_add.rs":"a37bf764345d4b1714f97e83897b7cf0855fc2811704bcbc0012db918
25339e1","src/codegen/math/float/mul_adde.rs":"c75702bfcb361de45964a93caf959a695ef2376bd069227600b8c6872665c755","src/codegen/math/float/powf.rs":"642346e982bc4c39203de0864d2149c4179cd7b21cf67a2951687932b4675872","src/codegen/math/float/sin.rs":"9d68164c90cdca6a85155040cdac42e27342ebe0b925273ef1593df721af4258","src/codegen/math/float/sin_cos_pi.rs":"9be02ad48585a1e8d99129382fbffbaed47852f15459256a708850b6b7a75405","src/codegen/math/float/sin_pi.rs":"9890347905b4d4a3c7341c3eb06406e46e60582bcf6960688bd727e5dadc6c57","src/codegen/math/float/sqrt.rs":"e3c60dcfb0c6d2fc62adabcc931b2d4040b83cab294dea36443fb4b89eb79e34","src/codegen/math/float/sqrte.rs":"f0f4ef9eb475ae41bcc7ec6a95ad744ba6b36925faa8b2c2814004396d196b63","src/codegen/pointer_sized_int.rs":"a70697169c28218b56fd2e8d5353f2e00671d1150d0c8cef77d613bdfacd84cb","src/codegen/reductions.rs":"645e2514746d01387ddd07f0aa4ffd8430cc9ab428d4fb13773ea319fa25dd95","src/codegen/reductions/mask.rs":"8f1afe6aabf096a3278e1fc3a30f736e04aa8b9ce96373cee22162d18cfe2702","src/codegen/reductions/mask/aarch64.rs":"cba6e17603d39795dcfe8339b6b7d8714c3e162a1f0a635979f037aa24fe4206","src/codegen/reductions/mask/arm.rs":"9447904818aa2c7c25d0963eead452a639a11ca7dbd6d21eedbfcaade07a0f33","src/codegen/reductions/mask/fallback.rs":"7a0ef9f7fd03ae318b495b95e121350cd61caffc5cc6ee17fabf130d5d933453","src/codegen/reductions/mask/fallback_impl.rs":"76547f396e55ef403327c77c314cf8db8c7a5c9b9819bfb925abeacf130249e5","src/codegen/reductions/mask/x86.rs":"14bd2c482071f2355beebcf7b7ecf950ff2dfcdb08c3ca50993092434a9de717","src/codegen/reductions/mask/x86/avx.rs":"b4913d87844c522903641cbbf10db4551addb1ce5e9e78278e21612fa65c733b","src/codegen/reductions/mask/x86/avx2.rs":"677aed3f056285285daa3adff8bc65e739630b4424defa6d9665e160f027507e","src/codegen/reductions/mask/x86/sse.rs":"226610b4ff88c676d5187114dd57b4a8800de6ce40884675e9198445b1ed0306","src/codegen/reductions/mask/x86/sse2.rs":"bc38e6c31cb4b3d62147eba6cac264e519e2a48e0f7ce9010cfa9ef0cf0ec9fd","src/code
gen/shuffle.rs":"0abca97e92cdce49a58a39cc447eb09dc7d7715ef256c8dbd2181a186e61bb64","src/codegen/shuffle1_dyn.rs":"04523e9338133bdedb012dd076c2c564b79ce5593b0fc56d0fb6910e04190a81","src/codegen/swap_bytes.rs":"1d6cdc716eadddc92b4fd506b2445a821caa8dc00860447de09d7ebd69c2087f","src/codegen/v128.rs":"94226b31ec403d18d9d2fe06713f147c9c79e9b5f9105089088266313f843185","src/codegen/v16.rs":"ddec4ffb66b6f7aaffb9a1780c5ddba82557abd74f45073d335047e04cf74924","src/codegen/v256.rs":"6b63917f0444118d6b1595bff2045e59b97c4d24012bd575f69f1f0efc5a0241","src/codegen/v32.rs":"3477b3c5540aed86e61e2f5807dd31db947413cec9181c587d93ed6ec74f0eba","src/codegen/v512.rs":"5854f99d3aabc4cd42b28a20d9ce447756dc2ba024a409a69b6a8ae1f1842fc5","src/codegen/v64.rs":"e9e89caebfe63d10c0cbca61e4dfdba3b7e02ee0989170f80beed23237ddd950","src/codegen/vPtr.rs":"96d609a9eece4dcbbcc01ba0b8744d7f5958be12774176a2945bc676f4e6b5cb","src/codegen/vSize.rs":"eeee9858749aa82142b27bc120d1989bb74a6b82e1e4efbbeaccc9634dc9acfc","src/lib.rs":"1b5d419ff05ee0370d671810423ccc254708cc8d415c1dbac2a7a36be4bf63a8","src/masks.rs":"870f429967b2d7d5133f4d28d6c753fc5cef0570b27b29d4e966a066d22d2d0e","src/sealed.rs":"ff7f0324276408ae8249941cfa32c90b8835a54d750896b683efea857af19db2","src/testing.rs":"1d3a7862ef625e235a5734ad7204e68d350f902c0695182b1f08a0552432416e","src/testing/macros.rs":"6378856d7a40ba5ec5c7c0dad6327d79f0c77266921c24296d10aed6c68e9b98","src/testing/utils.rs":"d6fd5a5017f1f85d9d99585754f8f6ad06fc3d683b34083543e67a7cc6c1772c","src/v128.rs":"18fe263c4aa28cd06461c7070b0269f69f4a2e75749b8f142a83dfdfe4d22bf5","src/v16.rs":"e5c663c9fb3547eaeac78a5f7db9969f4d8b5ec96112bf2954602fff11f0aebd","src/v256.rs":"68732cd688ad12a56d8b4f8ddf279f77bdfe1be2943c7dc0c1b4f1a76798aa0f","src/v32.rs":"785b22a1ccb4a41bb53dfeb0670f624c0ce42e6cdf62d1747e3283777a1c70bd","src/v512.rs":"d1337bfe07f06a8f37f8e8fa7d4315b9307476ee435ad80dd5269eaed564fbfa","src/v64.rs":"3077468d65125b8f085e9454c8b2463a4d5225697464ba6a1300f8799528fd4b","src/vPtr.rs":"c9a53f4
1f466e17b6648a4ce390fd8f4d3a848d440eb8a9a803a11608d76eb05","src/vSize.rs":"5c46d3e8c3ee5863d9b6e37e681f871386e0efc254d6d84ba711edb529ce7b3c","tests/endianness.rs":"541a144be017e3dd7da7c8ea49d907dc02538245e8c5f3deb5bd43da92c929e1"},"package":null}
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/.travis.yml b/third_party/rust/packed_simd/.travis.yml
|
|
new file mode 100644
|
|
index 000000000000..8d8ed54ab737
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/.travis.yml
|
|
@@ -0,0 +1,308 @@
|
|
+language: rust
|
|
+sudo: false
|
|
+rust: nightly
|
|
+
|
|
+stages:
|
|
+ - tools
|
|
+ - linux-tier1
|
|
+ - osx-tier1
|
|
+ - osx-tier2
|
|
+ - linux-tier2
|
|
+ - android
|
|
+
|
|
+matrix:
|
|
+ fast_finish: true
|
|
+ include:
|
|
+ # Android:
|
|
+ - env: TARGET=x86_64-linux-android NOVERIFY=1
|
|
+ name: "x86_64-unknown-linux-android + SSE2"
|
|
+ stage: android
|
|
+ - env: TARGET=arm-linux-androideabi
|
|
+ name: "arm-linux-androideabi"
|
|
+ stage: android
|
|
+ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon"
|
|
+ name: "arm-linux-androideabi + NEON"
|
|
+ stage: android
|
|
+ - env: TARGET=aarch64-linux-android
|
|
+ name: "aarch64-unknown-linux-android"
|
|
+ stage: android
|
|
+ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon"
|
|
+ name: "aarch64-unknown-linux-android + NEON"
|
|
+ stage: android
|
|
+ - env: TARGET="thumbv7neon-linux-androideabi"
|
|
+ name: "thumbv7neon-linux-androideabi"
|
|
+ stage: android
|
|
+ # Linux:
|
|
+ - env: TARGET=i586-unknown-linux-gnu
|
|
+ name: "i586-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse"
|
|
+ name: "i586-unknown-linux-gnu + SSE"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2"
|
|
+ name: "i586-unknown-linux-gnu + SSE2"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=i686-unknown-linux-gnu
|
|
+ name: "i686-unknown-linux-gnu + SSE2"
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
|
|
+ name: "i686-unknown-linux-gnu + SSE4.2"
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
|
|
+ name: "i686-unknown-linux-gnu + AVX2"
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu
|
|
+ name: "x86_64-unknown-linux-gnu + SSE2"
|
|
+ install: rustup component add rustfmt-preview
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
|
|
+ name: "x86_64-unknown-linux-gnu + SSE4.2"
|
|
+ install: rustup component add rustfmt-preview
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx"
|
|
+ name: "x86_64-unknown-linux-gnu + AVX"
|
|
+ install: rustup component add rustfmt-preview
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
|
|
+ name: "x86_64-unknown-linux-gnu + AVX2"
|
|
+ install: rustup component add rustfmt-preview
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu-emulated
|
|
+ name: "Intel SDE + SSE2"
|
|
+ install: true
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+sse4.2"
|
|
+ name: "Intel SDE + SSE4.2"
|
|
+ install: true
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx"
|
|
+ name: "Intel SDE + AVX"
|
|
+ install: true
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx2"
|
|
+ name: "Intel SDE + AVX2"
|
|
+ install: true
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx-512f"
|
|
+ name: "Intel SDE + AVX-512"
|
|
+ install: true
|
|
+ stage: linux-tier1
|
|
+ - env: TARGET=arm-unknown-linux-gnueabi
|
|
+ name: "arm-unknown-linux-gnueabi"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon"
|
|
+ name: "arm-unknown-linux-gnueabi + NEON"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=arm-unknown-linux-gnueabihf
|
|
+ name: "arm-unknown-linux-gnueabihf"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon"
|
|
+ name: "arm-unknown-linux-gnueabihf + NEON"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=armv7-unknown-linux-gnueabihf
|
|
+ name: "armv7-unknown-linux-gnueabihf"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon"
|
|
+ name: "armv7-unknown-linux-gnueabihf + NEON"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET="thumbv7neon-unknown-linux-gnueabihf"
|
|
+ name: "thumbv7neon-unknown-linux-gnueabihf"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=aarch64-unknown-linux-gnu
|
|
+ name: "aarch64-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon"
|
|
+ name: "aarch64-unknown-linux-gnu + NEON"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=mips-unknown-linux-gnu
|
|
+ name: "mips-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=mipsel-unknown-linux-musl
|
|
+ name: "mipsel-unknown-linux-musl"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=mips64-unknown-linux-gnuabi64
|
|
+ name: "mips64-unknown-linux-gnuabi64"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=mips64el-unknown-linux-gnuabi64
|
|
+ name: "mips64el-unknown-linux-gnuabi64"
|
|
+ stage: linux-tier2
|
|
+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18
|
|
+ # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6"
|
|
+ - env: TARGET=powerpc-unknown-linux-gnu
|
|
+ name: "powerpc-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=powerpc64-unknown-linux-gnu
|
|
+ name: "powerpc64-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=powerpc64le-unknown-linux-gnu
|
|
+ name: "powerpc64le-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec"
|
|
+ name: "powerpc64le-unknown-linux-gnu + ALTIVEC"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx"
|
|
+ name: "powerpc64le-unknown-linux-gnu + VSX"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=s390x-unknown-linux-gnu
|
|
+ name: "s390x-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ - env: TARGET=sparc64-unknown-linux-gnu
|
|
+ name: "sparc64-unknown-linux-gnu"
|
|
+ stage: linux-tier2
|
|
+ # WebAssembly:
|
|
+ - env: TARGET=wasm32-unknown-unknown
|
|
+ name: "wasm32-unknown-unknown"
|
|
+ stage: osx-tier1 # For now
|
|
+ # MacOSX:
|
|
+ - os: osx
|
|
+ env: TARGET=i686-apple-darwin
|
|
+ name: "i686-apple-darwin + SSE2"
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode10
|
|
+ stage: osx-tier1
|
|
+ - os: osx
|
|
+ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2"
|
|
+ name: "i686-apple-darwin + SSE4.2"
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode10
|
|
+ stage: osx-tier1
|
|
+ # Travis-CI OSX build bots do not support AVX2:
|
|
+ - os: osx
|
|
+ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+avx"
|
|
+ name: "i686-apple-darwin + AVX"
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode10
|
|
+ stage: osx-tier1
|
|
+ - os: osx
|
|
+ env: TARGET=x86_64-apple-darwin
|
|
+ name: "x86_64-apple-darwin + SSE2"
|
|
+ install: true
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode10
|
|
+ stage: osx-tier1
|
|
+ - os: osx
|
|
+ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2"
|
|
+ name: "x86_64-apple-darwin + SSE4.2"
|
|
+ install: true
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode10
|
|
+ stage: osx-tier1
|
|
+ # Travis-CI OSX build bots do not support AVX2:
|
|
+ - os: osx
|
|
+ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx"
|
|
+ name: "x86_64-apple-darwin + AVX"
|
|
+ install: true
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode10
|
|
+ stage: osx-tier1
|
|
+ # *BSDs:
|
|
+ #- env: TARGET=i686-unknown-freebsd NORUN=1
|
|
+ # script: ci/run.sh
|
|
+ #- env: TARGET=x86_64-unknown-freebsd NORUN=1
|
|
+ # script: ci/run.sh
|
|
+ #- env: TARGET=x86_64-unknown-netbsd NORUN=1
|
|
+ # script: ci/run.sh
|
|
+ # Solaris:
|
|
+ #- env: TARGET=x86_64-sun-solaris NORUN=1
|
|
+ # script: ci/run.sh
|
|
+ # iOS:
|
|
+ - os: osx
|
|
+ env: TARGET=i386-apple-ios
|
|
+ name: "i386-apple-ios"
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode9.4
|
|
+ stage: osx-tier2
|
|
+ - os: osx
|
|
+ env: TARGET=x86_64-apple-ios
|
|
+ name: "x86_64-apple-ios + SSE2"
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode9.4
|
|
+ stage: osx-tier2
|
|
+ - os: osx
|
|
+ env: TARGET=armv7-apple-ios NORUN=1
|
|
+ name: "armv7-apple-ios [Build only]"
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode9.4
|
|
+ stage: osx-tier2
|
|
+ - os: osx
|
|
+ env: TARGET=aarch64-apple-ios NORUN=1
|
|
+ name: "aarch64-apple-ios [Build only]"
|
|
+ script: ci/run.sh
|
|
+ osx_image: xcode9.4
|
|
+ stage: osx-tier2
|
|
+ # BENCHMARKS:
|
|
+ - name: "Benchmarks - x86_64-unknown-linux-gnu"
|
|
+ install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh
|
|
+ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh
|
|
+ stage: tools
|
|
+ - name: "Benchmarks - x86_64-apple-darwin"
|
|
+ install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh
|
|
+ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh
|
|
+ os: osx
|
|
+ osx_image: xcode9.4
|
|
+ stage: tools
|
|
+ # TOOLS:
|
|
+ - name: "Documentation"
|
|
+ install: cargo install mdbook
|
|
+ script: ci/dox.sh
|
|
+ stage: tools
|
|
+ - name: "rustfmt"
|
|
+ install: true
|
|
+ before_script: rustup component add rustfmt-preview
|
|
+ script: ci/all.sh check_fmt || true
|
|
+ stage: tools
|
|
+ - name: "clippy"
|
|
+ install: true
|
|
+ before_script: rustup component add clippy-preview
|
|
+ script: ci/all.sh clippy
|
|
+ stage: tools
|
|
+
|
|
+ allow_failures:
|
|
+ # FIXME: ISPC cannot be found?
|
|
+ - name: "Benchmarks - x86_64-apple-darwin"
|
|
+ # FIXME: TBD
|
|
+ - env: TARGET=powerpc-unknown-linux-gnu
|
|
+ - env: TARGET=powerpc64-unknown-linux-gnu
|
|
+ - env: TARGET=powerpc64le-unknown-linux-gnu
|
|
+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec"
|
|
+ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx"
|
|
+ #- env: TARGET=i686-unknown-freebsd NORUN=1
|
|
+ #- env: TARGET=x86_64-unknown-freebsd NORUN=1
|
|
+ #- env: TARGET=x86_64-unknown-netbsd NORUN=1
|
|
+ #- env: TARGET=x86_64-sun-solaris NORUN=1
|
|
+
|
|
+ # FIXME: TBD
|
|
+ - env: TARGET=arm-linux-androideabi
|
|
+ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon"
|
|
+ - env: TARGET=aarch64-linux-android
|
|
+ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon"
|
|
+
|
|
+ # FIXME: iOS
|
|
+ # https://github.com/rust-lang-nursery/packed_simd/issues/26
|
|
+ - env: TARGET=i386-apple-ios
|
|
+ - env: TARGET=x86_64-apple-ios
|
|
+
|
|
+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/182
|
|
+ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon"
|
|
+ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon"
|
|
+ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon"
|
|
+
|
|
+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/183
|
|
+ - env: TARGET=wasm32-unknown-unknown
|
|
+
|
|
+install: travis_retry rustup target add $TARGET
|
|
+before_script: cargo generate-lockfile
|
|
+script: travis_wait 50 ci/run-docker.sh
|
|
+after_script: sleep 5
|
|
+
|
|
+env:
|
|
+ global:
|
|
+ secure: "lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo="
|
|
+branches:
|
|
+ only:
|
|
+ - staging # bors r+
|
|
+ - trying # bors try
|
|
+ - master
|
|
+notifications:
|
|
+ email:
|
|
+ on_success: never
|
|
diff --git a/third_party/rust/packed_simd/Cargo.toml b/third_party/rust/packed_simd/Cargo.toml
|
|
new file mode 100644
|
|
index 000000000000..3db9354c9407
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/Cargo.toml
|
|
@@ -0,0 +1,42 @@
|
|
+[package]
|
|
+name = "packed_simd"
|
|
+version = "0.3.3"
|
|
+authors = ["Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>"]
|
|
+description = "Portable Packed SIMD vectors"
|
|
+documentation = "https://docs.rs/crate/packed_simd/"
|
|
+homepage = "https://github.com/rust-lang-nursery/packed_simd"
|
|
+repository = "https://github.com/rust-lang-nursery/packed_simd"
|
|
+keywords = ["simd", "vector", "portability"]
|
|
+categories = ["hardware-support", "concurrency", "no-std", "data-structures"]
|
|
+license = "MIT/Apache-2.0"
|
|
+build = "build.rs"
|
|
+edition = "2018"
|
|
+
|
|
+[badges]
|
|
+appveyor = { repository = "rust-lang-nursery/packed_simd" }
|
|
+travis-ci = { repository = "rust-lang-nursery/packed_simd" }
|
|
+codecov = { repository = "rust-lang-nursery/packed_simd" }
|
|
+is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/packed_simd" }
|
|
+is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" }
|
|
+maintenance = { status = "experimental" }
|
|
+
|
|
+[dependencies]
|
|
+cfg-if = "^0.1.6"
|
|
+core_arch = { version = "^0.1.3", optional = true }
|
|
+
|
|
+[features]
|
|
+default = []
|
|
+into_bits = []
|
|
+libcore_neon = []
|
|
+
|
|
+[dev-dependencies]
|
|
+paste = "^0.1.3"
|
|
+arrayvec = { version = "^0.4", default-features = false }
|
|
+
|
|
+[target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys]
|
|
+version = "^0.1.2"
|
|
+optional = true
|
|
+
|
|
+[target.wasm32-unknown-unknown.dev-dependencies]
|
|
+wasm-bindgen = "=0.2.19"
|
|
+wasm-bindgen-test = "=0.2.19"
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/simd/LICENSE-APACHE b/third_party/rust/packed_simd/LICENSE-APACHE
|
|
similarity index 100%
|
|
rename from third_party/rust/simd/LICENSE-APACHE
|
|
rename to third_party/rust/packed_simd/LICENSE-APACHE
|
|
diff --git a/third_party/rust/simd/LICENSE-MIT b/third_party/rust/packed_simd/LICENSE-MIT
|
|
similarity index 93%
|
|
rename from third_party/rust/simd/LICENSE-MIT
|
|
rename to third_party/rust/packed_simd/LICENSE-MIT
|
|
index bf6c304f7774..39d4bdb5acd3 100644
|
|
--- a/third_party/rust/simd/LICENSE-MIT
|
|
+++ b/third_party/rust/packed_simd/LICENSE-MIT
|
|
@@ -1,25 +1,25 @@
|
|
-Copyright (c) 2014 Huon Wilson
|
|
+Copyright (c) 2014 The Rust Project Developers
|
|
|
|
Permission is hereby granted, free of charge, to any
|
|
person obtaining a copy of this software and associated
|
|
documentation files (the "Software"), to deal in the
|
|
Software without restriction, including without
|
|
limitation the rights to use, copy, modify, merge,
|
|
publish, distribute, sublicense, and/or sell copies of
|
|
the Software, and to permit persons to whom the Software
|
|
is furnished to do so, subject to the following
|
|
conditions:
|
|
|
|
The above copyright notice and this permission notice
|
|
shall be included in all copies or substantial portions
|
|
of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
|
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
|
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
|
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
|
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
-DEALINGS IN THE SOFTWARE.
|
|
\ No newline at end of file
|
|
+DEALINGS IN THE SOFTWARE.
|
|
diff --git a/third_party/rust/packed_simd/bors.toml b/third_party/rust/packed_simd/bors.toml
|
|
new file mode 100644
|
|
index 000000000000..6d302dc85cf6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/bors.toml
|
|
@@ -0,0 +1,3 @@
|
|
+status = [
|
|
+ "continuous-integration/travis-ci/push"
|
|
+]
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/build.rs b/third_party/rust/packed_simd/build.rs
|
|
new file mode 100644
|
|
index 000000000000..85639ff9d085
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/build.rs
|
|
@@ -0,0 +1,8 @@
|
|
+fn main() {
|
|
+ println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1");
|
|
+ let target = std::env::var("TARGET")
|
|
+ .expect("TARGET environment variable not defined");
|
|
+ if target.contains("neon") {
|
|
+ println!("cargo:rustc-cfg=libcore_neon");
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/ci/all.sh b/third_party/rust/packed_simd/ci/all.sh
|
|
new file mode 100644
|
|
index 000000000000..273562d4a9bb
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/all.sh
|
|
@@ -0,0 +1,71 @@
|
|
+#!/usr/bin/env bash
|
|
+#
|
|
+# Performs an operation on all targets
|
|
+
|
|
+set -ex
|
|
+
|
|
+: "${1?The all.sh script requires one argument.}"
|
|
+
|
|
+op=$1
|
|
+
|
|
+cargo_clean() {
|
|
+ cargo clean
|
|
+}
|
|
+
|
|
+cargo_check_fmt() {
|
|
+ cargo fmt --all -- --check
|
|
+}
|
|
+
|
|
+cargo_fmt() {
|
|
+ cargo fmt --all
|
|
+}
|
|
+
|
|
+cargo_clippy() {
|
|
+ cargo clippy --all -- -D clippy::pedantic
|
|
+}
|
|
+
|
|
+CMD="-1"
|
|
+
|
|
+case $op in
|
|
+ clean*)
|
|
+ CMD=cargo_clean
|
|
+ ;;
|
|
+ check_fmt*)
|
|
+ CMD=cargo_check_fmt
|
|
+ ;;
|
|
+ fmt*)
|
|
+ CMD=cargo_fmt
|
|
+ ;;
|
|
+ clippy)
|
|
+ CMD=cargo_clippy
|
|
+ ;;
|
|
+ *)
|
|
+ echo "Unknown operation: \"${op}\""
|
|
+ exit 1
|
|
+ ;;
|
|
+esac
|
|
+
|
|
+echo "Operation is: ${CMD}"
|
|
+
|
|
+# On src/
|
|
+$CMD
|
|
+
|
|
+# Check examples/
|
|
+for dir in examples/*/
|
|
+do
|
|
+ dir=${dir%*/}
|
|
+ (
|
|
+ cd "${dir%*/}"
|
|
+ $CMD
|
|
+ )
|
|
+done
|
|
+
|
|
+(
|
|
+ cd verify/verify
|
|
+ $CMD
|
|
+)
|
|
+
|
|
+(
|
|
+ cd micro_benchmarks
|
|
+ $CMD
|
|
+)
|
|
diff --git a/third_party/rust/packed_simd/ci/android-install-ndk.sh b/third_party/rust/packed_simd/ci/android-install-ndk.sh
|
|
new file mode 100644
|
|
index 000000000000..818e78446ae8
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/android-install-ndk.sh
|
|
@@ -0,0 +1,37 @@
|
|
+#!/usr/bin/env sh
|
|
+# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
|
+# file at the top-level directory of this distribution and at
|
|
+# http://rust-lang.org/COPYRIGHT.
|
|
+#
|
|
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
+# option. This file may not be copied, modified, or distributed
|
|
+# except according to those terms.
|
|
+
|
|
+set -ex
|
|
+
|
|
+curl --retry 5 -O https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip
|
|
+unzip -q android-ndk-r15b-linux-x86_64.zip
|
|
+
|
|
+case "$1" in
|
|
+ aarch64)
|
|
+ arch=arm64
|
|
+ ;;
|
|
+
|
|
+ i686)
|
|
+ arch=x86
|
|
+ ;;
|
|
+
|
|
+ *)
|
|
+ arch=$1
|
|
+ ;;
|
|
+esac;
|
|
+
|
|
+android-ndk-r15b/build/tools/make_standalone_toolchain.py \
|
|
+ --unified-headers \
|
|
+ --install-dir "/android/ndk-${1}" \
|
|
+ --arch "${arch}" \
|
|
+ --api 24
|
|
+
|
|
+rm -rf ./android-ndk-r15b-linux-x86_64.zip ./android-ndk-r15b
|
|
diff --git a/third_party/rust/packed_simd/ci/android-install-sdk.sh b/third_party/rust/packed_simd/ci/android-install-sdk.sh
|
|
new file mode 100644
|
|
index 000000000000..6b5ac09ab04a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/android-install-sdk.sh
|
|
@@ -0,0 +1,60 @@
|
|
+#!/usr/bin/env sh
|
|
+# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
|
+# file at the top-level directory of this distribution and at
|
|
+# http://rust-lang.org/COPYRIGHT.
|
|
+#
|
|
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
+# option. This file may not be copied, modified, or distributed
|
|
+# except according to those terms.
|
|
+
|
|
+set -ex
|
|
+
|
|
+# Prep the SDK and emulator
|
|
+#
|
|
+# Note that the update process requires that we accept a bunch of licenses, and
|
|
+# we can't just pipe `yes` into it for some reason, so we take the same strategy
|
|
+# located in https://github.com/appunite/docker by just wrapping it in a script
|
|
+# which apparently magically accepts the licenses.
|
|
+
|
|
+mkdir sdk
|
|
+curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O
|
|
+unzip -d sdk sdk-tools-linux-3859397.zip
|
|
+
|
|
+case "$1" in
|
|
+ arm | armv7)
|
|
+ abi=armeabi-v7a
|
|
+ ;;
|
|
+
|
|
+ aarch64)
|
|
+ abi=arm64-v8a
|
|
+ ;;
|
|
+
|
|
+ i686)
|
|
+ abi=x86
|
|
+ ;;
|
|
+
|
|
+ x86_64)
|
|
+ abi=x86_64
|
|
+ ;;
|
|
+
|
|
+ *)
|
|
+ echo "invalid arch: $1"
|
|
+ exit 1
|
|
+ ;;
|
|
+esac;
|
|
+
|
|
+# --no_https avoids
|
|
+ # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found
|
|
+yes | ./sdk/tools/bin/sdkmanager --licenses --no_https
|
|
+yes | ./sdk/tools/bin/sdkmanager --no_https \
|
|
+ "emulator" \
|
|
+ "platform-tools" \
|
|
+ "platforms;android-24" \
|
|
+ "system-images;android-24;default;$abi"
|
|
+
|
|
+echo "no" |
|
|
+ ./sdk/tools/bin/avdmanager create avd \
|
|
+ --name "${1}" \
|
|
+ --package "system-images;android-24;default;$abi"
|
|
diff --git a/third_party/rust/packed_simd/ci/android-sysimage.sh b/third_party/rust/packed_simd/ci/android-sysimage.sh
|
|
new file mode 100644
|
|
index 000000000000..9eabd7c8d94f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/android-sysimage.sh
|
|
@@ -0,0 +1,56 @@
|
|
+#!/usr/bin/env bash
|
|
+
|
|
+# Copyright 2017 The Rust Project Developers. See the COPYRIGHT
|
|
+# file at the top-level directory of this distribution and at
|
|
+# http://rust-lang.org/COPYRIGHT.
|
|
+#
|
|
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
+# option. This file may not be copied, modified, or distributed
|
|
+# except according to those terms.
|
|
+
|
|
+set -ex
|
|
+
|
|
+URL=https://dl.google.com/android/repository/sys-img/android
|
|
+
|
|
+main() {
|
|
+ local arch="${1}"
|
|
+ local name="${2}"
|
|
+ local dest=/system
|
|
+ local td
|
|
+ td="$(mktemp -d)"
|
|
+
|
|
+ apt-get install --no-install-recommends e2tools
|
|
+
|
|
+ pushd "${td}"
|
|
+ curl --retry 5 -O "${URL}/${name}"
|
|
+ unzip -q "${name}"
|
|
+
|
|
+ local system
|
|
+ system="$(find . -name system.img)"
|
|
+ mkdir -p ${dest}/{bin,lib,lib64}
|
|
+
|
|
+ # Extract android linker and libraries to /system
|
|
+ # This allows android executables to be run directly (or with qemu)
|
|
+ if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then
|
|
+ e2cp -p "${system}:/bin/linker64" "${dest}/bin/"
|
|
+ e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/"
|
|
+ e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/"
|
|
+ e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/"
|
|
+ else
|
|
+ e2cp -p "${system}:/bin/linker" "${dest}/bin/"
|
|
+ e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/"
|
|
+ e2cp -p "${system}:/lib/libc.so" "${dest}/lib/"
|
|
+ e2cp -p "${system}:/lib/libm.so" "${dest}/lib/"
|
|
+ fi
|
|
+
|
|
+ # clean up
|
|
+ apt-get purge --auto-remove -y e2tools
|
|
+
|
|
+ popd
|
|
+
|
|
+ rm -rf "${td}"
|
|
+}
|
|
+
|
|
+main "${@}"
|
|
diff --git a/third_party/rust/packed_simd/ci/benchmark.sh b/third_party/rust/packed_simd/ci/benchmark.sh
|
|
new file mode 100644
|
|
index 000000000000..3635b9e371d1
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/benchmark.sh
|
|
@@ -0,0 +1,32 @@
|
|
+#!/usr/bin/env bash
|
|
+#
|
|
+# Runs all benchmarks. Controlled by the following environment variables:
|
|
+#
|
|
+# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc)
|
|
+# NORUN={1} - only builds the benchmarks
|
|
+
|
|
+set -ex
|
|
+
|
|
+if [[ ${NORUN} != 1 ]]; then
|
|
+ # Most benchmarks require hyperfine; require it upfront.
|
|
+ hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; }
|
|
+fi
|
|
+
|
|
+
|
|
+# If the ispc benchmark feature is enabled, ispc must be in the path of the
|
|
+# benchmarks.
|
|
+if echo "$FEATURES" | grep -q "ispc"; then
|
|
+ hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; }
|
|
+fi
|
|
+
|
|
+# An example with a benchmark.sh is a benchmark:
|
|
+for dir in examples/*/
|
|
+do
|
|
+ dir=${dir%*/}
|
|
+ cd ${dir%*/}
|
|
+ if [ -f "benchmark.sh" ]; then
|
|
+ ./benchmark.sh
|
|
+ fi
|
|
+ cd -
|
|
+done
|
|
+
|
|
diff --git a/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs
|
|
new file mode 100644
|
|
index 000000000000..c0fe52c35659
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs
|
|
@@ -0,0 +1,176 @@
|
|
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
|
|
+// file at the top-level directory of this distribution and at
|
|
+// http://rust-lang.org/COPYRIGHT.
|
|
+//
|
|
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
+// option. This file may not be copied, modified, or distributed
|
|
+// except according to those terms.
|
|
+
|
|
+// This is a script to deploy and execute a binary on an iOS simulator.
|
|
+// The primary use of this is to be able to run unit tests on the simulator and
|
|
+// retrieve the results.
|
|
+//
|
|
+// To do this through Cargo instead, use Dinghy
|
|
+// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy
|
|
+// test.
|
|
+
|
|
+use std::env;
|
|
+use std::fs::{self, File};
|
|
+use std::io::Write;
|
|
+use std::path::Path;
|
|
+use std::process;
|
|
+use std::process::Command;
|
|
+
|
|
+macro_rules! t {
|
|
+ ($e:expr) => (match $e {
|
|
+ Ok(e) => e,
|
|
+ Err(e) => panic!("{} failed with: {}", stringify!($e), e),
|
|
+ })
|
|
+}
|
|
+
|
|
+// Step one: Wrap as an app
|
|
+fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) {
|
|
+ println!("Packaging simulator app");
|
|
+ drop(fs::remove_dir_all("ios_simulator_app"));
|
|
+ t!(fs::create_dir("ios_simulator_app"));
|
|
+ t!(fs::copy(test_binary_path,
|
|
+ Path::new("ios_simulator_app").join(crate_name)));
|
|
+
|
|
+ let mut f = t!(File::create("ios_simulator_app/Info.plist"));
|
|
+ t!(f.write_all(format!(r#"
|
|
+ <?xml version="1.0" encoding="UTF-8"?>
|
|
+ <!DOCTYPE plist PUBLIC
|
|
+ "-//Apple//DTD PLIST 1.0//EN"
|
|
+ "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
+ <plist version="1.0">
|
|
+ <dict>
|
|
+ <key>CFBundleExecutable</key>
|
|
+ <string>{}</string>
|
|
+ <key>CFBundleIdentifier</key>
|
|
+ <string>com.rust.unittests</string>
|
|
+ </dict>
|
|
+ </plist>
|
|
+ "#, crate_name).as_bytes()));
|
|
+}
|
|
+
|
|
+// Step two: Start the iOS simulator
|
|
+fn start_simulator() {
|
|
+ println!("Looking for iOS simulator");
|
|
+ let output = t!(Command::new("xcrun").arg("simctl").arg("list").output());
|
|
+ assert!(output.status.success());
|
|
+ let mut simulator_exists = false;
|
|
+ let mut simulator_booted = false;
|
|
+ let mut found_rust_sim = false;
|
|
+ let stdout = t!(String::from_utf8(output.stdout));
|
|
+ for line in stdout.lines() {
|
|
+ if line.contains("rust_ios") {
|
|
+ if found_rust_sim {
|
|
+ panic!("Duplicate rust_ios simulators found. Please \
|
|
+ double-check xcrun simctl list.");
|
|
+ }
|
|
+ simulator_exists = true;
|
|
+ simulator_booted = line.contains("(Booted)");
|
|
+ found_rust_sim = true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if simulator_exists == false {
|
|
+ println!("Creating iOS simulator");
|
|
+ Command::new("xcrun")
|
|
+ .arg("simctl")
|
|
+ .arg("create")
|
|
+ .arg("rust_ios")
|
|
+ .arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE")
|
|
+ .arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2")
|
|
+ .check_status();
|
|
+ } else if simulator_booted == true {
|
|
+ println!("Shutting down already-booted simulator");
|
|
+ Command::new("xcrun")
|
|
+ .arg("simctl")
|
|
+ .arg("shutdown")
|
|
+ .arg("rust_ios")
|
|
+ .check_status();
|
|
+ }
|
|
+
|
|
+ println!("Starting iOS simulator");
|
|
+ // We can't uninstall the app (if present) as that will hang if the
|
|
+ // simulator isn't completely booted; just erase the simulator instead.
|
|
+ Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status();
|
|
+ Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status();
|
|
+}
|
|
+
|
|
+// Step three: Install the app
|
|
+fn install_app_to_simulator() {
|
|
+ println!("Installing app to simulator");
|
|
+ Command::new("xcrun")
|
|
+ .arg("simctl")
|
|
+ .arg("install")
|
|
+ .arg("booted")
|
|
+ .arg("ios_simulator_app/")
|
|
+ .check_status();
|
|
+}
|
|
+
|
|
+// Step four: Run the app
|
|
+fn run_app_on_simulator() {
|
|
+ println!("Running app");
|
|
+ let output = t!(Command::new("xcrun")
|
|
+ .arg("simctl")
|
|
+ .arg("launch")
|
|
+ .arg("--console")
|
|
+ .arg("booted")
|
|
+ .arg("com.rust.unittests")
|
|
+ .output());
|
|
+
|
|
+ println!("stdout --\n{}\n", String::from_utf8_lossy(&output.stdout));
|
|
+ println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr));
|
|
+
|
|
+ let stdout = String::from_utf8_lossy(&output.stdout);
|
|
+ let failed = stdout.lines()
|
|
+ .find(|l| l.contains("FAILED"))
|
|
+ .map(|l| l.contains("FAILED"))
|
|
+ .unwrap_or(false);
|
|
+
|
|
+ let passed = stdout.lines()
|
|
+ .find(|l| l.contains("test result: ok"))
|
|
+ .map(|l| l.contains("test result: ok"))
|
|
+ .unwrap_or(false);
|
|
+
|
|
+ println!("Shutting down simulator");
|
|
+ Command::new("xcrun")
|
|
+ .arg("simctl")
|
|
+ .arg("shutdown")
|
|
+ .arg("rust_ios")
|
|
+ .check_status();
|
|
+ if !(passed && !failed) {
|
|
+ panic!("tests didn't pass");
|
|
+ }
|
|
+}
|
|
+
|
|
+trait CheckStatus {
|
|
+ fn check_status(&mut self);
|
|
+}
|
|
+
|
|
+impl CheckStatus for Command {
|
|
+ fn check_status(&mut self) {
|
|
+ println!("\trunning: {:?}", self);
|
|
+ assert!(t!(self.status()).success());
|
|
+ }
|
|
+}
|
|
+
|
|
+fn main() {
|
|
+ let args: Vec<String> = env::args().collect();
|
|
+ if args.len() != 2 {
|
|
+ println!("Usage: {} <executable>", args[0]);
|
|
+ process::exit(-1);
|
|
+ }
|
|
+
|
|
+ let test_binary_path = Path::new(&args[1]);
|
|
+ let crate_name = test_binary_path.file_name().unwrap();
|
|
+
|
|
+ package_as_simulator_app(crate_name.to_str().unwrap(), test_binary_path);
|
|
+ start_simulator();
|
|
+ install_app_to_simulator();
|
|
+ run_app_on_simulator();
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..27bde89c5a8d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile
|
|
@@ -0,0 +1,47 @@
|
|
+FROM ubuntu:16.04
|
|
+
|
|
+RUN dpkg --add-architecture i386 && \
|
|
+ apt-get update && \
|
|
+ apt-get install -y --no-install-recommends \
|
|
+ file \
|
|
+ make \
|
|
+ curl \
|
|
+ ca-certificates \
|
|
+ python \
|
|
+ unzip \
|
|
+ expect \
|
|
+ openjdk-9-jre \
|
|
+ libstdc++6:i386 \
|
|
+ libpulse0 \
|
|
+ gcc \
|
|
+ libc6-dev
|
|
+
|
|
+WORKDIR /android/
|
|
+COPY android* /android/
|
|
+
|
|
+ENV ANDROID_ARCH=aarch64
|
|
+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
|
+
|
|
+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
|
+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
|
+RUN mv /root/.android /tmp
|
|
+RUN chmod 777 -R /tmp/.android
|
|
+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
|
+
|
|
+ENV PATH=$PATH:/rust/bin \
|
|
+ CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \
|
|
+ CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \
|
|
+ OBJDUMP=aarch64-linux-android-objdump \
|
|
+ HOME=/tmp
|
|
+
|
|
+ADD runtest-android.rs /tmp/runtest.rs
|
|
+ENTRYPOINT [ \
|
|
+ "bash", \
|
|
+ "-c", \
|
|
+ # set SHELL so android can detect a 64bits system, see
|
|
+ # http://stackoverflow.com/a/41789144
|
|
+ "SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \
|
|
+ rustc /tmp/runtest.rs -o /tmp/runtest && \
|
|
+ exec \"$@\"", \
|
|
+ "--" \
|
|
+]
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..68261a2f033d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,14 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ ca-certificates \
|
|
+ libc6-dev \
|
|
+ gcc-aarch64-linux-gnu \
|
|
+ libc6-dev-arm64-cross \
|
|
+ qemu-user \
|
|
+ make \
|
|
+ file
|
|
+
|
|
+ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
|
+ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
|
|
+ OBJDUMP=aarch64-linux-gnu-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..995a9e30e65e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile
|
|
@@ -0,0 +1,47 @@
|
|
+FROM ubuntu:16.04
|
|
+
|
|
+RUN dpkg --add-architecture i386 && \
|
|
+ apt-get update && \
|
|
+ apt-get install -y --no-install-recommends \
|
|
+ file \
|
|
+ make \
|
|
+ curl \
|
|
+ ca-certificates \
|
|
+ python \
|
|
+ unzip \
|
|
+ expect \
|
|
+ openjdk-9-jre \
|
|
+ libstdc++6:i386 \
|
|
+ libpulse0 \
|
|
+ gcc \
|
|
+ libc6-dev
|
|
+
|
|
+WORKDIR /android/
|
|
+COPY android* /android/
|
|
+
|
|
+ENV ANDROID_ARCH=arm
|
|
+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
|
+
|
|
+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
|
+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
|
+RUN mv /root/.android /tmp
|
|
+RUN chmod 777 -R /tmp/.android
|
|
+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
|
+
|
|
+ENV PATH=$PATH:/rust/bin \
|
|
+ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
|
|
+ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
|
|
+ OBJDUMP=arm-linux-androideabi-objdump \
|
|
+ HOME=/tmp
|
|
+
|
|
+ADD runtest-android.rs /tmp/runtest.rs
|
|
+ENTRYPOINT [ \
|
|
+ "bash", \
|
|
+ "-c", \
|
|
+ # set SHELL so android can detect a 64bits system, see
|
|
+ # http://stackoverflow.com/a/41789144
|
|
+ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
|
|
+ rustc /tmp/runtest.rs -o /tmp/runtest && \
|
|
+ exec \"$@\"", \
|
|
+ "--" \
|
|
+]
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..cb4de6a57eaa
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
|
|
@@ -0,0 +1,15 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ ca-certificates \
|
|
+ libc6-dev \
|
|
+ libc6-armel-cross \
|
|
+ libc6-dev-armel-cross \
|
|
+ binutils-arm-linux-gnueabi \
|
|
+ gcc-arm-linux-gnueabi \
|
|
+ qemu-user \
|
|
+ make \
|
|
+ file
|
|
+ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \
|
|
+ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \
|
|
+ OBJDUMP=arm-linux-gnueabi-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..c7bd61f0a796
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
|
|
@@ -0,0 +1,13 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ ca-certificates \
|
|
+ libc6-dev \
|
|
+ gcc-arm-linux-gnueabihf \
|
|
+ libc6-dev-armhf-cross \
|
|
+ qemu-user \
|
|
+ make \
|
|
+ file
|
|
+ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
|
+ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
|
+ OBJDUMP=arm-linux-gnueabihf-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..e01b87afdf56
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
|
|
@@ -0,0 +1,13 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ ca-certificates \
|
|
+ libc6-dev \
|
|
+ gcc-arm-linux-gnueabihf \
|
|
+ libc6-dev-armhf-cross \
|
|
+ qemu-user \
|
|
+ make \
|
|
+ file
|
|
+ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
|
+ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
|
+ OBJDUMP=arm-linux-gnueabihf-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..857974a858f1
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,7 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc-multilib \
|
|
+ libc6-dev \
|
|
+ file \
|
|
+ make \
|
|
+ ca-certificates
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..857974a858f1
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,7 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc-multilib \
|
|
+ libc6-dev \
|
|
+ file \
|
|
+ make \
|
|
+ ca-certificates
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..4711cead372a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,13 @@
|
|
+FROM ubuntu:17.10
|
|
+
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc libc6-dev qemu-user ca-certificates \
|
|
+ gcc-mips-linux-gnu libc6-dev-mips-cross \
|
|
+ qemu-system-mips \
|
|
+ qemu-user \
|
|
+ make \
|
|
+ file
|
|
+
|
|
+ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
|
|
+ CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
|
|
+ OBJDUMP=mips-linux-gnu-objdump
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..1422e8c80924
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
|
|
@@ -0,0 +1,10 @@
|
|
+FROM ubuntu:17.10
|
|
+
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc libc6-dev qemu-user ca-certificates \
|
|
+ gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
|
|
+ qemu-system-mips64 qemu-user
|
|
+
|
|
+ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
|
|
+ CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
|
|
+ OBJDUMP=mips64-linux-gnuabi64-objdump
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..d94deb5b2013
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
|
|
@@ -0,0 +1,10 @@
|
|
+FROM ubuntu:17.10
|
|
+
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc libc6-dev qemu-user ca-certificates \
|
|
+ gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
|
|
+ qemu-system-mips64el
|
|
+
|
|
+ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
|
|
+ CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
|
|
+ OBJDUMP=mips64el-linux-gnuabi64-objdump
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..40ac50675bd9
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile
|
|
@@ -0,0 +1,25 @@
|
|
+FROM ubuntu:18.10
|
|
+
|
|
+RUN apt-get update && \
|
|
+ apt-get install -y --no-install-recommends \
|
|
+ ca-certificates \
|
|
+ gcc \
|
|
+ libc6-dev \
|
|
+ make \
|
|
+ qemu-user \
|
|
+ qemu-system-mips \
|
|
+ bzip2 \
|
|
+ curl \
|
|
+ file
|
|
+
|
|
+RUN mkdir /toolchain
|
|
+
|
|
+# Note that this originally came from:
|
|
+# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
|
|
+RUN curl -L https://s3-us-west-1.amazonaws.com/rust-lang-ci2/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
|
|
+ tar xjf - -C /toolchain --strip-components=2
|
|
+
|
|
+ENV PATH=$PATH:/rust/bin:/toolchain/bin \
|
|
+ CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
|
|
+ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
|
|
+ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..43b174ed87fc
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,12 @@
|
|
+FROM ubuntu:17.10
|
|
+
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc libc6-dev qemu-user ca-certificates \
|
|
+ gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
|
|
+ qemu-system-ppc \
|
|
+ make \
|
|
+ file
|
|
+
|
|
+ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
|
|
+ CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
|
|
+ OBJDUMP=powerpc-linux-gnu-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..7757ad28a42d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,17 @@
|
|
+FROM ubuntu:17.10
|
|
+
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ ca-certificates \
|
|
+ libc6-dev \
|
|
+ gcc-powerpc64-linux-gnu \
|
|
+ libc6-dev-ppc64-cross \
|
|
+ qemu-user \
|
|
+ qemu-system-ppc \
|
|
+ make \
|
|
+ file
|
|
+
|
|
+ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
|
|
+ CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \
|
|
+ CC=powerpc64-linux-gnu-gcc \
|
|
+ OBJDUMP=powerpc64-linux-gnu-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..0b0c214fdf1b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,11 @@
|
|
+FROM ubuntu:17.10
|
|
+
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc libc6-dev qemu-user ca-certificates \
|
|
+ gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
|
|
+ qemu-system-ppc file make
|
|
+
|
|
+ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
|
|
+ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \
|
|
+ CC=powerpc64le-linux-gnu-gcc \
|
|
+ OBJDUMP=powerpc64le-linux-gnu-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..c645b0bcc2b8
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,20 @@
|
|
+FROM ubuntu:18.10
|
|
+
|
|
+RUN apt-get update && \
|
|
+ apt-get install -y --no-install-recommends \
|
|
+ ca-certificates \
|
|
+ curl \
|
|
+ cmake \
|
|
+ gcc \
|
|
+ libc6-dev \
|
|
+ g++-s390x-linux-gnu \
|
|
+ libc6-dev-s390x-cross \
|
|
+ qemu-user \
|
|
+ make \
|
|
+ file
|
|
+
|
|
+ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
|
|
+ CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \
|
|
+ CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \
|
|
+ CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \
|
|
+ OBJDUMP=s390x-linux-gnu-objdump
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..fe12af14da6f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,18 @@
|
|
+FROM debian:stretch
|
|
+
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ curl ca-certificates \
|
|
+ gcc libc6-dev \
|
|
+ gcc-sparc64-linux-gnu libc6-dev-sparc64-cross \
|
|
+ qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \
|
|
+ p7zip-full cpio
|
|
+
|
|
+COPY linux-sparc64.sh /
|
|
+RUN bash /linux-sparc64.sh
|
|
+
|
|
+COPY test-runner-linux /
|
|
+
|
|
+ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \
|
|
+ CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \
|
|
+ CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \
|
|
+ PATH=$PATH:/rust/bin
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..c1da77109c12
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile
|
|
@@ -0,0 +1,47 @@
|
|
+FROM ubuntu:16.04
|
|
+
|
|
+RUN dpkg --add-architecture i386 && \
|
|
+ apt-get update && \
|
|
+ apt-get install -y --no-install-recommends \
|
|
+ file \
|
|
+ make \
|
|
+ curl \
|
|
+ ca-certificates \
|
|
+ python \
|
|
+ unzip \
|
|
+ expect \
|
|
+ openjdk-9-jre \
|
|
+ libstdc++6:i386 \
|
|
+ libpulse0 \
|
|
+ gcc \
|
|
+ libc6-dev
|
|
+
|
|
+WORKDIR /android/
|
|
+COPY android* /android/
|
|
+
|
|
+ENV ANDROID_ARCH=arm
|
|
+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
|
+
|
|
+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
|
+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
|
+RUN mv /root/.android /tmp
|
|
+RUN chmod 777 -R /tmp/.android
|
|
+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
|
+
|
|
+ENV PATH=$PATH:/rust/bin \
|
|
+ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
|
|
+ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
|
|
+ OBJDUMP=arm-linux-androideabi-objdump \
|
|
+ HOME=/tmp
|
|
+
|
|
+ADD runtest-android.rs /tmp/runtest.rs
|
|
+ENTRYPOINT [ \
|
|
+ "bash", \
|
|
+ "-c", \
|
|
+ # set SHELL so android can detect a 64bits system, see
|
|
+ # http://stackoverflow.com/a/41789144
|
|
+ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
|
|
+ rustc /tmp/runtest.rs -o /tmp/runtest && \
|
|
+ exec \"$@\"", \
|
|
+ "--" \
|
|
+]
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..696cb6c3fb52
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
|
|
@@ -0,0 +1,13 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ ca-certificates \
|
|
+ libc6-dev \
|
|
+ gcc-arm-linux-gnueabihf \
|
|
+ libc6-dev-armhf-cross \
|
|
+ qemu-user \
|
|
+ make \
|
|
+ file
|
|
+ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
|
+ CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
|
+ OBJDUMP=arm-linux-gnueabihf-objdump
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..f905cf1a36eb
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile
|
|
@@ -0,0 +1,37 @@
|
|
+FROM ubuntu:18.04
|
|
+
|
|
+RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
|
+ ca-certificates \
|
|
+ clang \
|
|
+ cmake \
|
|
+ curl \
|
|
+ git \
|
|
+ libc6-dev \
|
|
+ make \
|
|
+ python \
|
|
+ xz-utils
|
|
+
|
|
+# Install `wasm2wat`
|
|
+RUN git clone --recursive https://github.com/WebAssembly/wabt
|
|
+RUN make -C wabt -j$(nproc)
|
|
+ENV PATH=$PATH:/wabt/bin
|
|
+
|
|
+# Install `wasm-bindgen-test-runner`
|
|
+RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.19/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl.tar.gz \
|
|
+ | tar xzf -
|
|
+ENV PATH=$PATH:/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl
|
|
+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
|
|
+
|
|
+# Install `node`
|
|
+RUN curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf -
|
|
+ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin
|
|
+
|
|
+# We use a shim linker that removes `--strip-debug` when passed to LLD. While
|
|
+# this typically results in invalid debug information in release mode it doesn't
|
|
+# result in an invalid names section which is what we're interested in.
|
|
+COPY lld-shim.rs /
|
|
+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim
|
|
+
|
|
+# Rustc isn't available until this container starts, so defer compilation of the
|
|
+# shim.
|
|
+ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@"
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..d52dd45b12bf
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile
|
|
@@ -0,0 +1,29 @@
|
|
+FROM ubuntu:16.04
|
|
+
|
|
+RUN apt-get update && \
|
|
+ apt-get install -y --no-install-recommends \
|
|
+ ca-certificates \
|
|
+ curl \
|
|
+ gcc \
|
|
+ libc-dev \
|
|
+ python \
|
|
+ unzip \
|
|
+ file \
|
|
+ make
|
|
+
|
|
+WORKDIR /android/
|
|
+ENV ANDROID_ARCH=x86_64
|
|
+COPY android-install-ndk.sh /android/
|
|
+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
|
+
|
|
+# We do not run x86_64-linux-android tests on an android emulator.
|
|
+# See ci/android-sysimage.sh for informations about how tests are run.
|
|
+COPY android-sysimage.sh /android/
|
|
+RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip
|
|
+
|
|
+ENV PATH=$PATH:/rust/bin:/android/ndk-$ANDROID_ARCH/bin \
|
|
+ CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android-gcc \
|
|
+ CC_x86_64_linux_android=x86_64-linux-android-gcc \
|
|
+ CXX_x86_64_linux_android=x86_64-linux-android-g++ \
|
|
+ OBJDUMP=x86_64-linux-android-objdump \
|
|
+ HOME=/tmp
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..a6bbe6653928
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
|
|
@@ -0,0 +1,16 @@
|
|
+FROM ubuntu:18.04
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ libc6-dev \
|
|
+ file \
|
|
+ make \
|
|
+ ca-certificates \
|
|
+ wget \
|
|
+ bzip2 \
|
|
+ cmake \
|
|
+ libclang-dev \
|
|
+ clang
|
|
+
|
|
+RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2
|
|
+RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2
|
|
+ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --"
|
|
diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
|
|
new file mode 100644
|
|
index 000000000000..e6b000d0516e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
|
|
@@ -0,0 +1,10 @@
|
|
+FROM ubuntu:17.10
|
|
+RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
+ gcc \
|
|
+ libc6-dev \
|
|
+ file \
|
|
+ make \
|
|
+ ca-certificates \
|
|
+ cmake \
|
|
+ libclang-dev \
|
|
+ clang
|
|
diff --git a/third_party/rust/packed_simd/ci/dox.sh b/third_party/rust/packed_simd/ci/dox.sh
|
|
new file mode 100644
|
|
index 000000000000..1743366407e3
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/dox.sh
|
|
@@ -0,0 +1,24 @@
|
|
+#!/bin/sh
|
|
+
|
|
+set -ex
|
|
+
|
|
+rm -rf target/doc
|
|
+mkdir -p target/doc
|
|
+
|
|
+# Build API documentation
|
|
+cargo doc --features=into_bits
|
|
+
|
|
+# Build Performance Guide
|
|
+# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780
|
|
+# mdbook build perf-guide -d target/doc/perf-guide
|
|
+cd perf-guide
|
|
+mdbook build
|
|
+cd -
|
|
+cp -r perf-guide/book target/doc/perf-guide
|
|
+
|
|
+# If we're on travis, not a PR, and on the right branch, publish!
|
|
+if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then
|
|
+ pip install ghp_import --install-option="--prefix=$HOME/.local"
|
|
+ $HOME/.local/bin/ghp-import -n target/doc
|
|
+ git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
|
|
+fi
|
|
diff --git a/third_party/rust/packed_simd/ci/linux-s390x.sh b/third_party/rust/packed_simd/ci/linux-s390x.sh
|
|
new file mode 100644
|
|
index 000000000000..972abeec569e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/linux-s390x.sh
|
|
@@ -0,0 +1,18 @@
|
|
+set -ex
|
|
+
|
|
+mkdir -m 777 /qemu
|
|
+cd /qemu
|
|
+
|
|
+curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img
|
|
+curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian
|
|
+curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian
|
|
+
|
|
+mv kernel.debian kernel
|
|
+mv initrd.debian initrd.gz
|
|
+
|
|
+mkdir init
|
|
+cd init
|
|
+gunzip -c ../initrd.gz | cpio -id
|
|
+rm ../initrd.gz
|
|
+cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/
|
|
+chmod a+w .
|
|
diff --git a/third_party/rust/packed_simd/ci/linux-sparc64.sh b/third_party/rust/packed_simd/ci/linux-sparc64.sh
|
|
new file mode 100644
|
|
index 000000000000..4452b120e1b6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/linux-sparc64.sh
|
|
@@ -0,0 +1,17 @@
|
|
+set -ex
|
|
+
|
|
+mkdir -m 777 /qemu
|
|
+cd /qemu
|
|
+
|
|
+curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso
|
|
+7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz
|
|
+7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64
|
|
+mv sparc64 kernel
|
|
+rm debian-9.0-sparc64-NETINST-1.iso
|
|
+
|
|
+mkdir init
|
|
+cd init
|
|
+gunzip -c ../initrd.gz | cpio -id
|
|
+rm ../initrd.gz
|
|
+cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/
|
|
+chmod a+w .
|
|
diff --git a/third_party/rust/packed_simd/ci/lld-shim.rs b/third_party/rust/packed_simd/ci/lld-shim.rs
|
|
new file mode 100644
|
|
index 000000000000..10263869e8dc
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/lld-shim.rs
|
|
@@ -0,0 +1,11 @@
|
|
+use std::os::unix::prelude::*;
|
|
+use std::process::Command;
|
|
+use std::env;
|
|
+
|
|
+fn main() {
|
|
+ let args = env::args()
|
|
+ .skip(1)
|
|
+ .filter(|s| s != "--strip-debug")
|
|
+ .collect::<Vec<_>>();
|
|
+ panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec());
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/ci/max_line_width.sh b/third_party/rust/packed_simd/ci/max_line_width.sh
|
|
new file mode 100644
|
|
index 000000000000..f70639b6f89b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/max_line_width.sh
|
|
@@ -0,0 +1,17 @@
|
|
+#!/usr/bin/env sh
|
|
+
|
|
+set -x
|
|
+
|
|
+export success=true
|
|
+
|
|
+find . -iname '*.rs' | while read -r file; do
|
|
+ result=$(grep '.\{79\}' "${file}" | grep --invert 'http')
|
|
+ if [ "${result}" = "" ]
|
|
+ then
|
|
+ :
|
|
+ else
|
|
+ echo "file \"${file}\": $result"
|
|
+ exit 1
|
|
+ fi
|
|
+done
|
|
+
|
|
diff --git a/third_party/rust/packed_simd/ci/run-docker.sh b/third_party/rust/packed_simd/ci/run-docker.sh
|
|
new file mode 100644
|
|
index 000000000000..abdd6852fc3a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/run-docker.sh
|
|
@@ -0,0 +1,38 @@
|
|
+# Small script to run tests for a target (or all targets) inside all the
|
|
+# respective docker images.
|
|
+
|
|
+set -ex
|
|
+
|
|
+run() {
|
|
+ echo "Building docker container for TARGET=${TARGET} RUSTFLAGS=${RUSTFLAGS}"
|
|
+ docker build -t packed_simd -f ci/docker/${TARGET}/Dockerfile ci/
|
|
+ mkdir -p target
|
|
+ target=$(echo "${TARGET}" | sed 's/-emulated//')
|
|
+ echo "Running docker"
|
|
+ docker run \
|
|
+ --user `id -u`:`id -g` \
|
|
+ --rm \
|
|
+ --init \
|
|
+ --volume $HOME/.cargo:/cargo \
|
|
+ --env CARGO_HOME=/cargo \
|
|
+ --volume `rustc --print sysroot`:/rust:ro \
|
|
+ --env TARGET=$target \
|
|
+ --env NORUN \
|
|
+ --env NOVERIFY \
|
|
+ --env RUSTFLAGS \
|
|
+ --volume `pwd`:/checkout:ro \
|
|
+ --volume `pwd`/target:/checkout/target \
|
|
+ --workdir /checkout \
|
|
+ --privileged \
|
|
+ packed_simd \
|
|
+ bash \
|
|
+ -c 'PATH=$PATH:/rust/bin exec ci/run.sh'
|
|
+}
|
|
+
|
|
+if [ -z "${TARGET}" ]; then
|
|
+ for d in `ls ci/docker/`; do
|
|
+ run $d
|
|
+ done
|
|
+else
|
|
+ run ${TARGET}
|
|
+fi
|
|
diff --git a/third_party/rust/packed_simd/ci/run.sh b/third_party/rust/packed_simd/ci/run.sh
|
|
new file mode 100644
|
|
index 000000000000..7bb825883680
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/run.sh
|
|
@@ -0,0 +1,96 @@
|
|
+#!/usr/bin/env bash
|
|
+
|
|
+set -ex
|
|
+
|
|
+: ${TARGET?"The TARGET environment variable must be set."}
|
|
+
|
|
+# Tests are all super fast anyway, and they fault often enough on travis that
|
|
+# having only one thread increases debuggability to be worth it.
|
|
+#export RUST_TEST_THREADS=1
|
|
+#export RUST_BACKTRACE=full
|
|
+#export RUST_TEST_NOCAPTURE=1
|
|
+
|
|
+# Some appveyor builds run out-of-memory; this attempts to mitigate that:
|
|
+# https://github.com/rust-lang-nursery/packed_simd/issues/39
|
|
+# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1"
|
|
+# export CARGO_BUILD_JOBS=1
|
|
+
|
|
+export CARGO_SUBCMD=test
|
|
+if [[ "${NORUN}" == "1" ]]; then
|
|
+ export CARGO_SUBCMD=build
|
|
+fi
|
|
+
|
|
+if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then
|
|
+ export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0"
|
|
+ rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest
|
|
+ export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest
|
|
+ export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest
|
|
+fi
|
|
+
|
|
+# The source directory is read-only. Need to copy internal crates to the target
|
|
+# directory for their Cargo.lock to be properly written.
|
|
+mkdir target || true
|
|
+
|
|
+rustc --version
|
|
+cargo --version
|
|
+echo "TARGET=${TARGET}"
|
|
+echo "HOST=${HOST}"
|
|
+echo "RUSTFLAGS=${RUSTFLAGS}"
|
|
+echo "NORUN=${NORUN}"
|
|
+echo "NOVERIFY=${NOVERIFY}"
|
|
+echo "CARGO_SUBCMD=${CARGO_SUBCMD}"
|
|
+echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}"
|
|
+echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}"
|
|
+echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}"
|
|
+echo "RUST_BACKTRACE=${RUST_BACKTRACE}"
|
|
+echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}"
|
|
+
|
|
+cargo_test() {
|
|
+ cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}"
|
|
+ if [ "${NORUN}" != "1" ]
|
|
+ then
|
|
+ if [ "$TARGET" != "wasm32-unknown-unknown" ]
|
|
+ then
|
|
+ cmd="$cmd -- --quiet"
|
|
+ fi
|
|
+ fi
|
|
+ mkdir target || true
|
|
+ ${cmd} 2>&1 | tee > target/output
|
|
+ if [[ ${PIPESTATUS[0]} != 0 ]]; then
|
|
+ cat target/output
|
|
+ return 1
|
|
+ fi
|
|
+}
|
|
+
|
|
+cargo_test_impl() {
|
|
+ ORIGINAL_RUSTFLAGS=${RUSTFLAGS}
|
|
+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v16 --cfg test_v32 --cfg test_v64" cargo_test ${@}
|
|
+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v128 --cfg test_v256" cargo_test ${@}
|
|
+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v512" cargo_test ${@}
|
|
+ RUSTFLAGS=${ORIGINAL_RUSTFLAGS}
|
|
+}
|
|
+
|
|
+# Debug run:
|
|
+if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then
|
|
+ # Run wasm32-unknown-unknown in release mode only
|
|
+ cargo_test_impl
|
|
+fi
|
|
+
|
|
+if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then
|
|
+ # use sleef on linux and windows x86_64 builds
|
|
+ cargo_test_impl --release --features=into_bits,core_arch,sleef-sys
|
|
+else
|
|
+ cargo_test_impl --release --features=into_bits,core_arch
|
|
+fi
|
|
+
|
|
+# Verify code generation
|
|
+if [[ "${NOVERIFY}" != "1" ]]; then
|
|
+ cp -r verify/verify target/verify
|
|
+ export STDSIMD_ASSERT_INSTR_LIMIT=30
|
|
+ if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then
|
|
+ export STDSIMD_ASSERT_INSTR_LIMIT=50
|
|
+ fi
|
|
+ cargo_test --release --manifest-path=target/verify/Cargo.toml
|
|
+fi
|
|
+
|
|
+. ci/run_examples.sh
|
|
diff --git a/third_party/rust/packed_simd/ci/run_examples.sh b/third_party/rust/packed_simd/ci/run_examples.sh
|
|
new file mode 100644
|
|
index 000000000000..5b26b18afb20
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/run_examples.sh
|
|
@@ -0,0 +1,51 @@
|
|
+# Runs all examples.
|
|
+
|
|
+# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55
|
|
+# All examples fail to build for `armv7-apple-ios`.
|
|
+if [[ ${TARGET} == "armv7-apple-ios" ]]; then
|
|
+ exit 0
|
|
+fi
|
|
+
|
|
+# FIXME: travis exceeds 50 minutes on these targets
|
|
+# Skipping the examples is an attempt at preventing travis from timing-out
|
|
+if [[ ${TARGET} == "arm-linux-androidabi" ]] || [[ ${TARGET} == "aarch64-linux-androidabi" ]] \
|
|
+ || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then
|
|
+ exit 0
|
|
+fi
|
|
+
|
|
+if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then
|
|
+ exit 0
|
|
+fi
|
|
+
|
|
+cp -r examples/aobench target/aobench
|
|
+cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features
|
|
+cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit
|
|
+
|
|
+cp -r examples/dot_product target/dot_product
|
|
+cargo_test --manifest-path=target/dot_product/Cargo.toml --release
|
|
+
|
|
+cp -r examples/fannkuch_redux target/fannkuch_redux
|
|
+cargo_test --manifest-path=target/fannkuch_redux/Cargo.toml --release
|
|
+
|
|
+# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56
|
|
+if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
|
|
+ cp -r examples/mandelbrot target/mandelbrot
|
|
+ cargo_test --manifest-path=target/mandelbrot/Cargo.toml --release
|
|
+fi
|
|
+
|
|
+cp -r examples/matrix_inverse target/matrix_inverse
|
|
+cargo_test --manifest-path=target/matrix_inverse/Cargo.toml --release
|
|
+
|
|
+cp -r examples/nbody target/nbody
|
|
+cargo_test --manifest-path=target/nbody/Cargo.toml --release
|
|
+
|
|
+cp -r examples/spectral_norm target/spectral_norm
|
|
+cargo_test --manifest-path=target/spectral_norm/Cargo.toml --release
|
|
+
|
|
+if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
|
|
+ cp -r examples/stencil target/stencil
|
|
+ cargo_test --manifest-path=target/stencil/Cargo.toml --release
|
|
+fi
|
|
+
|
|
+cp -r examples/triangle_xform target/triangle_xform
|
|
+cargo_test --manifest-path=target/triangle_xform/Cargo.toml --release
|
|
diff --git a/third_party/rust/packed_simd/ci/runtest-android.rs b/third_party/rust/packed_simd/ci/runtest-android.rs
|
|
new file mode 100644
|
|
index 000000000000..ed1cd80c834a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/runtest-android.rs
|
|
@@ -0,0 +1,45 @@
|
|
+use std::env;
|
|
+use std::process::Command;
|
|
+use std::path::{Path, PathBuf};
|
|
+
|
|
+fn main() {
|
|
+ let args = env::args_os()
|
|
+ .skip(1)
|
|
+ .filter(|arg| arg != "--quiet")
|
|
+ .collect::<Vec<_>>();
|
|
+ assert_eq!(args.len(), 1);
|
|
+ let test = PathBuf::from(&args[0]);
|
|
+ let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());
|
|
+
|
|
+ let status = Command::new("adb")
|
|
+ .arg("wait-for-device")
|
|
+ .status()
|
|
+ .expect("failed to run: adb wait-for-device");
|
|
+ assert!(status.success());
|
|
+
|
|
+ let status = Command::new("adb")
|
|
+ .arg("push")
|
|
+ .arg(&test)
|
|
+ .arg(&dst)
|
|
+ .status()
|
|
+ .expect("failed to run: adb pushr");
|
|
+ assert!(status.success());
|
|
+
|
|
+ let output = Command::new("adb")
|
|
+ .arg("shell")
|
|
+ .arg(&dst)
|
|
+ .output()
|
|
+ .expect("failed to run: adb shell");
|
|
+ assert!(status.success());
|
|
+
|
|
+ println!("status: {}\nstdout ---\n{}\nstderr ---\n{}",
|
|
+ output.status,
|
|
+ String::from_utf8_lossy(&output.stdout),
|
|
+ String::from_utf8_lossy(&output.stderr));
|
|
+
|
|
+ let stdout = String::from_utf8_lossy(&output.stdout);
|
|
+ let mut lines = stdout.lines().filter(|l| l.starts_with("test result"));
|
|
+ if !lines.all(|l| l.contains("test result: ok") && l.contains("0 failed")) {
|
|
+ panic!("failed to find successful test run");
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/ci/setup_benchmarks.sh b/third_party/rust/packed_simd/ci/setup_benchmarks.sh
|
|
new file mode 100644
|
|
index 000000000000..ddc4765d5ceb
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/setup_benchmarks.sh
|
|
@@ -0,0 +1,10 @@
|
|
+#!/usr/bin/env bash
|
|
+
|
|
+set -ex
|
|
+
|
|
+# Get latest ISPC binary for the target and put it in the path
|
|
+git clone https://github.com/gnzlbg/ispc-binaries
|
|
+cp ispc-binaries/ispc-${TARGET} ispc
|
|
+
|
|
+# Rust-bindgen requires RUSTFMT
|
|
+rustup component add rustfmt-preview
|
|
diff --git a/third_party/rust/packed_simd/ci/test-runner-linux b/third_party/rust/packed_simd/ci/test-runner-linux
|
|
new file mode 100644
|
|
index 000000000000..0654f63bfdb9
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/ci/test-runner-linux
|
|
@@ -0,0 +1,24 @@
|
|
+#!/bin/sh
|
|
+
|
|
+set -e
|
|
+
|
|
+arch=$1
|
|
+prog=$2
|
|
+
|
|
+cd /qemu/init
|
|
+cp -f $2 prog
|
|
+find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz
|
|
+cd ..
|
|
+
|
|
+timeout 30s qemu-system-$arch \
|
|
+ -m 1024 \
|
|
+ -nographic \
|
|
+ -kernel kernel \
|
|
+ -initrd initrd.gz \
|
|
+ -append init=/prog > output || true
|
|
+
|
|
+# remove kernel messages
|
|
+tr -d '\r' < output | egrep -v '^\['
|
|
+
|
|
+# if the output contains a failure, return error
|
|
+! grep FAILED output > /dev/null
|
|
diff --git a/third_party/rust/packed_simd/contributing.md b/third_party/rust/packed_simd/contributing.md
|
|
new file mode 100644
|
|
index 000000000000..93fa92783740
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/contributing.md
|
|
@@ -0,0 +1,67 @@
|
|
+# Contributing to `packed_simd`
|
|
+
|
|
+Welcome! If you are reading this document, it means you are interested in contributing
|
|
+to the `packed_simd` crate.
|
|
+
|
|
+## Reporting issues
|
|
+
|
|
+All issues with this crate are tracked using GitHub's [Issue Tracker].
|
|
+
|
|
+You can use issues to bring bugs to the attention of the maintainers, to discuss
|
|
+certain problems encountered with the crate, or to request new features (although
|
|
+feature requests should be limited to things mentioned in the [RFC]).
|
|
+
|
|
+One thing to keep in mind is to always use the **latest** nightly toolchain when
|
|
+working on this crate. Due to the nature of this project, we use a lot of unstable
|
|
+features, meaning breakage happens often.
|
|
+
|
|
+[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues
|
|
+[RFC]: https://github.com/rust-lang/rfcs/pull/2366
|
|
+
|
|
+### LLVM issues
|
|
+
|
|
+The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation,
|
|
+and quite a few LLVM bugs have been discovered during the development of this project.
|
|
+
|
|
+If you encounter issues with incorrect/suboptimal codegen, which you do not encounter
|
|
+when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/),
|
|
+it is likely the issue is with LLVM, or this crate's interaction with it.
|
|
+
|
|
+You should first open an issue **in this repo** to help us track the problem, and we
|
|
+will help determine what is the exact cause of the problem.
|
|
+If LLVM is indeed the cause, the issue will be reported upstream to the
|
|
+[LLVM bugtracker](https://bugs.llvm.org/).
|
|
+
|
|
+## Submitting Pull Requests
|
|
+
|
|
+New code is submitted to the crate using GitHub's [pull request] mechanism.
|
|
+You should first fork this repository, make your changes (preferrably in a new
|
|
+branch), then use GitHub's web UI to create a new PR.
|
|
+
|
|
+[pull request]: https://help.github.com/articles/about-pull-requests/
|
|
+
|
|
+### Examples
|
|
+
|
|
+The `examples` directory contains code showcasing SIMD code written with this crate,
|
|
+usually in comparison to scalar or ISPC code. If you have a project / idea which
|
|
+uses SIMD, we'd love to add it to the examples list.
|
|
+
|
|
+Every example should include a small `README`, describing the example code's purpose.
|
|
+If your example could potentially work as a benchmark, then add a `benchmark.sh`
|
|
+script to allow running the example benchmark code in CI. See an existing example's
|
|
+[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample.
|
|
+
|
|
+Don't forget to update the crate's top-level `README` with a link to your example.
|
|
+
|
|
+### Perf guide
|
|
+
|
|
+The objective of the [performance guide][perf-guide] is to be a comprehensive
|
|
+resource detailing the process of optimizing Rust code with SIMD support.
|
|
+
|
|
+If you believe a certain section could be reworded, or if you have any tips & tricks
|
|
+related to SIMD which you'd like to share, please open a PR.
|
|
+
|
|
+[mdBook] is used to manage the formatting of the guide as a book.
|
|
+
|
|
+[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
|
|
+[mdBook]: https://github.com/rust-lang-nursery/mdBook
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/.gitignore b/third_party/rust/packed_simd/perf-guide/.gitignore
|
|
new file mode 100644
|
|
index 000000000000..5a0bf0317d75
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/.gitignore
|
|
@@ -0,0 +1 @@
|
|
+/book
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/book.toml b/third_party/rust/packed_simd/perf-guide/book.toml
|
|
new file mode 100644
|
|
index 000000000000..69ba3053ca25
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/book.toml
|
|
@@ -0,0 +1,12 @@
|
|
+[book]
|
|
+authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"]
|
|
+multilingual = false
|
|
+src = "src"
|
|
+title = "Rust SIMD Performance Guide"
|
|
+description = "This book describes how to write performant SIMD code in Rust."
|
|
+
|
|
+[build]
|
|
+create-missing = false
|
|
+
|
|
+[output.html]
|
|
+additional-css = ["./src/ascii.css"]
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md
|
|
new file mode 100644
|
|
index 000000000000..1e76898865c5
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md
|
|
@@ -0,0 +1,21 @@
|
|
+# Summary
|
|
+
|
|
+[Introduction](./introduction.md)
|
|
+
|
|
+- [Floating-point Math](./float-math/fp.md)
|
|
+ - [Short-vector Math Library](./float-math/svml.md)
|
|
+ - [Approximate functions](./float-math/approx.md)
|
|
+ - [Fused multiply-accumulate](./float-math/fma.md)
|
|
+
|
|
+- [Target features](./target-feature/features.md)
|
|
+ - [Using `RUSTFLAGS`](./target-feature/rustflags.md)
|
|
+ - [Using the `target_feature` attribute](./target-feature/attribute.md)
|
|
+ - [Interaction with inlining](./target-feature/inlining.md)
|
|
+ - [Detecting features at runtime](./target-feature/runtime.md)
|
|
+
|
|
+- [Bounds checking](./bound_checks.md)
|
|
+- [Vertical and horizontal operations](./vert-hor-ops.md)
|
|
+
|
|
+- [Performance profiling](./prof/profiling.md)
|
|
+ - [Profiling on Linux](./prof/linux.md)
|
|
+ - [Using machine code analyzers](./prof/mca.md)
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/ascii.css b/third_party/rust/packed_simd/perf-guide/src/ascii.css
|
|
new file mode 100644
|
|
index 000000000000..4c02651195f9
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/ascii.css
|
|
@@ -0,0 +1,4 @@
|
|
+code {
|
|
+ /* "Source Code Pro" breaks ASCII art */
|
|
+ font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace;
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/bound_checks.md b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md
|
|
new file mode 100644
|
|
index 000000000000..2eeedb5ac829
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md
|
|
@@ -0,0 +1,22 @@
|
|
+# Bounds checking
|
|
+
|
|
+Reading and writing packed vectors to/from slices is checked by default.
|
|
+Independently of the configuration options used, the safe functions:
|
|
+
|
|
+* `Simd<[T; N]>::from_slice_aligned(& s[..])`
|
|
+* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])`
|
|
+
|
|
+always check that:
|
|
+
|
|
+* the slice is big enough to hold the vector
|
|
+* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T;
|
|
+ N]>` (this alignment is often much larger than that of `T`).
|
|
+
|
|
+There are `_unaligned` versions that use unaligned loads and stores, as well as
|
|
+`unsafe` `_unchecked` that do not perform any checks iff `debug-assertions =
|
|
+false` / `debug = false`. That is, the `_unchecked` methods do still assert size
|
|
+and alignment in debug builds and could also do so in release builds depending
|
|
+on the configuration options.
|
|
+
|
|
+These assertions do often significantly impact performance and you should be
|
|
+aware of them.
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md
|
|
new file mode 100644
|
|
index 000000000000..2237c67ec4b3
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md
|
|
@@ -0,0 +1,8 @@
|
|
+# Approximate functions
|
|
+
|
|
+<!-- TODO:
|
|
+
|
|
+Explain that they exist, that they are often _much_ faster, how to use them,
|
|
+that people should check whether the error is good enough for their
|
|
+applications. Explain that this error is currently unstable and might change.
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md
|
|
new file mode 100644
|
|
index 000000000000..357748383d63
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md
|
|
@@ -0,0 +1,6 @@
|
|
+# Fused Multiply Add
|
|
+
|
|
+<!-- TODO:
|
|
+Explain that this is a compound operation, infinite precision, difference
|
|
+between `mul_add` and `mul_adde`, that LLVM cannot do this by itself, etc.
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md
|
|
new file mode 100644
|
|
index 000000000000..711fcc4fd598
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md
|
|
@@ -0,0 +1,3 @@
|
|
+# Floating-point math
|
|
+
|
|
+This chapter contains information pertaining to working with floating-point numbers.
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md
|
|
new file mode 100644
|
|
index 000000000000..266c2531cc04
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md
|
|
@@ -0,0 +1,7 @@
|
|
+# Short Vector Math Library
|
|
+
|
|
+<!-- TODO:
|
|
+Explain how is short-vector math performed by default (just scalarized libm calls).
|
|
+
|
|
+Explain how to enable `sleef`, etc.
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/introduction.md b/third_party/rust/packed_simd/perf-guide/src/introduction.md
|
|
new file mode 100644
|
|
index 000000000000..7243e19c8a54
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/introduction.md
|
|
@@ -0,0 +1,26 @@
|
|
+# Introduction
|
|
+
|
|
+## What is SIMD
|
|
+
|
|
+<!-- TODO:
|
|
+describe what SIMD is, which algorithms can benefit from it,
|
|
+give usage examples
|
|
+-->
|
|
+
|
|
+## History of SIMD in Rust
|
|
+
|
|
+<!-- TODO:
|
|
+discuss history of unstable std::simd,
|
|
+stabilization of std::arch, etc.
|
|
+-->
|
|
+
|
|
+## Discover packed_simd
|
|
+
|
|
+<!-- TODO: describe scope of this project -->
|
|
+
|
|
+Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately,
|
|
+not trivial. There are many pitfalls that one should be aware of, and some idioms
|
|
+that help avoid those pitfalls.
|
|
+
|
|
+This book attempts to document these best practices and provides practical examples
|
|
+on how to apply the tips to _your_ code.
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/linux.md b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md
|
|
new file mode 100644
|
|
index 000000000000..96c7d67bc476
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md
|
|
@@ -0,0 +1,107 @@
|
|
+# Performance profiling on Linux
|
|
+
|
|
+## Using `perf`
|
|
+
|
|
+[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler
|
|
+for Linux, featuring support for various hardware Performance Monitoring Units,
|
|
+as well as integration with the kernel's performance events framework.
|
|
+
|
|
+We will only look at how the `perf` command can be used to profile SIMD code.
|
|
+Full system profiling is outside of the scope of this book.
|
|
+
|
|
+### Recording
|
|
+
|
|
+The first step is to record a program's execution during an average workload.
|
|
+It helps if you can isolate the parts of your program which have performance
|
|
+issues, and set up a benchmark which can be easily (re)run.
|
|
+
|
|
+Build the benchmark binary in release mode, after having enabled debug info:
|
|
+
|
|
+```sh
|
|
+$ cargo build --release
|
|
+Finished release [optimized + debuginfo] target(s) in 0.02s
|
|
+```
|
|
+
|
|
+Then use the `perf record` subcommand:
|
|
+
|
|
+```sh
|
|
+$ perf record --call-graph=dwarf ./target/release/my-program
|
|
+[ perf record: Woken up 10 times to write data ]
|
|
+[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ]
|
|
+```
|
|
+
|
|
+Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use
|
|
+`--call-graph=lbr` if you have a processor with support for Last Branch Record
|
|
+(i.e. Intel Haswell and newer).
|
|
+
|
|
+`perf` will, by default, record the count of CPU cycles it takes to execute
|
|
+various parts of your program. You can use the `-e` command line option
|
|
+to enable other performance events, such as `cache-misses`. Use `perf list`
|
|
+to get a list of all hardware counters supported by your CPU.
|
|
+
|
|
+### Viewing the report
|
|
+
|
|
+The next step is getting a bird's eye view of the program's execution.
|
|
+`perf` provides a `ncurses`-based interface which will get you started.
|
|
+
|
|
+Use `perf report` to open a visualization of your program's performance:
|
|
+
|
|
+```sh
|
|
+perf report --hierarchy -M intel
|
|
+```
|
|
+
|
|
+`--hierarchy` will display a tree-like structure of where your program spent
|
|
+most of its time. `-M intel` enables disassembly output with Intel syntax, which
|
|
+is subjectively more readable than the default AT&T syntax.
|
|
+
|
|
+Here is the output from profiling the `nbody` benchmark:
|
|
+
|
|
+```
|
|
+- 100,00% nbody
|
|
+ - 94,18% nbody
|
|
+ + 93,48% [.] nbody_lib::simd::advance
|
|
+ + 0,70% [.] nbody_lib::run
|
|
+ + 5,06% libc-2.28.so
|
|
+```
|
|
+
|
|
+If you move with the arrow keys to any node in the tree, you can then press `a`
|
|
+to have `perf` _annotate_ that node. This means it will:
|
|
+
|
|
+- disassemble the function
|
|
+
|
|
+- associate every instruction with the percentage of time which was spent executing it
|
|
+
|
|
+- interleave the disassembly with the source code,
|
|
+ assuming it found the debug symbols
|
|
+ (you can use `s` to toggle this behaviour)
|
|
+
|
|
+`perf` will, by default, open the instruction which it identified as being the
|
|
+hottest spot in the function:
|
|
+
|
|
+```
|
|
+0,76 │ movapd xmm2,xmm0
|
|
+0,38 │ movhlps xmm2,xmm0
|
|
+ │ addpd xmm2,xmm0
|
|
+ │ unpcklpd xmm1,xmm2
|
|
+12,50 │ sqrtpd xmm0,xmm1
|
|
+1,52 │ mulpd xmm0,xmm1
|
|
+```
|
|
+
|
|
+In this case, `sqrtpd` will be highlighted in red, since that's the instruction
|
|
+which the CPU spends most of its time executing.
|
|
+
|
|
+## Using Valgrind
|
|
+
|
|
+Valgrind is a set of tools which initially helped C/C++ programmers find unsafe
|
|
+memory accesses in their code. Nowadays the project also has
|
|
+
|
|
+- a heap profiler called `massif`
|
|
+
|
|
+- a cache utilization profiler called `cachegrind`
|
|
+
|
|
+- a call-graph performance profiler called `callgrind`
|
|
+
|
|
+<!--
|
|
+TODO: explain valgrind's dynamic binary translation, warn about massive
|
|
+slowdown, talk about `kcachegrind` for a GUI
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/mca.md b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md
|
|
new file mode 100644
|
|
index 000000000000..65ddf1a4eb3a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md
|
|
@@ -0,0 +1,100 @@
|
|
+# Machine code analysis tools
|
|
+
|
|
+## The microarchitecture of modern CPUs
|
|
+
|
|
+While you might have heard of Instruction Set Architectures, such as `x86` or
|
|
+`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_),
|
|
+refers to the internal details of an actual family of CPUs, such as Intel's
|
|
+_Haswell_ or AMD's _Jaguar_.
|
|
+
|
|
+Replacing scalar code with SIMD code will improve performance on all CPUs
|
|
+supporting the required vector extensions.
|
|
+However, due to microarchitectural differences, the actual speed-up at
|
|
+runtime might vary.
|
|
+
|
|
+**Example**: a simple example arises when optimizing for AMD K8 CPUs.
|
|
+The assembly generated for an empty function should look like this:
|
|
+
|
|
+```asm
|
|
+nop
|
|
+ret
|
|
+```
|
|
+
|
|
+The `nop` is used to align the `ret` instruction for better performance.
|
|
+However, the compiler will actually generate the following code:
|
|
+
|
|
+```asm
|
|
+repz ret
|
|
+```
|
|
+
|
|
+The `repz` prefix will repeat the following instruction until a certain
|
|
+condition. Of course, in this situation, the function will simply immediately
|
|
+return, and the `ret` instruction is still aligned.
|
|
+However, AMD K8's branch predictor performs better with the latter code.
|
|
+
|
|
+For those looking to absolutely maximize performance for a certain target µ-arch,
|
|
+you will have to read some CPU manuals, or ask the compiler to do it for you
|
|
+with `-C target-cpu`.
|
|
+
|
|
+### Summary of CPU internals
|
|
+
|
|
+Modern processors are able to execute instructions out-of-order for better performance,
|
|
+by utilizing tricks such as [branch prediction], [instruction pipelining],
|
|
+or [superscalar execution].
|
|
+
|
|
+[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor
|
|
+[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining
|
|
+[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor
|
|
+
|
|
+SIMD instructions are also subject to these optimizations, meaning it can get pretty
|
|
+difficult to determine where the slowdown happens.
|
|
+For example, if the profiler reports a store operation is slow, one of two things
|
|
+could be happening:
|
|
+
|
|
+- the store is limited by the CPU's memory bandwidth, which is actually an ideal
|
|
+ scenario, all things considered;
|
|
+
|
|
+- memory bandwidth is nowhere near its peak, but the value to be stored is at the
|
|
+ end of a long chain of operations, and this store is where the profiler
|
|
+ encountered the pipeline stall;
|
|
+
|
|
+Since most profilers are simple tools which don't understand the subtleties of
|
|
+instruction scheduling, you need more specialized tools to tell these two cases apart.
|
|
+
|
|
+## Analyzing the machine code
|
|
+
|
|
+Certain tools have knowledge of internal CPU microarchitecture, i.e. they know
|
|
+
|
|
+- how many physical [register files] a CPU actually has
|
|
+
|
|
+- what is the latency / throughput of an instruction
|
|
+
|
|
+- what [µ-ops] are generated for a set of instructions
|
|
+
|
|
+and many other architectural details.
|
|
+
|
|
+[register files]: https://en.wikipedia.org/wiki/Register_file
|
|
+[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation
|
|
+
|
|
+These tools are therefore able to provide accurate information as to why some
|
|
+instructions are inefficient, and where the bottleneck is.
|
|
+
|
|
+The disadvantage is that the output of these tools requires advanced knowledge
|
|
+of the target architecture to understand, i.e. they **cannot** point out what
|
|
+the cause of the issue is explicitly.
|
|
+
|
|
+## Intel's Architecture Code Analyzer (IACA)
|
|
+
|
|
+[IACA] is a free tool offered by Intel for analyzing the performance of various
|
|
+computational kernels.
|
|
+
|
|
+Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches.
|
|
+
|
|
+[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer
|
|
+
|
|
+## llvm-mca
|
|
+
|
|
+<!--
|
|
+TODO: once LLVM 7 gets released, write a chapter on using llvm-mca
|
|
+with SIMD disassembly.
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md
|
|
new file mode 100644
|
|
index 000000000000..02ba78d2f22f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md
|
|
@@ -0,0 +1,14 @@
|
|
+# Performance profiling
|
|
+
|
|
+While the rest of the book provides practical advice on how to improve the performance
|
|
+of SIMD code, this chapter is dedicated to [**performance profiling**][profiling].
|
|
+Profiling consists of recording a program's execution in order to identify program
|
|
+hotspots.
|
|
+
|
|
+**Important**: most profilers require debug information in order to accurately
|
|
+link the program hotspots back to the corresponding source code lines. Rust will
|
|
+disable debug info generation by default for optimized builds, but you can change
|
|
+that [in your `Cargo.toml`][cargo-ref].
|
|
+
|
|
+[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming)
|
|
+[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md
|
|
new file mode 100644
|
|
index 000000000000..ee670fea5bd8
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md
|
|
@@ -0,0 +1,5 @@
|
|
+# The `target_feature` attribute
|
|
+
|
|
+<!-- TODO:
|
|
+Explain the `#[target_feature]` attribute
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md
|
|
new file mode 100644
|
|
index 000000000000..b93030ca6708
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md
|
|
@@ -0,0 +1,13 @@
|
|
+# Enabling target features
|
|
+
|
|
+Not all processors of a certain architecture will have SIMD processing units,
|
|
+and using a SIMD instruction which is not supported will trigger undefined behavior.
|
|
+
|
|
+To allow building safe, portable programs, the Rust compiler will **not**, by default,
|
|
+generate any sort of vector instructions, unless it can statically determine
|
|
+they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed.
|
|
+The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of
|
|
+which features are enabled by default on various platforms, refer to the target
|
|
+specifications [in the compiler's source code][targets].
|
|
+
|
|
+[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md
|
|
new file mode 100644
|
|
index 000000000000..86705102a74b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md
|
|
@@ -0,0 +1,5 @@
|
|
+# Inlining
|
|
+
|
|
+<!-- TODO:
|
|
+Explain how the `#[target_feature]` attribute interacts with inlining
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md
|
|
new file mode 100644
|
|
index 000000000000..5b55c61c268a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md
|
|
@@ -0,0 +1,31 @@
|
|
+# Target features in practice
|
|
+
|
|
+Using `RUSTFLAGS` will allow the crate being compiled, as well as all its
|
|
+transitive dependencies to use certain target features.
|
|
+
|
|
+A technique used to avoid undefined behavior at runtime is to compile and
|
|
+ship multiple binaries, each compiled with a certain set of features.
|
|
+This might not be feasible in some cases, and can quickly get out of hand
|
|
+as more and more vector extensions are added to an architecture.
|
|
+
|
|
+Rust can be more flexible: you can build a single binary/library which automatically
|
|
+picks the best supported vector instructions depending on the host machine.
|
|
+The trick consists of monomorphizing parts of the code during building, and then
|
|
+using run-time feature detection to select the right code path when running.
|
|
+
|
|
+<!-- TODO
|
|
+Explain how to create efficient functions that dispatch to different
|
|
+implementations at run-time without issues (e.g. using `#[inline(always)]` for
|
|
+the impls, wrapping in `#[target_feature]`, and the wrapping those in a function
|
|
+that does run-time feature detection).
|
|
+-->
|
|
+
|
|
+**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing
|
|
+SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause
|
|
+performance issues.
|
|
+
|
|
+The solution is to compile all code, even the code written with 128-bit vectors,
|
|
+with the AVX target feature enabled. This will cause the compiler to prefix the
|
|
+generated instructions with the [VEX] prefix.
|
|
+
|
|
+[VEX]: https://en.wikipedia.org/wiki/VEX_prefix
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md
|
|
new file mode 100644
|
|
index 000000000000..47ddcc8660db
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md
|
|
@@ -0,0 +1,5 @@
|
|
+# Detecting host features at runtime
|
|
+
|
|
+<!-- TODO:
|
|
+Explain cost (how it works).
|
|
+-->
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md
|
|
new file mode 100644
|
|
index 000000000000..e2e806e085b6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md
|
|
@@ -0,0 +1,77 @@
|
|
+# Using RUSTFLAGS
|
|
+
|
|
+One of the easiest ways to benefit from SIMD is to allow the compiler
|
|
+to generate code using certain vector instruction extensions.
|
|
+
|
|
+The environment variable `RUSTFLAGS` can be used to pass options for code
|
|
+generation to the Rust compiler. These flags will affect **all** compiled crates.
|
|
+
|
|
+There are two flags which can be used to enable specific vector extensions:
|
|
+
|
|
+## target-feature
|
|
+
|
|
+- Syntax: `-C target-feature=<features>`
|
|
+
|
|
+- Provides the compiler with a comma-separated set of instruction extensions
|
|
+ to enable.
|
|
+
|
|
+ **Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions
|
|
+ for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and
|
|
+ [Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions).
|
|
+
|
|
+- To list target triples for all targets supported by Rust, use:
|
|
+
|
|
+ ```sh
|
|
+ rustc --print target-list
|
|
+ ```
|
|
+
|
|
+- To list all supported target features for a certain target triple, use:
|
|
+
|
|
+ ```sh
|
|
+ rustc --target=${TRIPLE} --print target-features
|
|
+ ```
|
|
+
|
|
+- Note that all CPU features are independent, and will have to be enabled individually.
|
|
+
|
|
+ **Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though
|
|
+ all CPUs which support AVX2 also support FMA. To enable both, one has to use
|
|
+ `-C target-feature=+avx2,+fma`
|
|
+
|
|
+- Some features also depend on other features, which need to be enabled for the
|
|
+ target instructions to be generated.
|
|
+
|
|
+ **Example**: Unless `v7` is specified as the target CPU (see below), to enable
|
|
+ NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`.
|
|
+
|
|
+## target-cpu
|
|
+
|
|
+- Syntax: `-C target-cpu=<cpu>`
|
|
+
|
|
+- Sets the identifier of a CPU family / model for which to build and optimize the code.
|
|
+
|
|
+ **Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'`
|
|
+
|
|
+- To list all supported target CPUs for a certain target triple, use:
|
|
+
|
|
+ ```sh
|
|
+ rustc --target=${TRIPLE} --print target-cpus
|
|
+ ```
|
|
+
|
|
+ **Example**:
|
|
+
|
|
+ ```sh
|
|
+ rustc --target=i686-pc-windows-msvc --print target-cpus
|
|
+ ```
|
|
+
|
|
+- The compiler will translate this into a list of target features. Therefore,
|
|
+ individual feature checks (`#[cfg(target_feature = "...")]`) will still
|
|
+ work properly.
|
|
+
|
|
+- It will cause the code generator to optimize the generated code for that
|
|
+ specific CPU model.
|
|
+
|
|
+- Using `native` as the CPU model will cause Rust to generate and optimize code
|
|
+ for the CPU running the compiler. It is useful when building programs which you
|
|
+ plan to only use locally. This should never be used when the generated programs
|
|
+ are meant to be run on other computers, such as when packaging for distribution
|
|
+ or cross-compiling.
|
|
diff --git a/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md
|
|
new file mode 100644
|
|
index 000000000000..d0dd1be12a19
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md
|
|
@@ -0,0 +1,76 @@
|
|
+# Vertical and horizontal operations
|
|
+
|
|
+In SIMD terminology, each vector has a certain "width" (number of lanes).
|
|
+A vector processor is able to perform two kinds of operations on a vector:
|
|
+
|
|
+- Vertical operations:
|
|
+ operate on two vectors of the same width, result has same width
|
|
+
|
|
+**Example**: vertical addition of two `f32x4` vectors
|
|
+
|
|
+ %0 == | 2 | -3.5 | 0 | 7 |
|
|
+ + + + +
|
|
+ %1 == | 4 | 1.5 | -1 | 0 |
|
|
+ = = = =
|
|
+ %0 + %1 == | 6 | -2 | -1 | 7 |
|
|
+
|
|
+- Horizontal operations:
|
|
+ reduce the elements of two vectors in some way,
|
|
+ the result's elements combine information from the two original ones
|
|
+
|
|
+**Example**: horizontal addition of two `u64x2` vectors
|
|
+
|
|
+ %0 == | 1 | 3 |
|
|
+ └─+───┘
|
|
+ └───────┐
|
|
+ │
|
|
+ %1 == | 4 | -1 | │
|
|
+ └─+──┘ │
|
|
+ └───┐ │
|
|
+ │ │
|
|
+ ┌─────│───┘
|
|
+ ▼ ▼
|
|
+ %0 + %1 == | 4 | 3 |
|
|
+
|
|
+## Performance consideration of horizontal operations
|
|
+
|
|
+The result of vertical operations, like vector negation: `-a`, for a given lane,
|
|
+does not depend on the result of the operation for the other lanes. The result
|
|
+of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on
|
|
+the value of all vector lanes.
|
|
+
|
|
+In virtually all architectures vertical operations are fast, while horizontal
|
|
+operations are, by comparison, very slow.
|
|
+
|
|
+Consider the following two functions for computing the sum of all `f32` values
|
|
+in a slice:
|
|
+
|
|
+```rust
|
|
+fn fast_sum(x: &[f32]) -> f32 {
|
|
+ assert!(x.len() % 4 == 0);
|
|
+ let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.]
|
|
+ for i in (0..x.len()).step_by(4) {
|
|
+ sum += f32x4::from_slice_unaligned(&x[i..]);
|
|
+ }
|
|
+ sum.sum()
|
|
+}
|
|
+
|
|
+fn slow_sum(x: &[f32]) -> f32 {
|
|
+ assert!(x.len() % 4 == 0);
|
|
+ let mut sum: f32 = 0.;
|
|
+ for i in (0..x.len()).step_by(4) {
|
|
+ sum += f32x4::from_slice_unaligned(&x[i..]).sum();
|
|
+ }
|
|
+ sum
|
|
+}
|
|
+```
|
|
+
|
|
+The inner loop over the slice is where the bulk of the work actually happens.
|
|
+There, the `fast_sum` function performs vertical operations into a vector, doing
|
|
+a single horizontal reduction at the end, while the `slow_sum` function performs
|
|
+horizontal vector operations inside of the loop.
|
|
+
|
|
+On all widely-used architectures, `fast_sum` is a large constant factor faster
|
|
+than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On
|
|
+the particular machine tested there the algorithm using the horizontal vector
|
|
+addition is 2.7x slower than the one using vertical vector operations!
|
|
diff --git a/third_party/rust/packed_simd/readme.md b/third_party/rust/packed_simd/readme.md
|
|
new file mode 100644
|
|
index 000000000000..3b27a2bba0d6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/readme.md
|
|
@@ -0,0 +1,182 @@
|
|
+# `Simd<[T; N]>`
|
|
+
|
|
+## Implementation of [Rust RFC #2366: `std::simd`][rfc2366]
|
|
+
|
|
+[![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![Latest Version]][crates.io] [![docs]][master_docs]
|
|
+
|
|
+> This aims to be a 100% conforming implementation of Rust RFC 2366 for stabilization.
|
|
+
|
|
+**WARNING**: this crate only supports the most recent nightly Rust toolchain.
|
|
+
|
|
+## Documentation
|
|
+
|
|
+* [API docs (`master` branch)][master_docs]
|
|
+* [Performance guide][perf_guide]
|
|
+* [API docs (`docs.rs`)][docs.rs]: **CURRENTLY DOWN** due to
|
|
+ https://github.com/rust-lang-nursery/packed_simd/issues/110
|
|
+* [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale,
|
|
+ discussion, etc.
|
|
+
|
|
+## Examples
|
|
+
|
|
+Most of the examples come with both a scalar and a vectorized implementation.
|
|
+
|
|
+* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench)
|
|
+* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux)
|
|
+* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse)
|
|
+* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot)
|
|
+* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody)
|
|
+* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing)
|
|
+* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm)
|
|
+* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform)
|
|
+* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil)
|
|
+* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product)
|
|
+
|
|
+## Cargo features
|
|
+
|
|
+* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait
|
|
+ implementations for the vector types. These allow reinterpreting the bits of a
|
|
+ vector type as those of another vector type safely by just using the
|
|
+ `.into_bits()` method.
|
|
+
|
|
+* `core_arch` (default: disabled): enable this feature to recompile `core::arch`
|
|
+ for the target-features enabled. `packed_simd` includes optimizations for some
|
|
+ target feature combinations that are enabled by this feature. Note, however,
|
|
+ that this is an unstable dependency, that rustc might break at any time.
|
|
+
|
|
+* `sleef-sys` (default: disabled - `x86_64` only): internally uses the [SLEEF]
|
|
+ short-vector math library when profitable via the [`sleef-sys`][sleef_sys]
|
|
+ crate. [SLEEF] is licensed under the [Boost Software License
|
|
+ v1.0][boost_license], an extremely permissive license, and can be statically
|
|
+ linked without issues.
|
|
+
|
|
+## Performance
|
|
+
|
|
+The following [ISPC] examples are also part of `packed_simd`'s
|
|
+[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/)
|
|
+directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s
|
|
+Single-Program-Multiple-Data (SPMD) programming model. The performance results
|
|
+on different hardware are shown in the `readme.md` of each example. The following
|
|
+table summarizes the performance ranges, where `+` means speed-up and `-`
|
|
+slowdown:
|
|
+
|
|
+* `aobench`: `[-1.02x, +1.53x]`,
|
|
+* `stencil`: `[+1.06x, +1.72x]`,
|
|
+* `mandelbrot`: `[-1.74x, +1.2x]`,
|
|
+* `options_pricing`:
|
|
+ * `black_scholes`: `+1.0x`
|
|
+ * `binomial_put`: `+1.4x`
|
|
+
|
|
+ While SPMD is not the intended use case for `packed_simd`, it is possible to
|
|
+ combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming
|
|
+ model in Rust. Writing performant code is not as straightforward as with
|
|
+ [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one
|
|
+ can easily match and often out-perform [ISPC]'s "default performance".
|
|
+
|
|
+## Platform support
|
|
+
|
|
+The following table describes the supported platforms: `build` shows whether the
|
|
+library compiles without issues for a given target, while `run` shows whether
|
|
+the full testsuite passes on the target.
|
|
+
|
|
+| Linux targets: | build | run |
|
|
+|-----------------------------------|-----------|---------|
|
|
+| `i586-unknown-linux-gnu` | ✓ | ✓ |
|
|
+| `i686-unknown-linux-gnu` | ✓ | ✓ |
|
|
+| `x86_64-unknown-linux-gnu` | ✓ | ✓ |
|
|
+| `arm-unknown-linux-gnueabi` | ✗ | ✗ |
|
|
+| `arm-unknown-linux-gnueabihf` | ✓ | ✓ |
|
|
+| `armv7-unknown-linux-gnueabi` | ✓ | ✓ |
|
|
+| `aarch64-unknown-linux-gnu` | ✓ | ✓ |
|
|
+| `mips-unknown-linux-gnu` | ✓ | ✓ |
|
|
+| `mipsel-unknown-linux-musl` | ✓ | ✓ |
|
|
+| `mips64-unknown-linux-gnuabi64` | ✓ | ✓ |
|
|
+| `mips64el-unknown-linux-gnuabi64` | ✓ | ✓ |
|
|
+| `powerpc-unknown-linux-gnu` | ✗ | ✗ |
|
|
+| `powerpc64-unknown-linux-gnu` | ✗ | ✗ |
|
|
+| `powerpc64le-unknown-linux-gnu` | ✗ | ✗ |
|
|
+| `s390x-unknown-linux-gnu` | ✓ | ✓* |
|
|
+| `sparc64-unknown-linux-gnu` | ✓ | ✓* |
|
|
+| `thumbv7neon-unknown-linux-gnueabihf` | ✓ | ✓ |
|
|
+| **MacOSX targets:** | **build** | **run** |
|
|
+| `x86_64-apple-darwin` | ✓ | ✓ |
|
|
+| `i686-apple-darwin` | ✓ | ✓ |
|
|
+| **Windows targets:** | **build** | **run** |
|
|
+| `x86_64-pc-windows-msvc` | ✓ | ✓ |
|
|
+| `i686-pc-windows-msvc` | ✓ | ✓ |
|
|
+| `x86_64-pc-windows-gnu` | ✗ | ✗ |
|
|
+| `i686-pc-windows-gnu` | ✗ | ✗ |
|
|
+| **WebAssembly targets:** | **build** | **run** |
|
|
+| `wasm32-unknown-unknown` | ✓ | ✓ |
|
|
+| **Android targets:** | **build** | **run** |
|
|
+| `x86_64-linux-android` | ✓ | ✓ |
|
|
+| `arm-linux-androideabi` | ✓ | ✓ |
|
|
+| `aarch64-linux-android` | ✓ | ✗ |
|
|
+| `thumbv7neon-linux-androideabi` | ✓ | ✓ |
|
|
+| **iOS targets:** | **build** | **run** |
|
|
+| `i386-apple-ios` | ✓ | ✗ |
|
|
+| `x86_64-apple-ios` | ✓ | ✗ |
|
|
+| `armv7-apple-ios` | ✓ | ✗** |
|
|
+| `aarch64-apple-ios` | ✓ | ✗** |
|
|
+| **xBSD targets:** | **build** | **run** |
|
|
+| `i686-unknown-freebsd` | ✗ | ✗** |
|
|
+| `x86_64-unknown-freebsd` | ✗ | ✗** |
|
|
+| `x86_64-unknown-netbsd` | ✗ | ✗** |
|
|
+| **Solaris targets:** | **build** | **run** |
|
|
+| `x86_64-sun-solaris` | ✗ | ✗** |
|
|
+
|
|
+[*] most of the test suite passes correctly on these platforms but
|
|
+there are correctness bugs open in the issue tracker.
|
|
+
|
|
+[**] it is currently not easily possible to run these platforms on CI.
|
|
+
|
|
+## Machine code verification
|
|
+
|
|
+The
|
|
+[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify)
|
|
+crate tests disassembles the portable packed vector APIs at run-time and
|
|
+compares the generated machine code against the desired one to make sure that
|
|
+this crate remains efficient.
|
|
+
|
|
+## License
|
|
+
|
|
+This project is licensed under either of
|
|
+
|
|
+* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
|
|
+ ([LICENSE-APACHE](LICENSE-APACHE))
|
|
+
|
|
+* [MIT License](http://opensource.org/licenses/MIT)
|
|
+ ([LICENSE-MIT](LICENSE-MIT))
|
|
+
|
|
+at your option.
|
|
+
|
|
+## Contributing
|
|
+
|
|
+We welcome all people who want to contribute.
|
|
+Please see the [contributing instructions] for more information.
|
|
+
|
|
+Contributions in any form (issues, pull requests, etc.) to this project
|
|
+must adhere to Rust's [Code of Conduct].
|
|
+
|
|
+Unless you explicitly state otherwise, any contribution intentionally submitted
|
|
+for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be
|
|
+dual licensed as above, without any additional terms or conditions.
|
|
+
|
|
+[travis]: https://travis-ci.org/rust-lang-nursery/packed_simd
|
|
+[Travis-CI Status]: https://travis-ci.org/rust-lang-nursery/packed_simd.svg?branch=master
|
|
+[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd
|
|
+[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true
|
|
+[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg
|
|
+[crates.io]: https://crates.io/crates/packed_simd
|
|
+[docs]: https://docs.rs/packed_simd/badge.svg
|
|
+[docs.rs]: https://docs.rs/packed_simd/
|
|
+[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/
|
|
+[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
|
|
+[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366
|
|
+[ISPC]: https://ispc.github.io/
|
|
+[rayon]: https://crates.io/crates/rayon
|
|
+[boost_license]: https://www.boost.org/LICENSE_1_0.txt
|
|
+[SLEEF]: https://sleef.org/
|
|
+[sleef_sys]: https://crates.io/crates/sleef-sys
|
|
+[contributing instructions]: contributing.md
|
|
+[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html
|
|
diff --git a/third_party/rust/packed_simd/rustfmt.toml b/third_party/rust/packed_simd/rustfmt.toml
|
|
new file mode 100644
|
|
index 000000000000..5b400a4ce440
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/rustfmt.toml
|
|
@@ -0,0 +1,7 @@
|
|
+max_width = 79
|
|
+use_small_heuristics = "Max"
|
|
+wrap_comments = true
|
|
+comment_width = 79
|
|
+fn_args_density = "Compressed"
|
|
+edition = "2018"
|
|
+error_on_line_overflow = true
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/packed_simd/src/api.rs b/third_party/rust/packed_simd/src/api.rs
|
|
new file mode 100644
|
|
index 000000000000..9959a052ae96
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api.rs
|
|
@@ -0,0 +1,301 @@
|
|
+//! Implements the Simd<[T; N]> APIs
|
|
+
|
|
+crate mod cast;
|
|
+#[macro_use]
|
|
+mod cmp;
|
|
+#[macro_use]
|
|
+mod default;
|
|
+#[macro_use]
|
|
+mod fmt;
|
|
+#[macro_use]
|
|
+mod from;
|
|
+#[macro_use]
|
|
+mod hash;
|
|
+#[macro_use]
|
|
+mod math;
|
|
+#[macro_use]
|
|
+mod minimal;
|
|
+#[macro_use]
|
|
+mod ops;
|
|
+#[macro_use]
|
|
+mod ptr;
|
|
+#[macro_use]
|
|
+mod reductions;
|
|
+#[macro_use]
|
|
+mod select;
|
|
+#[macro_use]
|
|
+mod shuffle;
|
|
+#[macro_use]
|
|
+mod shuffle1_dyn;
|
|
+#[macro_use]
|
|
+mod slice;
|
|
+#[macro_use]
|
|
+mod swap_bytes;
|
|
+#[macro_use]
|
|
+mod bit_manip;
|
|
+
|
|
+#[cfg(feature = "into_bits")]
|
|
+crate mod into_bits;
|
|
+
|
|
+macro_rules! impl_i {
|
|
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
|
|
+ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
|
|
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
|
+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ | $($elem_ids),* | $(#[$doc])*);
|
|
+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
|
+ );
|
|
+ impl_ops_scalar_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
|
+ );
|
|
+ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_int_min_max!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
|
|
+ );
|
|
+ impl_reduction_integer_arithmetic!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ );
|
|
+ impl_reduction_min_max!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ );
|
|
+ impl_reduction_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
|
|
+ );
|
|
+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
|
|
+ impl_from_vectors!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
|
+ );
|
|
+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_partial_eq!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)
|
|
+ );
|
|
+ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
|
+ impl_cmp_vertical!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
|
|
+ );
|
|
+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
|
+
|
|
+ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
|
|
+ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ }
|
|
+}
|
|
+
|
|
+macro_rules! impl_u {
|
|
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
|
|
+ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
|
|
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
|
+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ | $($elem_ids),* | $(#[$doc])*);
|
|
+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
|
+ );
|
|
+ impl_ops_scalar_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
|
|
+ );
|
|
+ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_int_min_max!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
|
|
+ );
|
|
+ impl_reduction_integer_arithmetic!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ );
|
|
+ impl_reduction_min_max!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ );
|
|
+ impl_reduction_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
|
|
+ );
|
|
+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
|
|
+ impl_from_vectors!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
|
+ );
|
|
+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_partial_eq!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0)
|
|
+ );
|
|
+ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
|
+ impl_cmp_vertical!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
|
|
+ );
|
|
+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
|
|
+
|
|
+ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
|
|
+ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ }
|
|
+}
|
|
+
|
|
+macro_rules! impl_f {
|
|
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
|
|
+ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
|
|
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
|
+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ | $($elem_ids),* | $(#[$doc])*);
|
|
+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_ops_vector_float_min_max!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
|
|
+ );
|
|
+ impl_reduction_float_arithmetic!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_reduction_min_max!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ );
|
|
+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.));
|
|
+ impl_from_vectors!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
|
+ );
|
|
+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_partial_eq!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.)
|
|
+ );
|
|
+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+
|
|
+ impl_float_consts!([$elem_ty; $elem_n]: $tuple_id);
|
|
+ impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty);
|
|
+
|
|
+ // floating-point math
|
|
+ impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_vertical!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.)
|
|
+ | $test_tt
|
|
+ );
|
|
+
|
|
+ test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt);
|
|
+ test_reduction_float_min_max!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
|
|
+ );
|
|
+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ }
|
|
+}
|
|
+
|
|
+macro_rules! impl_m {
|
|
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident | $ielem_ty:ident
|
|
+ | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),*
|
|
+ | $(#[$doc:meta])*) => {
|
|
+ impl_minimal_mask!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ | $($elem_ids),* | $(#[$doc])*
|
|
+ );
|
|
+ impl_ops_vector_mask_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
|
+ );
|
|
+ impl_ops_scalar_mask_bitwise!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
|
+ );
|
|
+ impl_reduction_bitwise!(
|
|
+ [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt
|
|
+ | (|x|{ x != 0 }) | (true, false)
|
|
+ );
|
|
+ impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_from_array!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
|
|
+ | (crate::$elem_ty::new(true), true)
|
|
+ );
|
|
+ impl_from_vectors!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
|
|
+ );
|
|
+ impl_default!([bool; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_partial_eq!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
|
+ );
|
|
+ impl_cmp_eq!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
|
|
+ );
|
|
+ impl_cmp_vertical!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false)
|
|
+ | $test_tt
|
|
+ );
|
|
+ impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ impl_cmp_ord!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true)
|
|
+ );
|
|
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+
|
|
+ test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
|
|
+ }
|
|
+}
|
|
+
|
|
+macro_rules! impl_const_p {
|
|
+ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
|
|
+ $usize_ty:ident, $isize_ty:ident
|
|
+ | $test_tt:tt | $($elem_ids:ident),*
|
|
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
|
+ impl_minimal_p!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
|
|
+ | ref_ | $test_tt | $($elem_ids),*
|
|
+ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
|
|
+ );
|
|
+ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
|
|
+ }
|
|
+}
|
|
+
|
|
+macro_rules! impl_mut_p {
|
|
+ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
|
|
+ $usize_ty:ident, $isize_ty:ident
|
|
+ | $test_tt:tt | $($elem_ids:ident),*
|
|
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
|
|
+ impl_minimal_p!(
|
|
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
|
|
+ | ref_mut_ | $test_tt | $($elem_ids),*
|
|
+ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
|
|
+ );
|
|
+ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
|
|
+ impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/bit_manip.rs b/third_party/rust/packed_simd/src/api/bit_manip.rs
|
|
new file mode 100644
|
|
index 000000000000..3d3c4eb8850a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/bit_manip.rs
|
|
@@ -0,0 +1,128 @@
|
|
+//! Bit manipulations.
|
|
+
|
|
+macro_rules! impl_bit_manip {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Returns the number of ones in the binary representation of
|
|
+ /// the lanes of `self`.
|
|
+ #[inline]
|
|
+ pub fn count_ones(self) -> Self {
|
|
+ super::codegen::bit_manip::BitManip::ctpop(self)
|
|
+ }
|
|
+
|
|
+ /// Returns the number of zeros in the binary representation of
|
|
+ /// the lanes of `self`.
|
|
+ #[inline]
|
|
+ pub fn count_zeros(self) -> Self {
|
|
+ super::codegen::bit_manip::BitManip::ctpop(!self)
|
|
+ }
|
|
+
|
|
+ /// Returns the number of leading zeros in the binary
|
|
+ /// representation of the lanes of `self`.
|
|
+ #[inline]
|
|
+ pub fn leading_zeros(self) -> Self {
|
|
+ super::codegen::bit_manip::BitManip::ctlz(self)
|
|
+ }
|
|
+
|
|
+ /// Returns the number of trailing zeros in the binary
|
|
+ /// representation of the lanes of `self`.
|
|
+ #[inline]
|
|
+ pub fn trailing_zeros(self) -> Self {
|
|
+ super::codegen::bit_manip::BitManip::cttz(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item_with_macros! {
|
|
+ #[allow(overflowing_literals)]
|
|
+ pub mod [<$id _bit_manip>] {
|
|
+ use super::*;
|
|
+
|
|
+ const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8;
|
|
+
|
|
+ macro_rules! test_func {
|
|
+ ($x:expr, $func:ident) => {{
|
|
+ let mut actual = $x;
|
|
+ for i in 0..$id::lanes() {
|
|
+ actual = actual.replace(
|
|
+ i,
|
|
+ $x.extract(i).$func() as $elem_ty
|
|
+ );
|
|
+ }
|
|
+ let expected = $x.$func();
|
|
+ assert_eq!(actual, expected);
|
|
+ }};
|
|
+ }
|
|
+
|
|
+ const BYTES: [u8; 64] = [
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ 16, 17, 18, 19, 20, 21, 22, 23,
|
|
+ 24, 25, 26, 27, 28, 29, 30, 31,
|
|
+ 32, 33, 34, 35, 36, 37, 38, 39,
|
|
+ 40, 41, 42, 43, 44, 45, 46, 47,
|
|
+ 48, 49, 50, 51, 52, 53, 54, 55,
|
|
+ 56, 57, 58, 59, 60, 61, 62, 63,
|
|
+ ];
|
|
+
|
|
+ fn load_bytes() -> $id {
|
|
+ let elems: &mut [$elem_ty] = unsafe {
|
|
+ slice::from_raw_parts_mut(
|
|
+ BYTES.as_mut_ptr() as *mut $elem_ty,
|
|
+ $id::lanes(),
|
|
+ )
|
|
+ };
|
|
+ $id::from_slice_unaligned(elems)
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn count_ones() {
|
|
+ test_func!($id::splat(0), count_ones);
|
|
+ test_func!($id::splat(!0), count_ones);
|
|
+ test_func!(load_bytes(), count_ones);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn count_zeros() {
|
|
+ test_func!($id::splat(0), count_zeros);
|
|
+ test_func!($id::splat(!0), count_zeros);
|
|
+ test_func!(load_bytes(), count_zeros);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn leading_zeros() {
|
|
+ test_func!($id::splat(0), leading_zeros);
|
|
+ test_func!($id::splat(1), leading_zeros);
|
|
+ // some implementations use `pshufb` which has unique
|
|
+ // behavior when the 8th bit is set.
|
|
+ test_func!($id::splat(0b1000_0010), leading_zeros);
|
|
+ test_func!($id::splat(!0), leading_zeros);
|
|
+ test_func!(
|
|
+ $id::splat(1 << (LANE_WIDTH - 1)),
|
|
+ leading_zeros
|
|
+ );
|
|
+ test_func!(load_bytes(), leading_zeros);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn trailing_zeros() {
|
|
+ test_func!($id::splat(0), trailing_zeros);
|
|
+ test_func!($id::splat(1), trailing_zeros);
|
|
+ test_func!($id::splat(0b1000_0010), trailing_zeros);
|
|
+ test_func!($id::splat(!0), trailing_zeros);
|
|
+ test_func!(
|
|
+ $id::splat(1 << (LANE_WIDTH - 1)),
|
|
+ trailing_zeros
|
|
+ );
|
|
+ test_func!(load_bytes(), trailing_zeros);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast.rs b/third_party/rust/packed_simd/src/api/cast.rs
|
|
new file mode 100644
|
|
index 000000000000..f1c32ca1a38b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast.rs
|
|
@@ -0,0 +1,108 @@
|
|
+//! Implementation of `FromCast` and `IntoCast`.
|
|
+#![allow(clippy::module_name_repetitions)]
|
|
+
|
|
+/// Numeric cast from `T` to `Self`.
|
|
+///
|
|
+/// > Note: This is a temporary workaround until the conversion traits
|
|
+/// specified > in [RFC2484] are implemented.
|
|
+///
|
|
+/// Numeric cast between vectors with the same number of lanes, such that:
|
|
+///
|
|
+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
|
|
+/// -> `u32xN`) is a **no-op**,
|
|
+///
|
|
+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
|
|
+/// `u8xN`) will **truncate**,
|
|
+///
|
|
+/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
|
|
+/// `u32xN`) will:
|
|
+/// * **zero-extend** if the source is unsigned, or
|
|
+/// * **sign-extend** if the source is signed,
|
|
+///
|
|
+/// * casting from a float to an integer will **round the float towards zero**,
|
|
+///
|
|
+/// * casting from an integer to float will produce the floating point
|
|
+/// representation of the integer, **rounding to nearest, ties to even**,
|
|
+///
|
|
+/// * casting from an `f32` to an `f64` is perfect and lossless,
|
|
+///
|
|
+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
|
|
+///
|
|
+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
|
|
+pub trait FromCast<T>: crate::marker::Sized {
|
|
+ /// Numeric cast from `T` to `Self`.
|
|
+ fn from_cast(_: T) -> Self;
|
|
+}
|
|
+
|
|
+/// Numeric cast from `Self` to `T`.
|
|
+///
|
|
+/// > Note: This is a temporary workaround until the conversion traits
|
|
+/// specified > in [RFC2484] are implemented.
|
|
+///
|
|
+/// Numeric cast between vectors with the same number of lanes, such that:
|
|
+///
|
|
+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
|
|
+/// -> `u32xN`) is a **no-op**,
|
|
+///
|
|
+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
|
|
+/// `u8xN`) will **truncate**,
|
|
+///
|
|
+/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
|
|
+/// `u32xN`) will:
|
|
+/// * **zero-extend** if the source is unsigned, or
|
|
+/// * **sign-extend** if the source is signed,
|
|
+///
|
|
+/// * casting from a float to an integer will **round the float towards zero**,
|
|
+///
|
|
+/// * casting from an integer to float will produce the floating point
|
|
+/// representation of the integer, **rounding to nearest, ties to even**,
|
|
+///
|
|
+/// * casting from an `f32` to an `f64` is perfect and lossless,
|
|
+///
|
|
+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
|
|
+///
|
|
+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
|
|
+pub trait Cast<T>: crate::marker::Sized {
|
|
+ /// Numeric cast from `self` to `T`.
|
|
+ fn cast(self) -> T;
|
|
+}
|
|
+
|
|
+/// `FromCast` implies `Cast`.
|
|
+impl<T, U> Cast<U> for T
|
|
+where
|
|
+ U: FromCast<T>,
|
|
+{
|
|
+ #[inline]
|
|
+ fn cast(self) -> U {
|
|
+ U::from_cast(self)
|
|
+ }
|
|
+}
|
|
+
|
|
+/// `FromCast` and `Cast` are reflexive
|
|
+impl<T> FromCast<T> for T {
|
|
+ #[inline]
|
|
+ fn from_cast(t: Self) -> Self {
|
|
+ t
|
|
+ }
|
|
+}
|
|
+
|
|
+#[macro_use]
|
|
+mod macros;
|
|
+
|
|
+mod v16;
|
|
+pub use self::v16::*;
|
|
+
|
|
+mod v32;
|
|
+pub use self::v32::*;
|
|
+
|
|
+mod v64;
|
|
+pub use self::v64::*;
|
|
+
|
|
+mod v128;
|
|
+pub use self::v128::*;
|
|
+
|
|
+mod v256;
|
|
+pub use self::v256::*;
|
|
+
|
|
+mod v512;
|
|
+pub use self::v512::*;
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast/macros.rs b/third_party/rust/packed_simd/src/api/cast/macros.rs
|
|
new file mode 100644
|
|
index 000000000000..3bb29f0b80b7
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast/macros.rs
|
|
@@ -0,0 +1,82 @@
|
|
+//! Macros implementing `FromCast`
|
|
+
|
|
+macro_rules! impl_from_cast_ {
|
|
+ ($id:ident[$test_tt:tt]: $from_ty:ident) => {
|
|
+ impl crate::api::cast::FromCast<$from_ty> for $id {
|
|
+ #[inline]
|
|
+ fn from_cast(x: $from_ty) -> Self {
|
|
+ use crate::llvm::simd_cast;
|
|
+ debug_assert_eq!($from_ty::lanes(), $id::lanes());
|
|
+ Simd(unsafe { simd_cast(x.0) })
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _from_cast_ $from_ty>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn test() {
|
|
+ assert_eq!($id::lanes(), $from_ty::lanes());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! impl_from_cast {
|
|
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
|
|
+ $(
|
|
+ impl_from_cast_!($id[$test_tt]: $from_ty);
|
|
+ )*
|
|
+ }
|
|
+}
|
|
+
|
|
+macro_rules! impl_from_cast_mask_ {
|
|
+ ($id:ident[$test_tt:tt]: $from_ty:ident) => {
|
|
+ impl crate::api::cast::FromCast<$from_ty> for $id {
|
|
+ #[inline]
|
|
+ fn from_cast(x: $from_ty) -> Self {
|
|
+ debug_assert_eq!($from_ty::lanes(), $id::lanes());
|
|
+ x.ne($from_ty::default())
|
|
+ .select($id::splat(true), $id::splat(false))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _from_cast_ $from_ty>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn test() {
|
|
+ assert_eq!($id::lanes(), $from_ty::lanes());
|
|
+
|
|
+ let x = $from_ty::default();
|
|
+ let m: $id = x.cast();
|
|
+ assert!(m.none());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! impl_from_cast_mask {
|
|
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
|
|
+ $(
|
|
+ impl_from_cast_mask_!($id[$test_tt]: $from_ty);
|
|
+ )*
|
|
+ }
|
|
+}
|
|
+
|
|
+#[allow(unused)]
|
|
+macro_rules! impl_into_cast {
|
|
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
|
|
+ $(
|
|
+ impl_from_cast_!($from_ty[$test_tt]: $id);
|
|
+ )*
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast/v128.rs b/third_party/rust/packed_simd/src/api/cast/v128.rs
|
|
new file mode 100644
|
|
index 000000000000..78c07f3a5597
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast/v128.rs
|
|
@@ -0,0 +1,79 @@
|
|
+//! `FromCast` and `IntoCast` implementations for portable 128-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_from_cast!(
|
|
+ i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2
|
|
+);
|
|
+
|
|
+// FIXME[test_v128]: 64-bit single element vectors into_cast impls
|
|
+impl_from_cast!(i128x1[test_v128]: u128x1, m128x1);
|
|
+impl_from_cast!(u128x1[test_v128]: i128x1, m128x1);
|
|
+impl_from_cast!(m128x1[test_v128]: i128x1, u128x1);
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast/v16.rs b/third_party/rust/packed_simd/src/api/cast/v16.rs
|
|
new file mode 100644
|
|
index 000000000000..d292936baa41
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast/v16.rs
|
|
@@ -0,0 +1,17 @@
|
|
+//! `FromCast` and `IntoCast` implementations for portable 16-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_from_cast!(
|
|
+ i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast/v256.rs b/third_party/rust/packed_simd/src/api/cast/v256.rs
|
|
new file mode 100644
|
|
index 000000000000..0a669e0beebe
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast/v256.rs
|
|
@@ -0,0 +1,81 @@
|
|
+//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32);
|
|
+impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32);
|
|
+impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16,
|
|
+ i32x16, u32x16, f32x16, m32x16
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16,
|
|
+ i32x16, u32x16, f32x16, m32x16
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16,
|
|
+ i32x16, u32x16, f32x16, m32x16
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast!(
|
|
+ f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast/v32.rs b/third_party/rust/packed_simd/src/api/cast/v32.rs
|
|
new file mode 100644
|
|
index 000000000000..65050cdacb4e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast/v32.rs
|
|
@@ -0,0 +1,30 @@
|
|
+//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_from_cast!(
|
|
+ i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast/v512.rs b/third_party/rust/packed_simd/src/api/cast/v512.rs
|
|
new file mode 100644
|
|
index 000000000000..9ae1caed35e2
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast/v512.rs
|
|
@@ -0,0 +1,68 @@
|
|
+//! `FromCast` and `IntoCast` implementations for portable 512-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_from_cast!(i8x64[test_v512]: u8x64, m8x64);
|
|
+impl_from_cast!(u8x64[test_v512]: i8x64, m8x64);
|
|
+impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64);
|
|
+
|
|
+impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32);
|
|
+impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32);
|
|
+impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16
|
|
+);
|
|
+impl_from_cast!(
|
|
+ f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast!(
|
|
+ f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast!(
|
|
+ usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, msizex8
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/api/cast/v64.rs b/third_party/rust/packed_simd/src/api/cast/v64.rs
|
|
new file mode 100644
|
|
index 000000000000..0e2f78f7335b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cast/v64.rs
|
|
@@ -0,0 +1,47 @@
|
|
+//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_from_cast!(
|
|
+ i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, u16x4, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
|
|
+);
|
|
+
|
|
+impl_from_cast!(
|
|
+ i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast!(
|
|
+ f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
+impl_from_cast_mask!(
|
|
+ m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/api/cmp.rs b/third_party/rust/packed_simd/src/api/cmp.rs
|
|
new file mode 100644
|
|
index 000000000000..6d5301ddddbd
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cmp.rs
|
|
@@ -0,0 +1,16 @@
|
|
+//! Implement cmp traits for vector types
|
|
+
|
|
+#[macro_use]
|
|
+mod partial_eq;
|
|
+
|
|
+#[macro_use]
|
|
+mod eq;
|
|
+
|
|
+#[macro_use]
|
|
+mod partial_ord;
|
|
+
|
|
+#[macro_use]
|
|
+mod ord;
|
|
+
|
|
+#[macro_use]
|
|
+mod vertical;
|
|
diff --git a/third_party/rust/packed_simd/src/api/cmp/eq.rs b/third_party/rust/packed_simd/src/api/cmp/eq.rs
|
|
new file mode 100644
|
|
index 000000000000..3c55d0dce57e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cmp/eq.rs
|
|
@@ -0,0 +1,27 @@
|
|
+//! Implements `Eq` for vector types.
|
|
+
|
|
+macro_rules! impl_cmp_eq {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $test_tt:tt |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ impl crate::cmp::Eq for $id {}
|
|
+ impl crate::cmp::Eq for LexicographicallyOrdered<$id> {}
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_eq>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn eq() {
|
|
+ fn foo<E: crate::cmp::Eq>(_: E) {}
|
|
+ let a = $id::splat($false);
|
|
+ foo(a);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/cmp/ord.rs b/third_party/rust/packed_simd/src/api/cmp/ord.rs
|
|
new file mode 100644
|
|
index 000000000000..e54ba3bfde9a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cmp/ord.rs
|
|
@@ -0,0 +1,43 @@
|
|
+//! Implements `Ord` for vector types.
|
|
+
|
|
+macro_rules! impl_cmp_ord {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $test_tt:tt |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ impl $id {
|
|
+ /// Returns a wrapper that implements `Ord`.
|
|
+ #[inline]
|
|
+ pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> {
|
|
+ LexicographicallyOrdered(*self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::cmp::Ord for LexicographicallyOrdered<$id> {
|
|
+ #[inline]
|
|
+ fn cmp(&self, other: &Self) -> crate::cmp::Ordering {
|
|
+ match self.partial_cmp(other) {
|
|
+ Some(x) => x,
|
|
+ None => unsafe { crate::hint::unreachable_unchecked() },
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_ord>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn eq() {
|
|
+ fn foo<E: crate::cmp::Ord>(_: E) {}
|
|
+ let a = $id::splat($false);
|
|
+ foo(a.partial_lex_ord());
|
|
+ foo(a.lex_ord());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs
|
|
new file mode 100644
|
|
index 000000000000..1712a0de56cb
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs
|
|
@@ -0,0 +1,67 @@
|
|
+//! Implements `PartialEq` for vector types.
|
|
+
|
|
+macro_rules! impl_cmp_partial_eq {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $test_tt:tt |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
|
|
+ #[allow(clippy::partialeq_ne_impl)]
|
|
+ impl crate::cmp::PartialEq<$id> for $id {
|
|
+ #[inline]
|
|
+ fn eq(&self, other: &Self) -> bool {
|
|
+ $id::eq(*self, *other).all()
|
|
+ }
|
|
+ #[inline]
|
|
+ fn ne(&self, other: &Self) -> bool {
|
|
+ $id::ne(*self, *other).any()
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
|
|
+ #[allow(clippy::partialeq_ne_impl)]
|
|
+ impl crate::cmp::PartialEq<LexicographicallyOrdered<$id>>
|
|
+ for LexicographicallyOrdered<$id>
|
|
+ {
|
|
+ #[inline]
|
|
+ fn eq(&self, other: &Self) -> bool {
|
|
+ self.0 == other.0
|
|
+ }
|
|
+ #[inline]
|
|
+ fn ne(&self, other: &Self) -> bool {
|
|
+ self.0 != other.0
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_PartialEq>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn partial_eq() {
|
|
+ let a = $id::splat($false);
|
|
+ let b = $id::splat($true);
|
|
+
|
|
+ assert!(a != b);
|
|
+ assert!(!(a == b));
|
|
+ assert!(a == a);
|
|
+ assert!(!(a != a));
|
|
+
|
|
+ if $id::lanes() > 1 {
|
|
+ let a = $id::splat($false).replace(0, $true);
|
|
+ let b = $id::splat($true);
|
|
+
|
|
+ assert!(a != b);
|
|
+ assert!(!(a == b));
|
|
+ assert!(a == a);
|
|
+ assert!(!(a != a));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs
|
|
new file mode 100644
|
|
index 000000000000..a2292918bae1
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs
|
|
@@ -0,0 +1,234 @@
|
|
+//! Implements `PartialOrd` for vector types.
|
|
+//!
|
|
+//! This implements a lexicographical order.
|
|
+
|
|
+macro_rules! impl_cmp_partial_ord {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Returns a wrapper that implements `PartialOrd`.
|
|
+ #[inline]
|
|
+ pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> {
|
|
+ LexicographicallyOrdered(*self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::cmp::PartialOrd<LexicographicallyOrdered<$id>>
|
|
+ for LexicographicallyOrdered<$id>
|
|
+ {
|
|
+ #[inline]
|
|
+ fn partial_cmp(
|
|
+ &self, other: &Self,
|
|
+ ) -> Option<crate::cmp::Ordering> {
|
|
+ if PartialEq::eq(self, other) {
|
|
+ Some(crate::cmp::Ordering::Equal)
|
|
+ } else if PartialOrd::lt(self, other) {
|
|
+ Some(crate::cmp::Ordering::Less)
|
|
+ } else if PartialOrd::gt(self, other) {
|
|
+ Some(crate::cmp::Ordering::Greater)
|
|
+ } else {
|
|
+ None
|
|
+ }
|
|
+ }
|
|
+ #[inline]
|
|
+ fn lt(&self, other: &Self) -> bool {
|
|
+ let m_lt = self.0.lt(other.0);
|
|
+ let m_eq = self.0.eq(other.0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if m_eq.extract(i) {
|
|
+ continue;
|
|
+ }
|
|
+ return m_lt.extract(i);
|
|
+ }
|
|
+ false
|
|
+ }
|
|
+ #[inline]
|
|
+ fn le(&self, other: &Self) -> bool {
|
|
+ self.lt(other) | PartialEq::eq(self, other)
|
|
+ }
|
|
+ #[inline]
|
|
+ fn ge(&self, other: &Self) -> bool {
|
|
+ self.gt(other) | PartialEq::eq(self, other)
|
|
+ }
|
|
+ #[inline]
|
|
+ fn gt(&self, other: &Self) -> bool {
|
|
+ let m_gt = self.0.gt(other.0);
|
|
+ let m_eq = self.0.eq(other.0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if m_eq.extract(i) {
|
|
+ continue;
|
|
+ }
|
|
+ return m_gt.extract(i);
|
|
+ }
|
|
+ false
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! test_cmp_partial_ord_int {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_PartialOrd>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn partial_lex_ord() {
|
|
+ use crate::testing::utils::{test_cmp};
|
|
+ // constant values
|
|
+ let a = $id::splat(0);
|
|
+ let b = $id::splat(1);
|
|
+
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+
|
|
+ // variable values (4-lane example): a = [0, 1, 2, 3]; b = [4, 3, 2, 1]
|
|
+ let mut a = $id::splat(0);
|
|
+ let mut b = $id::splat(0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ a = a.replace(i, i as $elem_ty);
|
|
+ b = b.replace(i, ($id::lanes() - i) as $elem_ty);
|
|
+ }
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+
|
|
+ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4]
|
|
+ let mut b = a;
|
|
+ b = b.replace(
|
|
+ $id::lanes() - 1,
|
|
+ a.extract($id::lanes() - 1) + 1 as $elem_ty
|
|
+ );
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+
|
|
+ if $id::lanes() > 2 {
|
|
+ // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3]
|
|
+ let b = a;
|
|
+ let mut a = $id::splat(0);
|
|
+ a = a.replace(1, 1 as $elem_ty);
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+
|
|
+ // variable values: a = [0, 1, 0, 0]; b = [0, 1, 1, 0]
|
|
+ let mut b = a;
|
|
+ b = b.replace(
|
|
+ 2, a.extract($id::lanes() - 1) + 1 as $elem_ty
|
|
+ );
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(crate::cmp::Ordering::Equal));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! test_cmp_partial_ord_mask {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_PartialOrd>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn partial_lex_ord() {
|
|
+ use crate::testing::utils::{test_cmp};
|
|
+ use crate::cmp::Ordering;
|
|
+
|
|
+ // constant values
|
|
+ let a = $id::splat(false);
|
|
+ let b = $id::splat(true);
|
|
+
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+
|
|
+ // variable values:
|
|
+ // a = [false, false, false, false];
|
|
+ // b = [false, false, false, true]
|
|
+ let a = $id::splat(false);
|
|
+ let mut b = $id::splat(false);
|
|
+ b = b.replace($id::lanes() - 1, true);
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+
|
|
+ // variable values:
|
|
+ // a = [true, true, true, false];
|
|
+ // b = [true, true, true, true]
|
|
+ let mut a = $id::splat(true);
|
|
+ let b = $id::splat(true);
|
|
+ a = a.replace($id::lanes() - 1, false);
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+
|
|
+ if $id::lanes() > 2 {
|
|
+ // variable values
|
|
+ // a = [false, true, false, false];
|
|
+ // b = [false, true, true, true]
|
|
+ let mut a = $id::splat(false);
|
|
+ let mut b = $id::splat(true);
|
|
+ a = a.replace(1, true);
|
|
+ b = b.replace(0, false);
|
|
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Less));
|
|
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Greater));
|
|
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
|
|
+ Some(Ordering::Equal));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/cmp/vertical.rs b/third_party/rust/packed_simd/src/api/cmp/vertical.rs
|
|
new file mode 100644
|
|
index 000000000000..ea4a0d1a3467
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/cmp/vertical.rs
|
|
@@ -0,0 +1,114 @@
|
|
+//! Vertical (lane-wise) vector comparisons returning vector masks.
|
|
+
|
|
+macro_rules! impl_cmp_vertical {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident,
|
|
+ $mask_ty:ident,
|
|
+ $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt
|
|
+ ) => {
|
|
+ impl $id {
|
|
+ /// Lane-wise equality comparison.
|
|
+ #[inline]
|
|
+ pub fn eq(self, other: Self) -> $mask_ty {
|
|
+ use crate::llvm::simd_eq;
|
|
+ Simd(unsafe { simd_eq(self.0, other.0) })
|
|
+ }
|
|
+
|
|
+ /// Lane-wise inequality comparison.
|
|
+ #[inline]
|
|
+ pub fn ne(self, other: Self) -> $mask_ty {
|
|
+ use crate::llvm::simd_ne;
|
|
+ Simd(unsafe { simd_ne(self.0, other.0) })
|
|
+ }
|
|
+
|
|
+ /// Lane-wise less-than comparison.
|
|
+ #[inline]
|
|
+ pub fn lt(self, other: Self) -> $mask_ty {
|
|
+ use crate::llvm::{simd_gt, simd_lt};
|
|
+ if $is_mask {
|
|
+ Simd(unsafe { simd_gt(self.0, other.0) })
|
|
+ } else {
|
|
+ Simd(unsafe { simd_lt(self.0, other.0) })
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise less-than-or-equals comparison.
|
|
+ #[inline]
|
|
+ pub fn le(self, other: Self) -> $mask_ty {
|
|
+ use crate::llvm::{simd_ge, simd_le};
|
|
+ if $is_mask {
|
|
+ Simd(unsafe { simd_ge(self.0, other.0) })
|
|
+ } else {
|
|
+ Simd(unsafe { simd_le(self.0, other.0) })
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise greater-than comparison.
|
|
+ #[inline]
|
|
+ pub fn gt(self, other: Self) -> $mask_ty {
|
|
+ use crate::llvm::{simd_gt, simd_lt};
|
|
+ if $is_mask {
|
|
+ Simd(unsafe { simd_lt(self.0, other.0) })
|
|
+ } else {
|
|
+ Simd(unsafe { simd_gt(self.0, other.0) })
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise greater-than-or-equals comparison.
|
|
+ #[inline]
|
|
+ pub fn ge(self, other: Self) -> $mask_ty {
|
|
+ use crate::llvm::{simd_ge, simd_le};
|
|
+ if $is_mask {
|
|
+ Simd(unsafe { simd_le(self.0, other.0) })
|
|
+ } else {
|
|
+ Simd(unsafe { simd_ge(self.0, other.0) })
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_vertical>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn cmp() {
|
|
+ let a = $id::splat($false);
|
|
+ let b = $id::splat($true);
|
|
+
|
|
+ let r = a.lt(b);
|
|
+ let e = $mask_ty::splat(true);
|
|
+ assert!(r == e);
|
|
+ let r = a.le(b);
|
|
+ assert!(r == e);
|
|
+
|
|
+ let e = $mask_ty::splat(false);
|
|
+ let r = a.gt(b);
|
|
+ assert!(r == e);
|
|
+ let r = a.ge(b);
|
|
+ assert!(r == e);
|
|
+ let r = a.eq(b);
|
|
+ assert!(r == e);
|
|
+
|
|
+ let mut a = a;
|
|
+ let mut b = b;
|
|
+ let mut e = e;
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ a = a.replace(i, $false);
|
|
+ b = b.replace(i, $true);
|
|
+ e = e.replace(i, true);
|
|
+ } else {
|
|
+ a = a.replace(i, $true);
|
|
+ b = b.replace(i, $false);
|
|
+ e = e.replace(i, false);
|
|
+ }
|
|
+ }
|
|
+ let r = a.lt(b);
|
|
+ assert!(r == e);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/default.rs b/third_party/rust/packed_simd/src/api/default.rs
|
|
new file mode 100644
|
|
index 000000000000..843d51bcc4bb
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/default.rs
|
|
@@ -0,0 +1,28 @@
|
|
+//! Implements `Default` for vector types.
|
|
+
|
|
+macro_rules! impl_default {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl Default for $id {
|
|
+ #[inline]
|
|
+ fn default() -> Self {
|
|
+ Self::splat($elem_ty::default())
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _default>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn default() {
|
|
+ let a = $id::default();
|
|
+ for i in 0..$id::lanes() {
|
|
+ assert_eq!(a.extract(i), $elem_ty::default());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/fmt.rs b/third_party/rust/packed_simd/src/api/fmt.rs
|
|
new file mode 100644
|
|
index 000000000000..f3f55c401548
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/fmt.rs
|
|
@@ -0,0 +1,12 @@
|
|
+//! Implements formatting APIs
|
|
+
|
|
+#[macro_use]
|
|
+mod debug;
|
|
+#[macro_use]
|
|
+mod lower_hex;
|
|
+#[macro_use]
|
|
+mod upper_hex;
|
|
+#[macro_use]
|
|
+mod octal;
|
|
+#[macro_use]
|
|
+mod binary;
|
|
diff --git a/third_party/rust/packed_simd/src/api/fmt/binary.rs b/third_party/rust/packed_simd/src/api/fmt/binary.rs
|
|
new file mode 100644
|
|
index 000000000000..b60769082d51
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/fmt/binary.rs
|
|
@@ -0,0 +1,56 @@
|
|
+//! Implement `Binary` formatting
|
|
+
|
|
+macro_rules! impl_fmt_binary {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::fmt::Binary for $id {
|
|
+ #[allow(clippy::missing_inline_in_public_items)]
|
|
+ fn fmt(
|
|
+ &self, f: &mut crate::fmt::Formatter<'_>,
|
|
+ ) -> crate::fmt::Result {
|
|
+ write!(f, "{}(", stringify!($id))?;
|
|
+ for i in 0..$elem_count {
|
|
+ if i > 0 {
|
|
+ write!(f, ", ")?;
|
|
+ }
|
|
+ self.extract(i).fmt(f)?;
|
|
+ }
|
|
+ write!(f, ")")
|
|
+ }
|
|
+ }
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _fmt_binary>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn binary() {
|
|
+ use arrayvec::{ArrayString,ArrayVec};
|
|
+ type TinyString = ArrayString<[u8; 512]>;
|
|
+
|
|
+ use crate::fmt::Write;
|
|
+ let v = $id::splat($elem_ty::default());
|
|
+ let mut s = TinyString::new();
|
|
+ write!(&mut s, "{:#b}", v).unwrap();
|
|
+
|
|
+ let mut beg = TinyString::new();
|
|
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
|
|
+ assert!(s.starts_with(beg.as_str()));
|
|
+ assert!(s.ends_with(")"));
|
|
+ let s: ArrayVec<[TinyString; 64]>
|
|
+ = s.replace(beg.as_str(), "")
|
|
+ .replace(")", "").split(",")
|
|
+ .map(|v| TinyString::from(v.trim()).unwrap())
|
|
+ .collect();
|
|
+ assert_eq!(s.len(), $id::lanes());
|
|
+ for (index, ss) in s.into_iter().enumerate() {
|
|
+ let mut e = TinyString::new();
|
|
+ write!(&mut e, "{:#b}", v.extract(index)).unwrap();
|
|
+ assert_eq!(ss, e);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/fmt/debug.rs b/third_party/rust/packed_simd/src/api/fmt/debug.rs
|
|
new file mode 100644
|
|
index 000000000000..ad0b8a59a1f0
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/fmt/debug.rs
|
|
@@ -0,0 +1,62 @@
|
|
+//! Implement debug formatting
|
|
+
|
|
+macro_rules! impl_fmt_debug_tests {
|
|
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _fmt_debug>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn debug() {
|
|
+ use arrayvec::{ArrayString,ArrayVec};
|
|
+ type TinyString = ArrayString<[u8; 512]>;
|
|
+
|
|
+ use crate::fmt::Write;
|
|
+ let v = $id::default();
|
|
+ let mut s = TinyString::new();
|
|
+ write!(&mut s, "{:?}", v).unwrap();
|
|
+
|
|
+ let mut beg = TinyString::new();
|
|
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
|
|
+ assert!(s.starts_with(beg.as_str()));
|
|
+ assert!(s.ends_with(")"));
|
|
+ let s: ArrayVec<[TinyString; 64]>
|
|
+ = s.replace(beg.as_str(), "")
|
|
+ .replace(")", "").split(",")
|
|
+ .map(|v| TinyString::from(v.trim()).unwrap())
|
|
+ .collect();
|
|
+ assert_eq!(s.len(), $id::lanes());
|
|
+ for (index, ss) in s.into_iter().enumerate() {
|
|
+ let mut e = TinyString::new();
|
|
+ write!(&mut e, "{:?}", v.extract(index)).unwrap();
|
|
+ assert_eq!(ss, e);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! impl_fmt_debug {
|
|
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::fmt::Debug for $id {
|
|
+ #[allow(clippy::missing_inline_in_public_items)]
|
|
+ fn fmt(
|
|
+ &self, f: &mut crate::fmt::Formatter<'_>,
|
|
+ ) -> crate::fmt::Result {
|
|
+ write!(f, "{}(", stringify!($id))?;
|
|
+ for i in 0..$elem_count {
|
|
+ if i > 0 {
|
|
+ write!(f, ", ")?;
|
|
+ }
|
|
+ self.extract(i).fmt(f)?;
|
|
+ }
|
|
+ write!(f, ")")
|
|
+ }
|
|
+ }
|
|
+ impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt);
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs
|
|
new file mode 100644
|
|
index 000000000000..5a7aa14b5b8a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs
|
|
@@ -0,0 +1,56 @@
|
|
+//! Implement `LowerHex` formatting
|
|
+
|
|
+macro_rules! impl_fmt_lower_hex {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::fmt::LowerHex for $id {
|
|
+ #[allow(clippy::missing_inline_in_public_items)]
|
|
+ fn fmt(
|
|
+ &self, f: &mut crate::fmt::Formatter<'_>,
|
|
+ ) -> crate::fmt::Result {
|
|
+ write!(f, "{}(", stringify!($id))?;
|
|
+ for i in 0..$elem_count {
|
|
+ if i > 0 {
|
|
+ write!(f, ", ")?;
|
|
+ }
|
|
+ self.extract(i).fmt(f)?;
|
|
+ }
|
|
+ write!(f, ")")
|
|
+ }
|
|
+ }
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _fmt_lower_hex>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn lower_hex() {
|
|
+ use arrayvec::{ArrayString,ArrayVec};
|
|
+ type TinyString = ArrayString<[u8; 512]>;
|
|
+
|
|
+ use crate::fmt::Write;
|
|
+ let v = $id::splat($elem_ty::default());
|
|
+ let mut s = TinyString::new();
|
|
+ write!(&mut s, "{:#x}", v).unwrap();
|
|
+
|
|
+ let mut beg = TinyString::new();
|
|
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
|
|
+ assert!(s.starts_with(beg.as_str()));
|
|
+ assert!(s.ends_with(")"));
|
|
+ let s: ArrayVec<[TinyString; 64]>
|
|
+ = s.replace(beg.as_str(), "").replace(")", "")
|
|
+ .split(",")
|
|
+ .map(|v| TinyString::from(v.trim()).unwrap())
|
|
+ .collect();
|
|
+ assert_eq!(s.len(), $id::lanes());
|
|
+ for (index, ss) in s.into_iter().enumerate() {
|
|
+ let mut e = TinyString::new();
|
|
+ write!(&mut e, "{:#x}", v.extract(index)).unwrap();
|
|
+ assert_eq!(ss, e);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/fmt/octal.rs b/third_party/rust/packed_simd/src/api/fmt/octal.rs
|
|
new file mode 100644
|
|
index 000000000000..83ac8abc7dae
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/fmt/octal.rs
|
|
@@ -0,0 +1,56 @@
|
|
+//! Implement Octal formatting
|
|
+
|
|
+macro_rules! impl_fmt_octal {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::fmt::Octal for $id {
|
|
+ #[allow(clippy::missing_inline_in_public_items)]
|
|
+ fn fmt(
|
|
+ &self, f: &mut crate::fmt::Formatter<'_>,
|
|
+ ) -> crate::fmt::Result {
|
|
+ write!(f, "{}(", stringify!($id))?;
|
|
+ for i in 0..$elem_count {
|
|
+ if i > 0 {
|
|
+ write!(f, ", ")?;
|
|
+ }
|
|
+ self.extract(i).fmt(f)?;
|
|
+ }
|
|
+ write!(f, ")")
|
|
+ }
|
|
+ }
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _fmt_octal>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn octal_hex() {
|
|
+ use arrayvec::{ArrayString,ArrayVec};
|
|
+ type TinyString = ArrayString<[u8; 512]>;
|
|
+
|
|
+ use crate::fmt::Write;
|
|
+ let v = $id::splat($elem_ty::default());
|
|
+ let mut s = TinyString::new();
|
|
+ write!(&mut s, "{:#o}", v).unwrap();
|
|
+
|
|
+ let mut beg = TinyString::new();
|
|
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
|
|
+ assert!(s.starts_with(beg.as_str()));
|
|
+ assert!(s.ends_with(")"));
|
|
+ let s: ArrayVec<[TinyString; 64]>
|
|
+ = s.replace(beg.as_str(), "").replace(")", "")
|
|
+ .split(",")
|
|
+ .map(|v| TinyString::from(v.trim()).unwrap())
|
|
+ .collect();
|
|
+ assert_eq!(s.len(), $id::lanes());
|
|
+ for (index, ss) in s.into_iter().enumerate() {
|
|
+ let mut e = TinyString::new();
|
|
+ write!(&mut e, "{:#o}", v.extract(index)).unwrap();
|
|
+ assert_eq!(ss, e);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs
|
|
new file mode 100644
|
|
index 000000000000..aa88f673abf0
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs
|
|
@@ -0,0 +1,56 @@
|
|
+//! Implement `UpperHex` formatting
|
|
+
|
|
+macro_rules! impl_fmt_upper_hex { // implements `fmt::UpperHex` for the vector type `$id`
|
|
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+        impl crate::fmt::UpperHex for $id {
|
|
+            #[allow(clippy::missing_inline_in_public_items)]
|
|
+            fn fmt(
|
|
+                &self, f: &mut crate::fmt::Formatter<'_>,
|
|
+            ) -> crate::fmt::Result {
|
|
+                write!(f, "{}(", stringify!($id))?; // output shape: `Id(lane0, lane1, ...)`
|
|
+                for i in 0..$elem_count {
|
|
+                    if i > 0 {
|
|
+                        write!(f, ", ")?; // separator goes before every lane except the first
|
|
+                    }
|
|
+                    self.extract(i).fmt(f)?; // lane `i` is written through the same formatter `f`
|
|
+                }
|
|
+                write!(f, ")")
|
|
+            }
|
|
+        }
|
|
+        test_if! {
|
|
+            $test_tt:
|
|
+            paste::item! {
|
|
+                pub mod [<$id _fmt_upper_hex>] {
|
|
+                    use super::*;
|
|
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+                    fn upper_hex() {
|
|
+                        use arrayvec::{ArrayString,ArrayVec};
|
|
+                        type TinyString = ArrayString<[u8; 512]>;
|
|
+
|
|
+                        use crate::fmt::Write;
|
|
+                        let v = $id::splat($elem_ty::default());
|
|
+                        let mut s = TinyString::new();
|
|
+                        write!(&mut s, "{:#X}", v).unwrap();
|
|
+
|
|
+                        let mut beg = TinyString::new();
|
|
+                        write!(&mut beg, "{}(", stringify!($id)).unwrap();
|
|
+                        assert!(s.starts_with(beg.as_str()));
|
|
+                        assert!(s.ends_with(")"));
|
|
+                        let s: ArrayVec<[TinyString; 64]> // one entry per formatted lane
|
|
+                            = s.replace(beg.as_str(), "").replace(")", "")
|
|
+                            .split(",")
|
|
+                            .map(|v| TinyString::from(v.trim()).unwrap())
|
|
+                            .collect();
|
|
+                        assert_eq!(s.len(), $id::lanes());
|
|
+                        for (index, ss) in s.into_iter().enumerate() {
|
|
+                            let mut e = TinyString::new();
|
|
+                            write!(&mut e, "{:#X}", v.extract(index)).unwrap(); // expected text: the lane formatted on its own
|
|
+                            assert_eq!(ss, e);
|
|
+                        }
|
|
+                    }
|
|
+                }
|
|
+            }
|
|
+        }
|
|
+    };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/from.rs b/third_party/rust/packed_simd/src/api/from.rs
|
|
new file mode 100644
|
|
index 000000000000..c30c4d6e216d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/from.rs
|
|
@@ -0,0 +1,7 @@
|
|
+//! Implementations of the `From` and `Into` traits
|
|
+
|
|
+#[macro_use]
|
|
+mod from_array;
|
|
+
|
|
+#[macro_use]
|
|
+mod from_vector;
|
|
diff --git a/third_party/rust/packed_simd/src/api/from/from_array.rs b/third_party/rust/packed_simd/src/api/from/from_array.rs
|
|
new file mode 100644
|
|
index 000000000000..964d1501df6a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/from/from_array.rs
|
|
@@ -0,0 +1,121 @@
|
|
+//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types.
|
|
+
|
|
+macro_rules! impl_from_array { // `From` conversions between `$id` and `[$elem_ty; $elem_count]`, in both directions
|
|
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
|
|
+     | ($non_default_array:expr, $non_default_vec:expr)) => {
|
|
+        impl From<[$elem_ty; $elem_count]> for $id {
|
|
+            #[inline]
|
|
+            fn from(array: [$elem_ty; $elem_count]) -> Self {
|
|
+                union U {
|
|
+                    array: [$elem_ty; $elem_count],
|
|
+                    vec: $id,
|
|
+                }
|
|
+                unsafe { U { array }.vec } // stores the `array` field, then reads the union back as `vec`
|
|
+            }
|
|
+        }
|
|
+
|
|
+        impl From<$id> for [$elem_ty; $elem_count] {
|
|
+            #[inline]
|
|
+            fn from(vec: $id) -> Self {
|
|
+                union U {
|
|
+                    array: [$elem_ty; $elem_count],
|
|
+                    vec: $id,
|
|
+                }
|
|
+                unsafe { U { vec }.array } // stores the `vec` field, then reads the union back as `array`
|
|
+            }
|
|
+        }
|
|
+
|
|
+        // FIXME: `Into::into` is not inline, but due to
|
|
+        // the blanket impl in `std`, which is not
|
|
+        // marked `default`, we cannot override it here with
|
|
+        // specialization.
|
|
+        /*
|
|
+        impl Into<[$elem_ty; $elem_count]> for $id {
|
|
+            #[inline]
|
|
+            fn into(self) -> [$elem_ty; $elem_count] {
|
|
+                union U {
|
|
+                    array: [$elem_ty; $elem_count],
|
|
+                    vec: $id,
|
|
+                }
|
|
+                unsafe { U { vec: self }.array }
|
|
+            }
|
|
+        }
|
|
+
|
|
+        impl Into<$id> for [$elem_ty; $elem_count] {
|
|
+            #[inline]
|
|
+            fn into(self) -> $id {
|
|
+                union U {
|
|
+                    array: [$elem_ty; $elem_count],
|
|
+                    vec: $id,
|
|
+                }
|
|
+                unsafe { U { array: self }.vec }
|
|
+            }
|
|
+        }
|
|
+        */
|
|
+
|
|
+        test_if! {
|
|
+            $test_tt:
|
|
+            paste::item! {
|
|
+                mod [<$id _from>] {
|
|
+                    use super::*;
|
|
+                    #[test] // NOTE(review): sibling macros use the wasm32-aware `cfg_attr` pair here; confirm plain `#[test]` is intended
|
|
+                    fn array() {
|
|
+                        let vec: $id = Default::default();
|
|
+
|
|
+                        // FIXME: Workaround for arrays with more than 32
|
|
+                        // elements.
|
|
+                        //
|
|
+                        // Safe because we never take a reference to any
|
|
+                        // uninitialized element.
|
|
+                        union W {
|
|
+                            array: [$elem_ty; $elem_count],
|
|
+                            other: ()
|
|
+                        }
|
|
+                        let mut array = W { other: () };
|
|
+                        for i in 0..$elem_count { // fill the array one element at a time through a raw pointer
|
|
+                            let default: $elem_ty = Default::default();
|
|
+                            // note: array.other is the active member and
|
|
+                            // initialized so we can take a reference to it:
|
|
+                            let p = unsafe {
|
|
+                                &mut array.other as *mut () as *mut $elem_ty
|
|
+                            };
|
|
+                            // note: default is a valid bit-pattern for
|
|
+                            // $elem_ty:
|
|
+                            unsafe {
|
|
+                                crate::ptr::write(p.wrapping_add(i), default)
|
|
+                            };
|
|
+                        }
|
|
+                        // note: the array variant of the union is properly
|
|
+                        // initialized:
|
|
+                        let mut array = unsafe {
|
|
+                            array.array
|
|
+                        };
|
|
+
|
|
+                        array[0] = $non_default_array; // element 0 is set from the caller-supplied expression ...
|
|
+                        let vec = vec.replace(0, $non_default_vec); // ... and so is lane 0 of the vector
|
|
+
|
|
+                        let vec_from_array = $id::from(array);
|
|
+                        assert_eq!(vec_from_array, vec);
|
|
+                        let array_from_vec
|
|
+                            = <[$elem_ty; $elem_count]>::from(vec);
|
|
+                        // FIXME: Workaround for arrays with more than 32
|
|
+                        // elements.
|
|
+                        for i in 0..$elem_count {
|
|
+                            assert_eq!(array_from_vec[i], array[i]);
|
|
+                        }
|
|
+
|
|
+                        let vec_from_into_array: $id = array.into(); // same checks again via `Into`
|
|
+                        assert_eq!(vec_from_into_array, vec);
|
|
+                        let array_from_into_vec: [$elem_ty; $elem_count]
|
|
+                            = vec.into();
|
|
+                        // FIXME: Workaround for arrays with more than 32
|
|
+                        // elements.
|
|
+                        for i in 0..$elem_count {
|
|
+                            assert_eq!(array_from_into_vec[i], array[i]);
|
|
+                        }
|
|
+                    }
|
|
+                }
|
|
+            }
|
|
+        }
|
|
+    };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/from/from_vector.rs b/third_party/rust/packed_simd/src/api/from/from_vector.rs
|
|
new file mode 100644
|
|
index 000000000000..55f70016d51d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/from/from_vector.rs
|
|
@@ -0,0 +1,67 @@
|
|
+//! Implements `From` and `Into` for vector types.
|
|
+
|
|
+macro_rules! impl_from_vector { // implements `From<$source>` for `$id`, plus a default-value test
|
|
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
|
|
+     | $source:ident) => {
|
|
+        impl From<$source> for $id {
|
|
+            #[inline]
|
|
+            fn from(source: $source) -> Self {
|
|
+                fn static_assert_same_number_of_lanes<T, U>() // empty body: only the bounds matter
|
|
+                where
|
|
+                    T: crate::sealed::Simd,
|
|
+                    U: crate::sealed::Simd<LanesType = T::LanesType>, // type-checks only if both share `LanesType`
|
|
+                {
|
|
+                }
|
|
+                use crate::llvm::simd_cast;
|
|
+                static_assert_same_number_of_lanes::<$id, $source>();
|
|
+                Simd(unsafe { simd_cast(source.0) }) // NOTE(review): presumably a lane-wise numeric cast; confirm in `crate::llvm`
|
|
+            }
|
|
+        }
|
|
+
|
|
+        // FIXME: `Into::into` is not inline, but due to the blanket impl in
|
|
+        // `std`, which is not marked `default`, we cannot override it here
|
|
+        // with specialization.
|
|
+
|
|
+        /*
|
|
+        impl Into<$id> for $source {
|
|
+            #[inline]
|
|
+            fn into(self) -> $id {
|
|
+                unsafe { simd_cast(self) }
|
|
+            }
|
|
+        }
|
|
+        */
|
|
+
|
|
+        test_if! {
|
|
+            $test_tt:
|
|
+            paste::item! {
|
|
+                pub mod [<$id _from_ $source>] {
|
|
+                    use super::*;
|
|
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+                    fn from() {
|
|
+                        assert_eq!($id::lanes(), $source::lanes());
|
|
+                        let source: $source = Default::default();
|
|
+                        let vec: $id = Default::default();
|
|
+
|
|
+                        let e = $id::from(source); // a default source must convert to the default target
|
|
+                        assert_eq!(e, vec);
|
|
+
|
|
+                        let e: $id = source.into(); // same check via `Into`
|
|
+                        assert_eq!(e, vec);
|
|
+                    }
|
|
+                }
|
|
+            }
|
|
+        }
|
|
+    };
|
|
+}
|
|
+
|
|
+macro_rules! impl_from_vectors { // expands `impl_from_vector!` once per listed `$source` type
|
|
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
|
|
+     | $($source:ident),*) => {
|
|
+        $(
|
|
+            impl_from_vector!(
|
|
+                [$elem_ty; $elem_count]: $id | $test_tt | $source
|
|
+            );
|
|
+        )*
|
|
+    }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/hash.rs b/third_party/rust/packed_simd/src/api/hash.rs
|
|
new file mode 100644
|
|
index 000000000000..08d42496ea8b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/hash.rs
|
|
@@ -0,0 +1,47 @@
|
|
+//! Implements `Hash` for vector types.
|
|
+
|
|
+macro_rules! impl_hash { // implements `Hash` for `$id` by hashing its lanes as an array
|
|
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+        impl crate::hash::Hash for $id {
|
|
+            #[inline]
|
|
+            fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
|
|
+                unsafe {
|
|
+                    union A {
|
|
+                        data: [$elem_ty; $id::lanes()],
|
|
+                        vec: $id,
|
|
+                    }
|
|
+                    A { vec: *self }.data.hash(state) // read the vector back as `[$elem_ty; lanes]` and hash that array
|
|
+                }
|
|
+            }
|
|
+        }
|
|
+
|
|
+        test_if! {
|
|
+            $test_tt:
|
|
+            paste::item! {
|
|
+                pub mod [<$id _hash>] {
|
|
+                    use super::*;
|
|
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+                    fn hash() {
|
|
+                        use crate::hash::{Hash, Hasher};
|
|
+                        #[allow(deprecated)]
|
|
+                        use crate::hash::{SipHasher13};
|
|
+                        type A = [$elem_ty; $id::lanes()];
|
|
+                        let a: A = [42 as $elem_ty; $id::lanes()];
|
|
+                        assert_eq!(
|
|
+                            crate::mem::size_of::<A>(),
|
|
+                            crate::mem::size_of::<$id>()
|
|
+                        );
|
|
+                        #[allow(deprecated)]
|
|
+                        let mut a_hash = SipHasher13::new();
|
|
+                        let mut v_hash = a_hash.clone(); // both hashers start from the same state
|
|
+                        a.hash(&mut a_hash);
|
|
+
|
|
+                        let v = $id::splat(42 as $elem_ty);
|
|
+                        v.hash(&mut v_hash);
|
|
+                        assert_eq!(a_hash.finish(), v_hash.finish()); // vector and equivalent array must hash identically
|
|
+                    }
|
|
+                }
|
|
+            }
|
|
+        }
|
|
+    };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits.rs b/third_party/rust/packed_simd/src/api/into_bits.rs
|
|
new file mode 100644
|
|
index 000000000000..f2cc1bae5397
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits.rs
|
|
@@ -0,0 +1,59 @@
|
|
+//! Implementation of `FromBits` and `IntoBits`.
|
|
+
|
|
+/// Safe lossless bitwise conversion from `T` to `Self`; the blanket impl below also gives `T: IntoBits<Self>`.
|
|
+pub trait FromBits<T>: crate::marker::Sized {
|
|
+    /// Safe lossless bitwise transmute from `T` to `Self`.
|
|
+    fn from_bits(t: T) -> Self;
|
|
+}
|
|
+
|
|
+/// Safe lossless bitwise conversion from `Self` to `T`; provided by a blanket impl whenever `T: FromBits<Self>`.
|
|
+pub trait IntoBits<T>: crate::marker::Sized {
|
|
+    /// Safe lossless bitwise transmute from `self` to `T`.
|
|
+    fn into_bits(self) -> T;
|
|
+}
|
|
+
|
|
+/// `FromBits` implies `IntoBits`: `T: IntoBits<U>` holds whenever `U: FromBits<T>`.
|
|
+impl<T, U> IntoBits<U> for T
|
|
+where
|
|
+    U: FromBits<T>,
|
|
+{
|
|
+    #[inline]
|
|
+    fn into_bits(self) -> U {
|
|
+        debug_assert!( // debug-build sanity check: source and target must have the same size
|
|
+            crate::mem::size_of::<Self>() == crate::mem::size_of::<U>()
|
|
+        );
|
|
+        U::from_bits(self) // all the work is delegated to the `FromBits` impl
|
|
+    }
|
|
+}
|
|
+
|
|
+/// `FromBits` and `IntoBits` are reflexive: every type converts to itself unchanged.
|
|
+impl<T> FromBits<T> for T {
|
|
+    #[inline]
|
|
+    fn from_bits(t: Self) -> Self {
|
|
+        t
|
|
+    }
|
|
+}
|
|
+
|
|
+#[macro_use]
|
|
+mod macros;
|
|
+
|
|
+mod v16;
|
|
+pub use self::v16::*;
|
|
+
|
|
+mod v32;
|
|
+pub use self::v32::*;
|
|
+
|
|
+mod v64;
|
|
+pub use self::v64::*;
|
|
+
|
|
+mod v128;
|
|
+pub use self::v128::*;
|
|
+
|
|
+mod v256;
|
|
+pub use self::v256::*;
|
|
+
|
|
+mod v512;
|
|
+pub use self::v512::*;
|
|
+
|
|
+mod arch_specific;
|
|
+pub use self::arch_specific::*;
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
|
|
new file mode 100644
|
|
index 000000000000..6cc2fa37b728
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
|
|
@@ -0,0 +1,190 @@
|
|
+//! `FromBits` and `IntoBits` between portable vector types and the
|
|
+//! architecture-specific vector types.
|
|
+#![rustfmt::skip]
|
|
+
|
|
+// FIXME: MIPS FromBits/IntoBits
|
|
+
|
|
+#[allow(unused)]
|
|
+use crate::*;
|
|
+
|
|
+/// This macro implements `FromBits` between the portable vector types and the
|
|
+/// architecture-specific vector types, in both directions.
|
|
+///
|
|
+/// The "leaf" case is at the bottom, and the most generic case is at the top.
|
|
+/// The generic case is split into smaller cases recursively.
|
|
+macro_rules! impl_arch {
|
|
+    ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*],
|
|
+     $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* |
|
|
+     from: $($from_ty:ident),* | into: $($into_ty:ident),* |
|
|
+     test: $test_tt:tt) => { // several `[arch[..]: types]` groups: handle the first, then recurse on the rest
|
|
+        impl_arch!(
|
|
+            [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] |
|
|
+            from: $($from_ty),* |
|
|
+            into: $($into_ty),* |
|
|
+            test: $test_tt
|
|
+        );
|
|
+        impl_arch!(
|
|
+            $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* |
|
|
+            from: $($from_ty),* |
|
|
+            into: $($into_ty),* |
|
|
+            test: $test_tt
|
|
+        );
|
|
+    };
|
|
+    ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] |
|
|
+     from: $($from_ty:ident),* | into: $($into_ty:ident),* |
|
|
+     test: $test_tt:tt) => { // a single arch group: import its types and expand the impls, gated on the target
|
|
+        // note: if target is "arm", "+v7,+neon" must be enabled
|
|
+        // and the std library must be recompiled with them
|
|
+        #[cfg(any(
|
|
+            not(target_arch = "arm"),
|
|
+            all(target_feature = "v7", target_feature = "neon",
|
|
+                any(feature = "core_arch", libcore_neon)))
|
|
+        )]
|
|
+        // note: if target is "powerpc", "altivec" must be enabled
|
|
+        // and the std library must be recompiled with it
|
|
+        #[cfg(any(
|
|
+            not(target_arch = "powerpc"),
|
|
+            all(target_feature = "altivec", feature = "core_arch"),
|
|
+        ))]
|
|
+        #[cfg(target_arch = $arch_tt)]
|
|
+        use crate::arch::$arch::{
|
|
+            $($arch_ty),*
|
|
+        };
|
|
+
|
|
+        #[cfg(any( // same three gates as on the `use` above, repeated for the expansion
|
|
+            not(target_arch = "arm"),
|
|
+            all(target_feature = "v7", target_feature = "neon",
|
|
+                any(feature = "core_arch", libcore_neon)))
|
|
+        )]
|
|
+        #[cfg(any(
|
|
+            not(target_arch = "powerpc"),
|
|
+            all(target_feature = "altivec", feature = "core_arch"),
|
|
+        ))]
|
|
+        #[cfg(target_arch = $arch_tt)]
|
|
+        impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* |
|
|
+                   test: $test_tt);
|
|
+    };
|
|
+    ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),*
|
|
+     | $($into_ty:ident),* | test: $test_tt:tt) => { // several arch types: handle the first, then recurse on the rest
|
|
+        impl_arch!($arch_head | $($from_ty),* | $($into_ty),* |
|
|
+                   test: $test_tt);
|
|
+        impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* |
|
|
+                   test: $test_tt);
|
|
+    };
|
|
+    ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* |
|
|
+     test: $test_tt:tt) => { // leaf: exactly one arch type
|
|
+        impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); // arch type from each portable `from` type
|
|
+        impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); // each portable `into` type from the arch type
|
|
+    };
|
|
+}
|
|
+
|
|
+////////////////////////////////////////////////////////////////////////////////
|
|
+// Implementations for the 64-bit wide vector types:
|
|
+
|
|
+// FIXME: 64-bit single element types
|
|
+// FIXME: arm/aarch float16x4_t missing
|
|
+impl_arch!(
|
|
+ [x86["x86"]: __m64], [x86_64["x86_64"]: __m64],
|
|
+ [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t,
|
|
+ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t,
|
|
+ uint64x1_t],
|
|
+ [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t,
|
|
+ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t,
|
|
+ float64x1_t] |
|
|
+ from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 |
|
|
+ into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 |
|
|
+ test: test_v64
|
|
+);
|
|
+
|
|
+////////////////////////////////////////////////////////////////////////////////
|
|
+// Implementations for the 128-bit wide vector types:
|
|
+
|
|
+// FIXME: arm/aarch float16x8_t missing
|
|
+// FIXME: ppc vector_pixel missing
|
|
+// FIXME: ppc64 vector_Float16 missing
|
|
+// FIXME: ppc64 vector_signed_long_long missing
|
|
+// FIXME: ppc64 vector_unsigned_long_long missing
|
|
+// FIXME: ppc64 vector_bool_long_long missing
|
|
+// FIXME: ppc64 vector_signed___int128 missing
|
|
+// FIXME: ppc64 vector_unsigned___int128 missing
|
|
+impl_arch!(
|
|
+ [x86["x86"]: __m128, __m128i, __m128d],
|
|
+ [x86_64["x86_64"]: __m128, __m128i, __m128d],
|
|
+ [arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t,
|
|
+ poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t],
|
|
+ [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t,
|
|
+ uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t,
|
|
+ uint64x2_t, float64x2_t],
|
|
+ [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char,
|
|
+ vector_signed_short, vector_unsigned_short, vector_signed_int,
|
|
+ vector_unsigned_int, vector_float],
|
|
+ [powerpc64["powerpc64"]: vector_signed_char, vector_unsigned_char,
|
|
+ vector_signed_short, vector_unsigned_short, vector_signed_int,
|
|
+ vector_unsigned_int, vector_float, vector_signed_long,
|
|
+ vector_unsigned_long, vector_double] |
|
|
+ from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4,
|
|
+ i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 |
|
|
+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2,
|
|
+ i128x1, u128x1 |
|
|
+ test: test_v128
|
|
+);
|
|
+
|
|
+impl_arch!(
|
|
+ [powerpc["powerpc"]: vector_bool_char],
|
|
+ [powerpc64["powerpc64"]: vector_bool_char] |
|
|
+ from: m8x16, m16x8, m32x4, m64x2, m128x1 |
|
|
+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
|
|
+ i64x2, u64x2, f64x2, i128x1, u128x1,
|
|
+ // Masks:
|
|
+ m8x16 |
|
|
+ test: test_v128
|
|
+);
|
|
+
|
|
+impl_arch!(
|
|
+ [powerpc["powerpc"]: vector_bool_short],
|
|
+ [powerpc64["powerpc64"]: vector_bool_short] |
|
|
+ from: m16x8, m32x4, m64x2, m128x1 |
|
|
+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
|
|
+ i64x2, u64x2, f64x2, i128x1, u128x1,
|
|
+ // Masks:
|
|
+ m8x16, m16x8 |
|
|
+ test: test_v128
|
|
+);
|
|
+
|
|
+impl_arch!(
|
|
+ [powerpc["powerpc"]: vector_bool_int],
|
|
+ [powerpc64["powerpc64"]: vector_bool_int] |
|
|
+ from: m32x4, m64x2, m128x1 |
|
|
+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
|
|
+ i64x2, u64x2, f64x2, i128x1, u128x1,
|
|
+ // Masks:
|
|
+ m8x16, m16x8, m32x4 |
|
|
+ test: test_v128
|
|
+);
|
|
+
|
|
+impl_arch!(
|
|
+ [powerpc64["powerpc64"]: vector_bool_long] |
|
|
+ from: m64x2, m128x1 |
|
|
+ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4,
|
|
+ i64x2, u64x2, f64x2, i128x1, u128x1,
|
|
+ // Masks:
|
|
+ m8x16, m16x8, m32x4, m64x2 |
|
|
+ test: test_v128
|
|
+);
|
|
+
|
|
+////////////////////////////////////////////////////////////////////////////////
|
|
+// Implementations for the 256-bit wide vector types
|
|
+
|
|
+impl_arch!(
|
|
+ [x86["x86"]: __m256, __m256i, __m256d],
|
|
+ [x86_64["x86_64"]: __m256, __m256i, __m256d] |
|
|
+ from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16,
|
|
+ i32x8, u32x8, f32x8, m32x8,
|
|
+ i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 |
|
|
+ into: i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, f32x8,
|
|
+ i64x4, u64x4, f64x4, i128x2, u128x2 |
|
|
+ test: test_v256
|
|
+);
|
|
+
|
|
+////////////////////////////////////////////////////////////////////////////////
|
|
+// FIXME: Implementations for the 512-bit wide vector types
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/macros.rs b/third_party/rust/packed_simd/src/api/into_bits/macros.rs
|
|
new file mode 100644
|
|
index 000000000000..8cec5b00479f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/macros.rs
|
|
@@ -0,0 +1,74 @@
|
|
+//! Macros implementing `FromBits`
|
|
+
|
|
+macro_rules! impl_from_bits_ { // implements `FromBits<$from_ty>` for `$id`, plus a bit-pattern test
|
|
+    ($id:ident[$test_tt:tt]: $from_ty:ident) => {
|
|
+        impl crate::api::into_bits::FromBits<$from_ty> for $id {
|
|
+            #[inline]
|
|
+            fn from_bits(x: $from_ty) -> Self {
|
|
+                unsafe { crate::mem::transmute(x) } // bitwise reinterpretation of `x` as `Self`
|
|
+            }
|
|
+        }
|
|
+
|
|
+        test_if! {
|
|
+            $test_tt:
|
|
+            paste::item! {
|
|
+                pub mod [<$id _from_bits_ $from_ty>] {
|
|
+                    use super::*;
|
|
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+                    fn test() {
|
|
+                        use crate::{
|
|
+                            ptr::{read_unaligned},
|
|
+                            mem::{size_of, zeroed}
|
|
+                        };
|
|
+                        use crate::IntoBits;
|
|
+                        assert_eq!(size_of::<$id>(),
|
|
+                                   size_of::<$from_ty>());
|
|
+                        // This is safe because we never create a reference to
|
|
+                        // uninitialized memory:
|
|
+                        let a: $from_ty = unsafe { zeroed() };
|
|
+
|
|
+                        let b_0: $id = crate::FromBits::from_bits(a); // conversion through `FromBits` directly
|
|
+                        let b_1: $id = a.into_bits(); // conversion through `IntoBits`
|
|
+
|
|
+                        // Check that these are byte-wise equal, that is,
|
|
+                        // that the bit patterns are identical:
|
|
+                        for i in 0..size_of::<$id>() {
|
|
+                            // This is safe because we only read initialized
|
|
+                            // memory in bounds. Also, taking a reference to
|
|
+                            // `b_i` is ok because the fields are initialized.
|
|
+                            unsafe {
|
|
+                                let b_0_v: u8 = read_unaligned(
|
|
+                                    (&b_0 as *const $id as *const u8)
|
|
+                                    .wrapping_add(i)
|
|
+                                );
|
|
+                                let b_1_v: u8 = read_unaligned(
|
|
+                                    (&b_1 as *const $id as *const u8)
|
|
+                                    .wrapping_add(i)
|
|
+                                );
|
|
+                                assert_eq!(b_0_v, b_1_v);
|
|
+                            }
|
|
+                        }
|
|
+                    }
|
|
+                }
|
|
+            }
|
|
+        }
|
|
+    };
|
|
+}
|
|
+
|
|
+macro_rules! impl_from_bits { // implements `FromBits<F>` for `$id`, once per listed type `F`
|
|
+    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
|
|
+        $(
|
|
+            impl_from_bits_!($id[$test_tt]: $from_ty);
|
|
+        )*
|
|
+    }
|
|
+}
|
|
+
|
|
+#[allow(unused)]
|
|
+macro_rules! impl_into_bits { // implements `FromBits<$id>` for every listed type, so `$id` converts into each
|
|
+    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { // note: despite its name, `$from_ty` is the conversion target here
|
|
+        $(
|
|
+            impl_from_bits_!($from_ty[$test_tt]: $id); // roles swapped relative to `impl_from_bits!`
|
|
+        )*
|
|
+    }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v128.rs b/third_party/rust/packed_simd/src/api/into_bits/v128.rs
|
|
new file mode 100644
|
|
index 000000000000..804dbf282d53
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs
|
|
@@ -0,0 +1,28 @@
|
|
+//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+#[allow(unused)] // wasm_bindgen_test
|
|
+use crate::*;
|
|
+
|
|
+impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1);
|
|
+
|
|
+impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1);
|
|
+
|
|
+impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(m32x4[test_v128]: m64x2, m128x1);
|
|
+
|
|
+impl_from_bits!(i64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, m64x2, i128x1, u128x1, m128x1);
|
|
+impl_from_bits!(m64x2[test_v128]: m128x1);
|
|
+
|
|
+impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1);
|
|
+impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1);
|
|
+// note: m128x1 cannot be constructed from all bit patterns of the other mask types in here
|
|
+
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v16.rs b/third_party/rust/packed_simd/src/api/into_bits/v16.rs
|
|
new file mode 100644
|
|
index 000000000000..1162a62e5bd1
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs
|
|
@@ -0,0 +1,9 @@
|
|
+//! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+#[allow(unused)] // wasm_bindgen_test
|
|
+use crate::*;
|
|
+
|
|
+impl_from_bits!(i8x2[test_v16]: u8x2, m8x2);
|
|
+impl_from_bits!(u8x2[test_v16]: i8x2, m8x2);
|
|
+// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v256.rs b/third_party/rust/packed_simd/src/api/into_bits/v256.rs
|
|
new file mode 100644
|
|
index 000000000000..cc7a6646b535
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs
|
|
@@ -0,0 +1,27 @@
|
|
+//! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+#[allow(unused)] // wasm_bindgen_test
|
|
+use crate::*;
|
|
+
|
|
+impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2);
|
|
+
|
|
+impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2);
|
|
+
|
|
+impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(m32x8[test_v256]: m64x4, m128x2);
|
|
+
|
|
+impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2);
|
|
+impl_from_bits!(m64x4[test_v256]: m128x2);
|
|
+
|
|
+impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2);
|
|
+impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2);
|
|
+// note: m128x2 cannot be constructed from all bit patterns of the other mask types in here
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v32.rs b/third_party/rust/packed_simd/src/api/into_bits/v32.rs
|
|
new file mode 100644
|
|
index 000000000000..2c183ecf1c77
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs
|
|
@@ -0,0 +1,13 @@
|
|
+//! `FromBits` and `IntoBits` implementations for portable 32-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+#[allow(unused)] // wasm_bindgen_test
|
|
+use crate::*;
|
|
+
|
|
+impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2);
|
|
+impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2);
|
|
+impl_from_bits!(m8x4[test_v32]: m16x2);
|
|
+
|
|
+impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2);
|
|
+impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2);
|
|
+// note: m16x2 cannot be constructed from all m8x4 bit patterns
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v512.rs b/third_party/rust/packed_simd/src/api/into_bits/v512.rs
|
|
new file mode 100644
|
|
index 000000000000..8dec6a7f63a0
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs
|
|
@@ -0,0 +1,27 @@
|
|
+//! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+#[allow(unused)] // wasm_bindgen_test
|
|
+use crate::*;
|
|
+
|
|
+impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4);
|
|
+
|
|
+impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4);
|
|
+
|
|
+impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(m32x16[test_v512]: m64x8, m128x4);
|
|
+
|
|
+impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4);
|
|
+impl_from_bits!(m64x8[test_v512]: m128x4);
|
|
+
|
|
+impl_from_bits!(i128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4);
|
|
+impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, m128x4);
|
|
+// note: m128x4 cannot be constructed from all bit patterns of the other mask types in here
|
|
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v64.rs b/third_party/rust/packed_simd/src/api/into_bits/v64.rs
|
|
new file mode 100644
|
|
index 000000000000..8999d98e13f8
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs
|
|
@@ -0,0 +1,18 @@
|
|
+//! `FromBits` and `IntoBits` implementations for portable 64-bit wide vectors
|
|
+#![rustfmt::skip]
|
|
+
|
|
+#[allow(unused)] // wasm_bindgen_test
|
|
+use crate::*;
|
|
+
|
|
+impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
|
|
+impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
|
|
+impl_from_bits!(m8x8[test_v64]: m16x4, m32x2);
|
|
+
|
|
+impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
|
|
+impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
|
|
+impl_from_bits!(m16x4[test_v64]: m32x2);
|
|
+
|
|
+impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2);
|
|
+impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2);
|
|
+impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2);
|
|
+// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns
|
|
diff --git a/third_party/rust/packed_simd/src/api/math.rs b/third_party/rust/packed_simd/src/api/math.rs
|
|
new file mode 100644
|
|
index 000000000000..e7a8d256baf5
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math.rs
|
|
@@ -0,0 +1,4 @@
|
|
+//! Implements vertical math operations
|
|
+
|
|
+#[macro_use]
|
|
+mod float;
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float.rs b/third_party/rust/packed_simd/src/api/math/float.rs
|
|
new file mode 100644
|
|
index 000000000000..c0ec46e91789
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float.rs
|
|
@@ -0,0 +1,61 @@
|
|
+//! Implements vertical floating-point math operations.
|
|
+
|
|
+#[macro_use]
|
|
+mod abs;
|
|
+
|
|
+#[macro_use]
|
|
+mod consts;
|
|
+
|
|
+#[macro_use]
|
|
+mod cos;
|
|
+
|
|
+#[macro_use]
|
|
+mod exp;
|
|
+
|
|
+#[macro_use]
|
|
+mod powf;
|
|
+
|
|
+#[macro_use]
|
|
+mod ln;
|
|
+
|
|
+#[macro_use]
|
|
+mod mul_add;
|
|
+
|
|
+#[macro_use]
|
|
+mod mul_adde;
|
|
+
|
|
+#[macro_use]
|
|
+mod recpre;
|
|
+
|
|
+#[macro_use]
|
|
+mod rsqrte;
|
|
+
|
|
+#[macro_use]
|
|
+mod sin;
|
|
+
|
|
+#[macro_use]
|
|
+mod sqrt;
|
|
+
|
|
+#[macro_use]
|
|
+mod sqrte;
|
|
+
|
|
+macro_rules! impl_float_category {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => {
|
|
+ impl $id {
|
|
+ #[inline]
|
|
+ pub fn is_nan(self) -> $mask_ty {
|
|
+ self.ne(self)
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ pub fn is_infinite(self) -> $mask_ty {
|
|
+ self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY)
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ pub fn is_finite(self) -> $mask_ty {
|
|
+ !(self.is_nan() | self.is_infinite())
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/abs.rs b/third_party/rust/packed_simd/src/api/math/float/abs.rs
|
|
new file mode 100644
|
|
index 000000000000..1865bdb68ec6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/abs.rs
|
|
@@ -0,0 +1,31 @@
|
|
+//! Implements vertical (lane-wise) floating-point `abs`.
|
|
+
|
|
+macro_rules! impl_math_float_abs {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Absolute value.
|
|
+ #[inline]
|
|
+ pub fn abs(self) -> Self {
|
|
+ use crate::codegen::math::float::abs::Abs;
|
|
+ Abs::abs(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_abs>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn abs() {
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(o, o.abs());
|
|
+
|
|
+ let mo = $id::splat(-1 as $elem_ty);
|
|
+ assert_eq!(o, mo.abs());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/consts.rs b/third_party/rust/packed_simd/src/api/math/float/consts.rs
|
|
new file mode 100644
|
|
index 000000000000..89f93a6d692b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/consts.rs
|
|
@@ -0,0 +1,86 @@
|
|
+macro_rules! impl_float_consts {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident) => {
|
|
+ impl $id {
|
|
+ /// Machine epsilon value.
|
|
+ pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON);
|
|
+
|
|
+ /// Smallest finite value.
|
|
+ pub const MIN: $id = $id::splat(core::$elem_ty::MIN);
|
|
+
|
|
+ /// Smallest positive normal value.
|
|
+ pub const MIN_POSITIVE: $id =
|
|
+ $id::splat(core::$elem_ty::MIN_POSITIVE);
|
|
+
|
|
+ /// Largest finite value.
|
|
+ pub const MAX: $id = $id::splat(core::$elem_ty::MAX);
|
|
+
|
|
+ /// Not a Number (NaN).
|
|
+ pub const NAN: $id = $id::splat(core::$elem_ty::NAN);
|
|
+
|
|
+ /// Infinity (∞).
|
|
+ pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY);
|
|
+
|
|
+ /// Negative infinity (-∞).
|
|
+ pub const NEG_INFINITY: $id =
|
|
+ $id::splat(core::$elem_ty::NEG_INFINITY);
|
|
+
|
|
+ /// Archimedes' constant (π)
|
|
+ pub const PI: $id = $id::splat(core::$elem_ty::consts::PI);
|
|
+
|
|
+ /// π/2
|
|
+ pub const FRAC_PI_2: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_PI_2);
|
|
+
|
|
+ /// π/3
|
|
+ pub const FRAC_PI_3: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_PI_3);
|
|
+
|
|
+ /// π/4
|
|
+ pub const FRAC_PI_4: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_PI_4);
|
|
+
|
|
+ /// π/6
|
|
+ pub const FRAC_PI_6: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_PI_6);
|
|
+
|
|
+ /// π/8
|
|
+ pub const FRAC_PI_8: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_PI_8);
|
|
+
|
|
+ /// 1/π
|
|
+ pub const FRAC_1_PI: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_1_PI);
|
|
+
|
|
+ /// 2/π
|
|
+ pub const FRAC_2_PI: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_2_PI);
|
|
+
|
|
+ /// 2/sqrt(π)
|
|
+ pub const FRAC_2_SQRT_PI: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI);
|
|
+
|
|
+ /// sqrt(2)
|
|
+ pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2);
|
|
+
|
|
+ /// 1/sqrt(2)
|
|
+ pub const FRAC_1_SQRT_2: $id =
|
|
+ $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2);
|
|
+
|
|
+ /// Euler's number (e)
|
|
+ pub const E: $id = $id::splat(core::$elem_ty::consts::E);
|
|
+
|
|
+ /// log<sub>2</sub>(e)
|
|
+ pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E);
|
|
+
|
|
+ /// log<sub>10</sub>(e)
|
|
+ pub const LOG10_E: $id =
|
|
+ $id::splat(core::$elem_ty::consts::LOG10_E);
|
|
+
|
|
+ /// ln(2)
|
|
+ pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2);
|
|
+
|
|
+ /// ln(10)
|
|
+ pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10);
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/cos.rs b/third_party/rust/packed_simd/src/api/math/float/cos.rs
|
|
new file mode 100644
|
|
index 000000000000..e5b8f46036c7
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/cos.rs
|
|
@@ -0,0 +1,44 @@
|
|
+//! Implements vertical (lane-wise) floating-point `cos`.
|
|
+
|
|
+macro_rules! impl_math_float_cos {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Cosine.
|
|
+ #[inline]
|
|
+ pub fn cos(self) -> Self {
|
|
+ use crate::codegen::math::float::cos::Cos;
|
|
+ Cos::cos(self)
|
|
+ }
|
|
+
|
|
+ /// Cosine of `self * PI`.
|
|
+ #[inline]
|
|
+ pub fn cos_pi(self) -> Self {
|
|
+ use crate::codegen::math::float::cos_pi::CosPi;
|
|
+ CosPi::cos_pi(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_cos>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn cos() {
|
|
+ use crate::$elem_ty::consts::PI;
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let p = $id::splat(PI as $elem_ty);
|
|
+ let ph = $id::splat(PI as $elem_ty / 2.);
|
|
+ let z_r = $id::splat((PI as $elem_ty / 2.).cos());
|
|
+ let o_r = $id::splat((PI as $elem_ty).cos());
|
|
+
|
|
+ assert_eq!(o, z.cos());
|
|
+ assert_eq!(z_r, ph.cos());
|
|
+ assert_eq!(o_r, p.cos());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/exp.rs b/third_party/rust/packed_simd/src/api/math/float/exp.rs
|
|
new file mode 100644
|
|
index 000000000000..e3356d853a83
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/exp.rs
|
|
@@ -0,0 +1,33 @@
|
|
+//! Implements vertical (lane-wise) floating-point `exp`.
|
|
+
|
|
+macro_rules! impl_math_float_exp {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Returns the exponential function of `self`: `e^(self)`.
|
|
+ #[inline]
|
|
+ pub fn exp(self) -> Self {
|
|
+ use crate::codegen::math::float::exp::Exp;
|
|
+ Exp::exp(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_exp>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn exp() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(o, z.exp());
|
|
+
|
|
+ let e = $id::splat(crate::f64::consts::E as $elem_ty);
|
|
+ let tol = $id::splat(2.4e-4 as $elem_ty);
|
|
+ assert!((e - o.exp()).abs().le(tol).all());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/ln.rs b/third_party/rust/packed_simd/src/api/math/float/ln.rs
|
|
new file mode 100644
|
|
index 000000000000..5ceb9173ae05
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/ln.rs
|
|
@@ -0,0 +1,33 @@
|
|
+//! Implements vertical (lane-wise) floating-point `ln`.
|
|
+
|
|
+macro_rules! impl_math_float_ln {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Returns the natural logarithm of `self`.
|
|
+ #[inline]
|
|
+ pub fn ln(self) -> Self {
|
|
+ use crate::codegen::math::float::ln::Ln;
|
|
+ Ln::ln(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_ln>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn ln() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(z, o.ln());
|
|
+
|
|
+ let e = $id::splat(crate::f64::consts::E as $elem_ty);
|
|
+ let tol = $id::splat(2.4e-4 as $elem_ty);
|
|
+ assert!((o - e.ln()).abs().le(tol).all());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_add.rs b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs
|
|
new file mode 100644
|
|
index 000000000000..4b170ee2b755
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs
|
|
@@ -0,0 +1,44 @@
|
|
+//! Implements vertical (lane-wise) floating-point `mul_add`.
|
|
+
|
|
+macro_rules! impl_math_float_mul_add {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Fused multiply add: `self * y + z`
|
|
+ #[inline]
|
|
+ pub fn mul_add(self, y: Self, z: Self) -> Self {
|
|
+ use crate::codegen::math::float::mul_add::MulAdd;
|
|
+ MulAdd::mul_add(self, y, z)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_mul_add>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn mul_add() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let t3 = $id::splat(3 as $elem_ty);
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+
|
|
+ assert_eq!(z, z.mul_add(z, z));
|
|
+ assert_eq!(o, o.mul_add(o, z));
|
|
+ assert_eq!(o, o.mul_add(z, o));
|
|
+ assert_eq!(o, z.mul_add(o, o));
|
|
+
|
|
+ assert_eq!(t, o.mul_add(o, o));
|
|
+ assert_eq!(t, o.mul_add(t, z));
|
|
+ assert_eq!(t, t.mul_add(o, z));
|
|
+
|
|
+ assert_eq!(f, t.mul_add(t, z));
|
|
+ assert_eq!(f, t.mul_add(o, t));
|
|
+ assert_eq!(t3, t.mul_add(o, o));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs
|
|
new file mode 100644
|
|
index 000000000000..c5b27110f2d7
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs
|
|
@@ -0,0 +1,48 @@
|
|
+//! Implements vertical (lane-wise) floating-point `mul_adde`.
|
|
+
|
|
+macro_rules! impl_math_float_mul_adde {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Fused multiply add estimate: ~= `self * y + z`
|
|
+ ///
|
|
+ /// While fused multiply-add (`fma`) has infinite precision,
|
|
+ /// `mul_adde` has _at worst_ the same precision as a multiply followed by an add.
|
|
+ /// This might be more efficient on architectures that do not have an `fma` instruction.
|
|
+ #[inline]
|
|
+ pub fn mul_adde(self, y: Self, z: Self) -> Self {
|
|
+ use crate::codegen::math::float::mul_adde::MulAddE;
|
|
+ MulAddE::mul_adde(self, y, z)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_mul_adde>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn mul_adde() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let t3 = $id::splat(3 as $elem_ty);
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+
|
|
+ assert_eq!(z, z.mul_adde(z, z));
|
|
+ assert_eq!(o, o.mul_adde(o, z));
|
|
+ assert_eq!(o, o.mul_adde(z, o));
|
|
+ assert_eq!(o, z.mul_adde(o, o));
|
|
+
|
|
+ assert_eq!(t, o.mul_adde(o, o));
|
|
+ assert_eq!(t, o.mul_adde(t, z));
|
|
+ assert_eq!(t, t.mul_adde(o, z));
|
|
+
|
|
+ assert_eq!(f, t.mul_adde(t, z));
|
|
+ assert_eq!(f, t.mul_adde(o, t));
|
|
+ assert_eq!(t3, t.mul_adde(o, o));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/powf.rs b/third_party/rust/packed_simd/src/api/math/float/powf.rs
|
|
new file mode 100644
|
|
index 000000000000..83dc9ff9c05e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/powf.rs
|
|
@@ -0,0 +1,36 @@
|
|
+//! Implements vertical (lane-wise) floating-point `powf`.
|
|
+
|
|
+macro_rules! impl_math_float_powf {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Raises `self` to the floating-point power `x`.
|
|
+ #[inline]
|
|
+ pub fn powf(self, x: Self) -> Self {
|
|
+ use crate::codegen::math::float::powf::Powf;
|
|
+ Powf::powf(self, x)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_powf>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn powf() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ assert_eq!(o, o.powf(z));
|
|
+ assert_eq!(o, t.powf(z));
|
|
+ assert_eq!(o, o.powf(o));
|
|
+ assert_eq!(t, t.powf(o));
|
|
+
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+ assert_eq!(f, t.powf(t));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/recpre.rs b/third_party/rust/packed_simd/src/api/math/float/recpre.rs
|
|
new file mode 100644
|
|
index 000000000000..127f0b2ff674
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/recpre.rs
|
|
@@ -0,0 +1,36 @@
|
|
+//! Implements vertical (lane-wise) floating-point `recpre`.
|
|
+
|
|
+macro_rules! impl_math_float_recpre {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Reciprocal estimate: `~= 1. / self`.
|
|
+ ///
|
|
+ /// FIXME: The precision of the estimate is currently unspecified.
|
|
+ #[inline]
|
|
+ pub fn recpre(self) -> Self {
|
|
+ $id::splat(1.) / self
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_recpre>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn recpre() {
|
|
+ let tol = $id::splat(2.4e-4 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let error = (o - o.recpre()).abs();
|
|
+ assert!(error.le(tol).all());
|
|
+
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let e = 0.5;
|
|
+ let error = (e - t.recpre()).abs();
|
|
+ assert!(error.le(tol).all());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs
|
|
new file mode 100644
|
|
index 000000000000..c77977f7b1cd
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs
|
|
@@ -0,0 +1,40 @@
|
|
+//! Implements vertical (lane-wise) floating-point `rsqrte`.
|
|
+
|
|
+macro_rules! impl_math_float_rsqrte {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`.
|
|
+ ///
|
|
+ /// FIXME: The precision of the estimate is currently unspecified.
|
|
+ #[inline]
|
|
+ pub fn rsqrte(self) -> Self {
|
|
+ unsafe {
|
|
+ use crate::llvm::simd_fsqrt;
|
|
+ $id::splat(1.) / Simd(simd_fsqrt(self.0))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_rsqrte>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn rsqrte() {
|
|
+ use crate::$elem_ty::consts::SQRT_2;
|
|
+ let tol = $id::splat(2.4e-4 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let error = (o - o.rsqrte()).abs();
|
|
+ assert!(error.le(tol).all());
|
|
+
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let e = 1. / SQRT_2;
|
|
+ let error = (e - t.rsqrte()).abs();
|
|
+ assert!(error.le(tol).all());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/sin.rs b/third_party/rust/packed_simd/src/api/math/float/sin.rs
|
|
new file mode 100644
|
|
index 000000000000..49908319b126
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/sin.rs
|
|
@@ -0,0 +1,50 @@
|
|
+//! Implements vertical (lane-wise) floating-point `sin`.
|
|
+
|
|
+macro_rules! impl_math_float_sin {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Sine.
|
|
+ #[inline]
|
|
+ pub fn sin(self) -> Self {
|
|
+ use crate::codegen::math::float::sin::Sin;
|
|
+ Sin::sin(self)
|
|
+ }
|
|
+
|
|
+ /// Sine of `self * PI`.
|
|
+ #[inline]
|
|
+ pub fn sin_pi(self) -> Self {
|
|
+ use crate::codegen::math::float::sin_pi::SinPi;
|
|
+ SinPi::sin_pi(self)
|
|
+ }
|
|
+
|
|
+ /// Sine and cosine of `self * PI`.
|
|
+ #[inline]
|
|
+ pub fn sin_cos_pi(self) -> (Self, Self) {
|
|
+ use crate::codegen::math::float::sin_cos_pi::SinCosPi;
|
|
+ SinCosPi::sin_cos_pi(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_sin>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn sin() {
|
|
+ use crate::$elem_ty::consts::PI;
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let p = $id::splat(PI as $elem_ty);
|
|
+ let ph = $id::splat(PI as $elem_ty / 2.);
|
|
+ let o_r = $id::splat((PI as $elem_ty / 2.).sin());
|
|
+ let z_r = $id::splat((PI as $elem_ty).sin());
|
|
+
|
|
+ assert_eq!(z, z.sin());
|
|
+ assert_eq!(o_r, ph.sin());
|
|
+ assert_eq!(z_r, p.sin());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrt.rs b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs
|
|
new file mode 100644
|
|
index 000000000000..ae624122d0e2
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs
|
|
@@ -0,0 +1,35 @@
|
|
+//! Implements vertical (lane-wise) floating-point `sqrt`.
|
|
+
|
|
+macro_rules! impl_math_float_sqrt {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ #[inline]
|
|
+ pub fn sqrt(self) -> Self {
|
|
+ use crate::codegen::math::float::sqrt::Sqrt;
|
|
+ Sqrt::sqrt(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_sqrt>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn sqrt() {
|
|
+ use crate::$elem_ty::consts::SQRT_2;
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(z, z.sqrt());
|
|
+ assert_eq!(o, o.sqrt());
|
|
+
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let e = $id::splat(SQRT_2);
|
|
+ assert_eq!(e, t.sqrt());
|
|
+
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrte.rs b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs
|
|
new file mode 100644
|
|
index 000000000000..f7ffad748d9c
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs
|
|
@@ -0,0 +1,44 @@
|
|
+//! Implements vertical (lane-wise) floating-point `sqrte`.
|
|
+
|
|
+macro_rules! impl_math_float_sqrte {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Square-root estimate.
|
|
+ ///
|
|
+ /// FIXME: The precision of the estimate is currently unspecified.
|
|
+ #[inline]
|
|
+ pub fn sqrte(self) -> Self {
|
|
+ use crate::codegen::math::float::sqrte::Sqrte;
|
|
+ Sqrte::sqrte(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _math_sqrte>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn sqrte() {
|
|
+ use crate::$elem_ty::consts::SQRT_2;
|
|
+ let tol = $id::splat(2.4e-4 as $elem_ty);
|
|
+
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let error = (z - z.sqrte()).abs();
|
|
+ assert!(error.le(tol).all());
|
|
+
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let error = (o - o.sqrte()).abs();
|
|
+ assert!(error.le(tol).all());
|
|
+
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let e = $id::splat(SQRT_2 as $elem_ty);
|
|
+ let error = (e - t.sqrte()).abs();
|
|
+
|
|
+ assert!(error.le(tol).all());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/minimal.rs b/third_party/rust/packed_simd/src/api/minimal.rs
|
|
new file mode 100644
|
|
index 000000000000..840d9e32585d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/minimal.rs
|
|
@@ -0,0 +1,6 @@
|
|
+#[macro_use]
|
|
+mod iuf;
|
|
+#[macro_use]
|
|
+mod mask;
|
|
+#[macro_use]
|
|
+mod ptr;
|
|
diff --git a/third_party/rust/packed_simd/src/api/minimal/iuf.rs b/third_party/rust/packed_simd/src/api/minimal/iuf.rs
|
|
new file mode 100644
|
|
index 000000000000..58ffabab994f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs
|
|
@@ -0,0 +1,167 @@
|
|
+//! Minimal API of signed integer, unsigned integer, and floating-point
|
|
+//! vectors.
|
|
+
|
|
+macro_rules! impl_minimal_iuf {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident |
|
|
+ $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
|
+
|
|
+ $(#[$doc])*
|
|
+ pub type $id = Simd<[$elem_ty; $elem_count]>;
|
|
+
|
|
+ impl sealed::Simd for $id {
|
|
+ type Element = $elem_ty;
|
|
+ const LANES: usize = $elem_count;
|
|
+ type LanesType = [u32; $elem_count];
|
|
+ }
|
|
+
|
|
+ impl $id {
|
|
+ /// Creates a new instance with each vector element initialized
|
|
+ /// with the provided values.
|
|
+ #[inline]
|
|
+ #[allow(clippy::too_many_arguments)]
|
|
+ pub const fn new($($elem_name: $elem_ty),*) -> Self {
|
|
+ Simd(codegen::$id($($elem_name as $ielem_ty),*))
|
|
+ }
|
|
+
|
|
+ /// Returns the number of vector lanes.
|
|
+ #[inline]
|
|
+ pub const fn lanes() -> usize {
|
|
+ $elem_count
|
|
+ }
|
|
+
|
|
+ /// Constructs a new instance with each element initialized to
|
|
+ /// `value`.
|
|
+ #[inline]
|
|
+ pub const fn splat(value: $elem_ty) -> Self {
|
|
+ Simd(codegen::$id($({
|
|
+ #[allow(non_camel_case_types, dead_code)]
|
|
+ struct $elem_name;
|
|
+ value as $ielem_ty
|
|
+ }),*))
|
|
+ }
|
|
+
|
|
+ /// Extracts the value at `index`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()`.
|
|
+ #[inline]
|
|
+ pub fn extract(self, index: usize) -> $elem_ty {
|
|
+ assert!(index < $elem_count);
|
|
+ unsafe { self.extract_unchecked(index) }
|
|
+ }
|
|
+
|
|
+ /// Extracts the value at `index`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
|
|
+ use crate::llvm::simd_extract;
|
|
+ let e: $ielem_ty = simd_extract(self.0, index as u32);
|
|
+ e as $elem_ty
|
|
+ }
|
|
+
|
|
+ /// Returns a new vector where the value at `index` is replaced by `new_value`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()`.
|
|
+ #[inline]
|
|
+ #[must_use = "replace does not modify the original value - \
|
|
+ it returns a new vector with the value at `index` \
|
|
+ replaced by `new_value`d"
|
|
+ ]
|
|
+ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
|
|
+ assert!(index < $elem_count);
|
|
+ unsafe { self.replace_unchecked(index, new_value) }
|
|
+ }
|
|
+
|
|
+ /// Returns a new vector where the value at `index` is replaced by `new_value`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ #[must_use = "replace_unchecked does not modify the original value - \
|
|
+ it returns a new vector with the value at `index` \
|
|
+ replaced by `new_value`d"
|
|
+ ]
|
|
+ pub unsafe fn replace_unchecked(
|
|
+ self,
|
|
+ index: usize,
|
|
+ new_value: $elem_ty,
|
|
+ ) -> Self {
|
|
+ use crate::llvm::simd_insert;
|
|
+ Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _minimal>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn minimal() {
|
|
+ // lanes:
|
|
+ assert_eq!($elem_count, $id::lanes());
|
|
+
|
|
+ // splat and extract / extract_unchecked:
|
|
+ const VAL: $elem_ty = 7 as $elem_ty;
|
|
+ const VEC: $id = $id::splat(VAL);
|
|
+ for i in 0..$id::lanes() {
|
|
+ assert_eq!(VAL, VEC.extract(i));
|
|
+ assert_eq!(
|
|
+ VAL, unsafe { VEC.extract_unchecked(i) }
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // replace / replace_unchecked
|
|
+ let new_vec = VEC.replace(0, 42 as $elem_ty);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i == 0 {
|
|
+ assert_eq!(42 as $elem_ty, new_vec.extract(i));
|
|
+ } else {
|
|
+ assert_eq!(VAL, new_vec.extract(i));
|
|
+ }
|
|
+ }
|
|
+ let new_vec = unsafe {
|
|
+ VEC.replace_unchecked(0, 42 as $elem_ty)
|
|
+ };
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i == 0 {
|
|
+ assert_eq!(42 as $elem_ty, new_vec.extract(i));
|
|
+ } else {
|
|
+ assert_eq!(VAL, new_vec.extract(i));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn extract_panic_oob() {
|
|
+ const VAL: $elem_ty = 7 as $elem_ty;
|
|
+ const VEC: $id = $id::splat(VAL);
|
|
+ let _ = VEC.extract($id::lanes());
|
|
+ }
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn replace_panic_oob() {
|
|
+ const VAL: $elem_ty = 7 as $elem_ty;
|
|
+ const VEC: $id = $id::splat(VAL);
|
|
+ let _ = VEC.replace($id::lanes(), 42 as $elem_ty);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/minimal/mask.rs b/third_party/rust/packed_simd/src/api/minimal/mask.rs
|
|
new file mode 100644
|
|
index 000000000000..e65be95db12c
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs
|
|
@@ -0,0 +1,174 @@
|
|
+//! Minimal API of mask vectors.
|
|
+
|
|
+macro_rules! impl_minimal_mask {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
|
|
+ | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
|
+ $(#[$doc])*
|
|
+ pub type $id = Simd<[$elem_ty; $elem_count]>;
|
|
+
|
|
+ impl sealed::Simd for $id {
|
|
+ type Element = $elem_ty;
|
|
+ const LANES: usize = $elem_count;
|
|
+ type LanesType = [u32; $elem_count];
|
|
+ }
|
|
+
|
|
+ impl $id {
|
|
+ /// Creates a new instance with each vector element initialized
|
|
+ /// with the provided values.
|
|
+ #[inline]
|
|
+ #[allow(clippy::too_many_arguments)]
|
|
+ pub const fn new($($elem_name: bool),*) -> Self {
|
|
+ Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*))
|
|
+ }
|
|
+
|
|
+ /// Converts a boolean type into the type of the vector lanes.
|
|
+ #[inline]
|
|
+ #[allow(clippy::indexing_slicing)]
|
|
+ const fn bool_to_internal(x: bool) -> $ielem_ty {
|
|
+ [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize]
|
|
+ }
|
|
+
|
|
+ /// Returns the number of vector lanes.
|
|
+ #[inline]
|
|
+ pub const fn lanes() -> usize {
|
|
+ $elem_count
|
|
+ }
|
|
+
|
|
+ /// Constructs a new instance with each element initialized to
|
|
+ /// `value`.
|
|
+ #[inline]
|
|
+ pub const fn splat(value: bool) -> Self {
|
|
+ Simd(codegen::$id($({
|
|
+ #[allow(non_camel_case_types, dead_code)]
|
|
+ struct $elem_name;
|
|
+ Self::bool_to_internal(value)
|
|
+ }),*))
|
|
+ }
|
|
+
|
|
+ /// Extracts the value at `index`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()`.
|
|
+ #[inline]
|
|
+ pub fn extract(self, index: usize) -> bool {
|
|
+ assert!(index < $elem_count);
|
|
+ unsafe { self.extract_unchecked(index) }
|
|
+ }
|
|
+
|
|
+ /// Extracts the value at `index`.
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn extract_unchecked(self, index: usize) -> bool {
|
|
+ use crate::llvm::simd_extract;
|
|
+ let x: $ielem_ty = simd_extract(self.0, index as u32);
|
|
+ x != 0
|
|
+ }
|
|
+
|
|
+ /// Returns a new vector where the value at `index` is replaced by
|
|
+ /// `new_value`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()`.
|
|
+ #[inline]
|
|
+ #[must_use = "replace does not modify the original value - \
|
|
+ it returns a new vector with the value at `index` \
|
|
+ replaced by `new_value`d"
|
|
+ ]
|
|
+ pub fn replace(self, index: usize, new_value: bool) -> Self {
|
|
+ assert!(index < $elem_count);
|
|
+ unsafe { self.replace_unchecked(index, new_value) }
|
|
+ }
|
|
+
|
|
+ /// Returns a new vector where the value at `index` is replaced by
|
|
+ /// `new_value`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ #[must_use = "replace_unchecked does not modify the original value - \
|
|
+ it returns a new vector with the value at `index` \
|
|
+ replaced by `new_value`d"
|
|
+ ]
|
|
+ pub unsafe fn replace_unchecked(
|
|
+ self,
|
|
+ index: usize,
|
|
+ new_value: bool,
|
|
+ ) -> Self {
|
|
+ use crate::llvm::simd_insert;
|
|
+ Simd(simd_insert(self.0, index as u32,
|
|
+ Self::bool_to_internal(new_value)))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _minimal>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn minimal() {
|
|
+ // TODO: test new
|
|
+
|
|
+ // lanes:
|
|
+ assert_eq!($elem_count, $id::lanes());
|
|
+
|
|
+ // splat and extract / extract_unchecked:
|
|
+ let vec = $id::splat(true);
|
|
+ for i in 0..$id::lanes() {
|
|
+ assert_eq!(true, vec.extract(i));
|
|
+ assert_eq!(true,
|
|
+ unsafe { vec.extract_unchecked(i) }
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // replace / replace_unchecked
|
|
+ let new_vec = vec.replace(0, false);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i == 0 {
|
|
+ assert_eq!(false, new_vec.extract(i));
|
|
+ } else {
|
|
+ assert_eq!(true, new_vec.extract(i));
|
|
+ }
|
|
+ }
|
|
+ let new_vec = unsafe {
|
|
+ vec.replace_unchecked(0, false)
|
|
+ };
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i == 0 {
|
|
+ assert_eq!(false, new_vec.extract(i));
|
|
+ } else {
|
|
+ assert_eq!(true, new_vec.extract(i));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn extract_panic_oob() {
|
|
+ let vec = $id::splat(false);
|
|
+ let _ = vec.extract($id::lanes());
|
|
+ }
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn replace_panic_oob() {
|
|
+ let vec = $id::splat(false);
|
|
+ let _ = vec.replace($id::lanes(), true);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/minimal/ptr.rs b/third_party/rust/packed_simd/src/api/minimal/ptr.rs
|
|
new file mode 100644
|
|
index 000000000000..75e5aad5c065
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs
|
|
@@ -0,0 +1,1385 @@
|
|
+//! Minimal API of pointer vectors.
|
|
+
|
|
+macro_rules! impl_minimal_p {
|
|
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident,
|
|
+ $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt
|
|
+ | $($elem_name:ident),+ | ($true:expr, $false:expr) |
|
|
+ $(#[$doc:meta])*) => {
|
|
+
|
|
+ $(#[$doc])*
|
|
+ pub type $id<T> = Simd<[$elem_ty; $elem_count]>;
|
|
+
|
|
+ impl<T> sealed::Simd for $id<T> {
|
|
+ type Element = $elem_ty;
|
|
+ const LANES: usize = $elem_count;
|
|
+ type LanesType = [u32; $elem_count];
|
|
+ }
|
|
+
|
|
+ impl<T> $id<T> {
|
|
+ /// Creates a new instance with each vector elements initialized
|
|
+ /// with the provided values.
|
|
+ #[inline]
|
|
+ #[allow(clippy::too_many_arguments)]
|
|
+ pub const fn new($($elem_name: $elem_ty),*) -> Self {
|
|
+ Simd(codegen::$id($($elem_name),*))
|
|
+ }
|
|
+
|
|
+ /// Returns the number of vector lanes.
|
|
+ #[inline]
|
|
+ pub const fn lanes() -> usize {
|
|
+ $elem_count
|
|
+ }
|
|
+
|
|
+ /// Constructs a new instance with each element initialized to
|
|
+ /// `value`.
|
|
+ #[inline]
|
|
+ pub const fn splat(value: $elem_ty) -> Self {
|
|
+ Simd(codegen::$id($({
|
|
+ #[allow(non_camel_case_types, dead_code)]
|
|
+ struct $elem_name;
|
|
+ value
|
|
+ }),*))
|
|
+ }
|
|
+
|
|
+ /// Constructs a new instance with each element initialized to
|
|
+ /// `null`.
|
|
+ #[inline]
|
|
+ pub const fn null() -> Self {
|
|
+ Self::splat(crate::ptr::null_mut() as $elem_ty)
|
|
+ }
|
|
+
|
|
+ /// Returns a mask that selects those lanes that contain `null`
|
|
+ /// pointers.
|
|
+ #[inline]
|
|
+ pub fn is_null(self) -> $mask_ty {
|
|
+ self.eq(Self::null())
|
|
+ }
|
|
+
|
|
+ /// Extracts the value at `index`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()`.
|
|
+ #[inline]
|
|
+ pub fn extract(self, index: usize) -> $elem_ty {
|
|
+ assert!(index < $elem_count);
|
|
+ unsafe { self.extract_unchecked(index) }
|
|
+ }
|
|
+
|
|
+ /// Extracts the value at `index`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
|
|
+ use crate::llvm::simd_extract;
|
|
+ simd_extract(self.0, index as u32)
|
|
+ }
|
|
+
|
|
+ /// Returns a new vector where the value at `index` is replaced by
|
|
+ /// `new_value`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()`.
|
|
+ #[inline]
|
|
+ #[must_use = "replace does not modify the original value - \
|
|
+ it returns a new vector with the value at `index` \
|
|
+ replaced by `new_value`d"
|
|
+ ]
|
|
+ #[allow(clippy::not_unsafe_ptr_arg_deref)]
|
|
+ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
|
|
+ assert!(index < $elem_count);
|
|
+ unsafe { self.replace_unchecked(index, new_value) }
|
|
+ }
|
|
+
|
|
+ /// Returns a new vector where the value at `index` is replaced by `new_value`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `index >= Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ #[must_use = "replace_unchecked does not modify the original value - \
|
|
+ it returns a new vector with the value at `index` \
|
|
+ replaced by `new_value`d"
|
|
+ ]
|
|
+ pub unsafe fn replace_unchecked(
|
|
+ self,
|
|
+ index: usize,
|
|
+ new_value: $elem_ty,
|
|
+ ) -> Self {
|
|
+ use crate::llvm::simd_insert;
|
|
+ Simd(simd_insert(self.0, index as u32, new_value))
|
|
+ }
|
|
+ }
|
|
+
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _minimal>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn minimal() {
|
|
+ // lanes:
|
|
+ assert_eq!($elem_count, $id::<i32>::lanes());
|
|
+
|
|
+ // splat and extract / extract_unchecked:
|
|
+ let VAL7: <$id<i32> as sealed::Simd>::Element
|
|
+ = $ref!(7);
|
|
+ let VAL42: <$id<i32> as sealed::Simd>::Element
|
|
+ = $ref!(42);
|
|
+ let VEC: $id<i32> = $id::splat(VAL7);
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ assert_eq!(VAL7, VEC.extract(i));
|
|
+ assert_eq!(
|
|
+ VAL7, unsafe { VEC.extract_unchecked(i) }
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // replace / replace_unchecked
|
|
+ let new_vec = VEC.replace(0, VAL42);
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ if i == 0 {
|
|
+ assert_eq!(VAL42, new_vec.extract(i));
|
|
+ } else {
|
|
+ assert_eq!(VAL7, new_vec.extract(i));
|
|
+ }
|
|
+ }
|
|
+ let new_vec = unsafe {
|
|
+ VEC.replace_unchecked(0, VAL42)
|
|
+ };
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ if i == 0 {
|
|
+ assert_eq!(VAL42, new_vec.extract(i));
|
|
+ } else {
|
|
+ assert_eq!(VAL7, new_vec.extract(i));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let mut n = $id::<i32>::null();
|
|
+ assert_eq!(
|
|
+ n,
|
|
+ $id::<i32>::splat(unsafe { crate::mem::zeroed() })
|
|
+ );
|
|
+ assert!(n.is_null().all());
|
|
+ n = n.replace(
|
|
+ 0, unsafe { crate::mem::transmute(1_isize) }
|
|
+ );
|
|
+ assert!(!n.is_null().all());
|
|
+ if $id::<i32>::lanes() > 1 {
|
|
+ assert!(n.is_null().any());
|
|
+ } else {
|
|
+ assert!(!n.is_null().any());
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn extract_panic_oob() {
|
|
+ let VAL: <$id<i32> as sealed::Simd>::Element
|
|
+ = $ref!(7);
|
|
+ let VEC: $id<i32> = $id::splat(VAL);
|
|
+ let _ = VEC.extract($id::<i32>::lanes());
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn replace_panic_oob() {
|
|
+ let VAL: <$id<i32> as sealed::Simd>::Element
|
|
+ = $ref!(7);
|
|
+ let VAL42: <$id<i32> as sealed::Simd>::Element
|
|
+ = $ref!(42);
|
|
+ let VEC: $id<i32> = $id::splat(VAL);
|
|
+ let _ = VEC.replace($id::<i32>::lanes(), VAL42);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> crate::fmt::Debug for $id<T> {
|
|
+ #[allow(clippy::missing_inline_in_public_items)]
|
|
+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>)
|
|
+ -> crate::fmt::Result {
|
|
+ write!(
|
|
+ f,
|
|
+ "{}<{}>(",
|
|
+ stringify!($id),
|
|
+ unsafe { crate::intrinsics::type_name::<T>() }
|
|
+ )?;
|
|
+ for i in 0..$elem_count {
|
|
+ if i > 0 {
|
|
+ write!(f, ", ")?;
|
|
+ }
|
|
+ self.extract(i).fmt(f)?;
|
|
+ }
|
|
+ write!(f, ")")
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _fmt_debug>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn debug() {
|
|
+ use arrayvec::{ArrayString,ArrayVec};
|
|
+ type TinyString = ArrayString<[u8; 512]>;
|
|
+
|
|
+ use crate::fmt::Write;
|
|
+ let v = $id::<i32>::default();
|
|
+ let mut s = TinyString::new();
|
|
+ write!(&mut s, "{:?}", v).unwrap();
|
|
+
|
|
+ let mut beg = TinyString::new();
|
|
+ write!(&mut beg, "{}<i32>(", stringify!($id)).unwrap();
|
|
+ assert!(
|
|
+ s.starts_with(beg.as_str()),
|
|
+ "s = {} (should start with = {})", s, beg
|
|
+ );
|
|
+ assert!(s.ends_with(")"));
|
|
+ let s: ArrayVec<[TinyString; 64]>
|
|
+ = s.replace(beg.as_str(), "")
|
|
+ .replace(")", "").split(",")
|
|
+ .map(|v| TinyString::from(v.trim()).unwrap())
|
|
+ .collect();
|
|
+ assert_eq!(s.len(), $id::<i32>::lanes());
|
|
+ for (index, ss) in s.into_iter().enumerate() {
|
|
+ let mut e = TinyString::new();
|
|
+ write!(&mut e, "{:?}", v.extract(index)).unwrap();
|
|
+ assert_eq!(ss, e);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> Default for $id<T> {
|
|
+ #[inline]
|
|
+ fn default() -> Self {
|
|
+ // FIXME: ptrs do not implement default
|
|
+ Self::null()
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _default>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn default() {
|
|
+ let a = $id::<i32>::default();
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ assert_eq!(
|
|
+ a.extract(i), unsafe { crate::mem::zeroed() }
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> $id<T> {
|
|
+ /// Lane-wise equality comparison.
|
|
+ #[inline]
|
|
+ pub fn eq(self, other: Self) -> $mask_ty {
|
|
+ unsafe {
|
|
+ use crate::llvm::simd_eq;
|
|
+ let a: $usize_ty = crate::mem::transmute(self);
|
|
+ let b: $usize_ty = crate::mem::transmute(other);
|
|
+ Simd(simd_eq(a.0, b.0))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise inequality comparison.
|
|
+ #[inline]
|
|
+ pub fn ne(self, other: Self) -> $mask_ty {
|
|
+ unsafe {
|
|
+ use crate::llvm::simd_ne;
|
|
+ let a: $usize_ty = crate::mem::transmute(self);
|
|
+ let b: $usize_ty = crate::mem::transmute(other);
|
|
+ Simd(simd_ne(a.0, b.0))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise less-than comparison.
|
|
+ #[inline]
|
|
+ pub fn lt(self, other: Self) -> $mask_ty {
|
|
+ unsafe {
|
|
+ use crate::llvm::simd_lt;
|
|
+ let a: $usize_ty = crate::mem::transmute(self);
|
|
+ let b: $usize_ty = crate::mem::transmute(other);
|
|
+ Simd(simd_lt(a.0, b.0))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise less-than-or-equals comparison.
|
|
+ #[inline]
|
|
+ pub fn le(self, other: Self) -> $mask_ty {
|
|
+ unsafe {
|
|
+ use crate::llvm::simd_le;
|
|
+ let a: $usize_ty = crate::mem::transmute(self);
|
|
+ let b: $usize_ty = crate::mem::transmute(other);
|
|
+ Simd(simd_le(a.0, b.0))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise greater-than comparison.
|
|
+ #[inline]
|
|
+ pub fn gt(self, other: Self) -> $mask_ty {
|
|
+ unsafe {
|
|
+ use crate::llvm::simd_gt;
|
|
+ let a: $usize_ty = crate::mem::transmute(self);
|
|
+ let b: $usize_ty = crate::mem::transmute(other);
|
|
+ Simd(simd_gt(a.0, b.0))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise greater-than-or-equals comparison.
|
|
+ #[inline]
|
|
+ pub fn ge(self, other: Self) -> $mask_ty {
|
|
+ unsafe {
|
|
+ use crate::llvm::simd_ge;
|
|
+ let a: $usize_ty = crate::mem::transmute(self);
|
|
+ let b: $usize_ty = crate::mem::transmute(other);
|
|
+ Simd(simd_ge(a.0, b.0))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_vertical>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn cmp() {
|
|
+ let a = $id::<i32>::null();
|
|
+ let b = $id::<i32>::splat(unsafe {
|
|
+ crate::mem::transmute(1_isize)
|
|
+ });
|
|
+
|
|
+ let r = a.lt(b);
|
|
+ let e = $mask_ty::splat(true);
|
|
+ assert!(r == e);
|
|
+ let r = a.le(b);
|
|
+ assert!(r == e);
|
|
+
|
|
+ let e = $mask_ty::splat(false);
|
|
+ let r = a.gt(b);
|
|
+ assert!(r == e);
|
|
+ let r = a.ge(b);
|
|
+ assert!(r == e);
|
|
+ let r = a.eq(b);
|
|
+ assert!(r == e);
|
|
+
|
|
+ let mut a = a;
|
|
+ let mut b = b;
|
|
+ let mut e = e;
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ a = a.replace(
|
|
+ i,
|
|
+ unsafe { crate::mem::transmute(0_isize) }
|
|
+ );
|
|
+ b = b.replace(
|
|
+ i,
|
|
+ unsafe { crate::mem::transmute(1_isize) }
|
|
+ );
|
|
+ e = e.replace(i, true);
|
|
+ } else {
|
|
+ a = a.replace(
|
|
+ i,
|
|
+ unsafe { crate::mem::transmute(1_isize) }
|
|
+ );
|
|
+ b = b.replace(
|
|
+ i,
|
|
+ unsafe { crate::mem::transmute(0_isize) }
|
|
+ );
|
|
+ e = e.replace(i, false);
|
|
+ }
|
|
+ }
|
|
+ let r = a.lt(b);
|
|
+ assert!(r == e);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ #[allow(clippy::partialeq_ne_impl)]
|
|
+ impl<T> crate::cmp::PartialEq<$id<T>> for $id<T> {
|
|
+ #[inline]
|
|
+ fn eq(&self, other: &Self) -> bool {
|
|
+ $id::<T>::eq(*self, *other).all()
|
|
+ }
|
|
+ #[inline]
|
|
+ fn ne(&self, other: &Self) -> bool {
|
|
+ $id::<T>::ne(*self, *other).any()
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
|
|
+ #[allow(clippy::partialeq_ne_impl)]
|
|
+ impl<T> crate::cmp::PartialEq<LexicographicallyOrdered<$id<T>>>
|
|
+ for LexicographicallyOrdered<$id<T>>
|
|
+ {
|
|
+ #[inline]
|
|
+ fn eq(&self, other: &Self) -> bool {
|
|
+ self.0 == other.0
|
|
+ }
|
|
+ #[inline]
|
|
+ fn ne(&self, other: &Self) -> bool {
|
|
+ self.0 != other.0
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_PartialEq>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn partial_eq() {
|
|
+ let a = $id::<i32>::null();
|
|
+ let b = $id::<i32>::splat(unsafe {
|
|
+ crate::mem::transmute(1_isize)
|
|
+ });
|
|
+
|
|
+ assert!(a != b);
|
|
+ assert!(!(a == b));
|
|
+ assert!(a == a);
|
|
+ assert!(!(a != a));
|
|
+
|
|
+ if $id::<i32>::lanes() > 1 {
|
|
+ let a = $id::<i32>::null().replace(0, unsafe {
|
|
+ crate::mem::transmute(1_isize)
|
|
+ });
|
|
+ let b = $id::<i32>::splat(unsafe {
|
|
+ crate::mem::transmute(1_isize)
|
|
+ });
|
|
+
|
|
+ assert!(a != b);
|
|
+ assert!(!(a == b));
|
|
+ assert!(a == a);
|
|
+ assert!(!(a != a));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> crate::cmp::Eq for $id<T> {}
|
|
+ impl<T> crate::cmp::Eq for LexicographicallyOrdered<$id<T>> {}
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _cmp_eq>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn eq() {
|
|
+ fn foo<E: crate::cmp::Eq>(_: E) {}
|
|
+ let a = $id::<i32>::null();
|
|
+ foo(a);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> From<[$elem_ty; $elem_count]> for $id<T> {
|
|
+ #[inline]
|
|
+ fn from(array: [$elem_ty; $elem_count]) -> Self {
|
|
+ unsafe {
|
|
+ // FIXME: unnecessary zeroing; better than UB.
|
|
+ let mut u: Self = crate::mem::zeroed();
|
|
+ crate::ptr::copy_nonoverlapping(
|
|
+ &array as *const [$elem_ty; $elem_count] as *const u8,
|
|
+ &mut u as *mut Self as *mut u8,
|
|
+ crate::mem::size_of::<Self>()
|
|
+ );
|
|
+ u
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ impl<T> Into<[$elem_ty; $elem_count]> for $id<T> {
|
|
+ #[inline]
|
|
+ fn into(self) -> [$elem_ty; $elem_count] {
|
|
+ unsafe {
|
|
+ // FIXME: unnecessary zeroing; better than UB.
|
|
+ let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed();
|
|
+ crate::ptr::copy_nonoverlapping(
|
|
+ &self as *const $id<T> as *const u8,
|
|
+ &mut u as *mut [$elem_ty; $elem_count] as *mut u8,
|
|
+ crate::mem::size_of::<Self>()
|
|
+ );
|
|
+ u
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _from>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn array() {
|
|
+ let values = [1_i32; $elem_count];
|
|
+
|
|
+ let mut vec: $id<i32> = Default::default();
|
|
+ let mut array = [
|
|
+ $id::<i32>::null().extract(0); $elem_count
|
|
+ ];
|
|
+
|
|
+ for i in 0..$elem_count {
|
|
+ let ptr = unsafe {
|
|
+ crate::mem::transmute(
|
|
+ &values[i] as *const i32
|
|
+ )
|
|
+ };
|
|
+ vec = vec.replace(i, ptr);
|
|
+ array[i] = ptr;
|
|
+ }
|
|
+
|
|
+ // FIXME: there is no impl of From<$id<T>> for [$elem_ty; N]
|
|
+ // let a0 = From::from(vec);
|
|
+ // assert_eq!(a0, array);
|
|
+ #[allow(unused_assignments)]
|
|
+ let mut a1 = array;
|
|
+ a1 = vec.into();
|
|
+ assert_eq!(a1, array);
|
|
+
|
|
+ let v0: $id<i32> = From::from(array);
|
|
+ assert_eq!(v0, vec);
|
|
+ let v1: $id<i32> = array.into();
|
|
+ assert_eq!(v1, vec);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> $id<T> {
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
|
|
+ /// to an `align_of::<Self>()` boundary.
|
|
+ #[inline]
|
|
+ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr = slice.get_unchecked(0) as *const $elem_ty;
|
|
+ assert!(
|
|
+ target_ptr.align_offset(crate::mem::align_of::<Self>())
|
|
+ == 0
|
|
+ );
|
|
+ Self::from_slice_aligned_unchecked(slice)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()`.
|
|
+ #[inline]
|
|
+ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ Self::from_slice_unaligned_unchecked(slice)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
|
|
+ /// to an `align_of::<Self>()` boundary, the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty])
|
|
+ -> Self {
|
|
+ #[allow(clippy::cast_ptr_alignment)]
|
|
+ *(slice.get_unchecked(0) as *const $elem_ty as *const Self)
|
|
+ }
|
|
+
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn from_slice_unaligned_unchecked(
|
|
+ slice: &[$elem_ty],
|
|
+ ) -> Self {
|
|
+ use crate::mem::size_of;
|
|
+ let target_ptr =
|
|
+ slice.get_unchecked(0) as *const $elem_ty as *const u8;
|
|
+ let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty);
|
|
+ let self_ptr = &mut x as *mut Self as *mut u8;
|
|
+ crate::ptr::copy_nonoverlapping(
|
|
+ target_ptr,
|
|
+ self_ptr,
|
|
+ size_of::<Self>(),
|
|
+ );
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _slice_from_slice>] {
|
|
+ use super::*;
|
|
+ use crate::iter::Iterator;
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn from_slice_unaligned() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+
|
|
+ let mut unaligned = [
|
|
+ non_null; $id::<i32>::lanes() + 1
|
|
+ ];
|
|
+ unaligned[0] = null;
|
|
+ let vec = $id::<i32>::from_slice_unaligned(
|
|
+ &unaligned[1..]
|
|
+ );
|
|
+ for (index, &b) in unaligned.iter().enumerate() {
|
|
+ if index == 0 {
|
|
+ assert_eq!(b, null);
|
|
+ } else {
|
|
+ assert_eq!(b, non_null);
|
|
+ assert_eq!(b, vec.extract(index - 1));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn from_slice_unaligned_fail() {
|
|
+ let (_null, non_null) = ptr_vals!($id<i32>);
|
|
+ let unaligned = [non_null; $id::<i32>::lanes() + 1];
|
|
+ // the slice is not large enough => panic
|
|
+ let _vec = $id::<i32>::from_slice_unaligned(
|
|
+ &unaligned[2..]
|
|
+ );
|
|
+ }
|
|
+
|
|
+ union A {
|
|
+ data: [<$id<i32> as sealed::Simd>::Element;
|
|
+ 2 * $id::<i32>::lanes()],
|
|
+ _vec: $id<i32>,
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn from_slice_aligned() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+ let mut aligned = A {
|
|
+ data: [null; 2 * $id::<i32>::lanes()],
|
|
+ };
|
|
+ for i in
|
|
+ $id::<i32>::lanes()..(2 * $id::<i32>::lanes()) {
|
|
+ unsafe {
|
|
+ aligned.data[i] = non_null;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let vec = unsafe {
|
|
+ $id::<i32>::from_slice_aligned(
|
|
+ &aligned.data[$id::<i32>::lanes()..]
|
|
+ )
|
|
+ };
|
|
+ for (index, &b) in unsafe {
|
|
+ aligned.data.iter().enumerate()
|
|
+ } {
|
|
+ if index < $id::<i32>::lanes() {
|
|
+ assert_eq!(b, null);
|
|
+ } else {
|
|
+ assert_eq!(b, non_null);
|
|
+ assert_eq!(
|
|
+ b, vec.extract(index - $id::<i32>::lanes())
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn from_slice_aligned_fail_lanes() {
|
|
+ let (_null, non_null) = ptr_vals!($id<i32>);
|
|
+ let aligned = A {
|
|
+ data: [non_null; 2 * $id::<i32>::lanes()],
|
|
+ };
|
|
+ // the slice is not large enough => panic
|
|
+ let _vec = unsafe {
|
|
+ $id::<i32>::from_slice_aligned(
|
|
+ &aligned.data[2 * $id::<i32>::lanes()..]
|
|
+ )
|
|
+ };
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn from_slice_aligned_fail_align() {
|
|
+ unsafe {
|
|
+ let (null, _non_null) = ptr_vals!($id<i32>);
|
|
+ let aligned = A {
|
|
+ data: [null; 2 * $id::<i32>::lanes()],
|
|
+ };
|
|
+
|
|
+ // get a pointer to the front of data
|
|
+ let ptr = aligned.data.as_ptr();
|
|
+ // offset pointer by one element
|
|
+ let ptr = ptr.wrapping_add(1);
|
|
+
|
|
+ if ptr.align_offset(
|
|
+ crate::mem::align_of::<$id<i32>>()
|
|
+ ) == 0 {
|
|
+ // the pointer is properly aligned, so
|
|
+ // from_slice_aligned won't fail here (e.g. this
|
|
+ // can happen for i128x1). So we panic to make
|
|
+ // the "should_fail" test pass:
|
|
+ panic!("ok");
|
|
+ }
|
|
+
|
|
+ // create a slice - this is safe, because the
|
|
+ // elements of the slice exist, are properly
|
|
+ // initialized, and properly aligned:
|
|
+ let s = slice::from_raw_parts(
|
|
+ ptr, $id::<i32>::lanes()
|
|
+ );
|
|
+ // this should always panic because the slice
|
|
+ // alignment does not match the alignment
|
|
+ // requirements for the vector type:
|
|
+ let _vec = $id::<i32>::from_slice_aligned(s);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> $id<T> {
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
|
|
+ /// aligned to an `align_of::<Self>()` boundary.
|
|
+ #[inline]
|
|
+ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr =
|
|
+ slice.get_unchecked_mut(0) as *mut $elem_ty;
|
|
+ assert!(
|
|
+ target_ptr.align_offset(crate::mem::align_of::<Self>())
|
|
+ == 0
|
|
+ );
|
|
+ self.write_to_slice_aligned_unchecked(slice);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()`.
|
|
+ #[inline]
|
|
+ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ self.write_to_slice_unaligned_unchecked(slice);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
|
|
+ /// aligned to an `align_of::<Self>()` boundary, the behavior is
|
|
+ /// undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn write_to_slice_aligned_unchecked(
|
|
+ self, slice: &mut [$elem_ty],
|
|
+ ) {
|
|
+ #[allow(clippy::cast_ptr_alignment)]
|
|
+ *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) =
|
|
+ self;
|
|
+ }
|
|
+
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn write_to_slice_unaligned_unchecked(
|
|
+ self, slice: &mut [$elem_ty],
|
|
+ ) {
|
|
+ let target_ptr =
|
|
+ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8;
|
|
+ let self_ptr = &self as *const Self as *const u8;
|
|
+ crate::ptr::copy_nonoverlapping(
|
|
+ self_ptr,
|
|
+ target_ptr,
|
|
+ crate::mem::size_of::<Self>(),
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _slice_write_to_slice>] {
|
|
+ use super::*;
|
|
+ use crate::iter::Iterator;
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn write_to_slice_unaligned() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+ let mut unaligned = [null; $id::<i32>::lanes() + 1];
|
|
+ let vec = $id::<i32>::splat(non_null);
|
|
+ vec.write_to_slice_unaligned(&mut unaligned[1..]);
|
|
+ for (index, &b) in unaligned.iter().enumerate() {
|
|
+ if index == 0 {
|
|
+ assert_eq!(b, null);
|
|
+ } else {
|
|
+ assert_eq!(b, non_null);
|
|
+ assert_eq!(b, vec.extract(index - 1));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn write_to_slice_unaligned_fail() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+ let mut unaligned = [null; $id::<i32>::lanes() + 1];
|
|
+ let vec = $id::<i32>::splat(non_null);
|
|
+ // the slice is not large enough => panic
|
|
+ vec.write_to_slice_unaligned(&mut unaligned[2..]);
|
|
+ }
|
|
+
|
|
+ union A {
|
|
+ data: [<$id<i32> as sealed::Simd>::Element;
|
|
+ 2 * $id::<i32>::lanes()],
|
|
+ _vec: $id<i32>,
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn write_to_slice_aligned() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+ let mut aligned = A {
|
|
+ data: [null; 2 * $id::<i32>::lanes()],
|
|
+ };
|
|
+ let vec = $id::<i32>::splat(non_null);
|
|
+ unsafe {
|
|
+ vec.write_to_slice_aligned(
|
|
+ &mut aligned.data[$id::<i32>::lanes()..]
|
|
+ )
|
|
+ };
|
|
+ for (index, &b) in
|
|
+ unsafe { aligned.data.iter().enumerate() } {
|
|
+ if index < $id::<i32>::lanes() {
|
|
+ assert_eq!(b, null);
|
|
+ } else {
|
|
+ assert_eq!(b, non_null);
|
|
+ assert_eq!(
|
|
+ b, vec.extract(index - $id::<i32>::lanes())
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn write_to_slice_aligned_fail_lanes() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+ let mut aligned = A {
|
|
+ data: [null; 2 * $id::<i32>::lanes()],
|
|
+ };
|
|
+ let vec = $id::<i32>::splat(non_null);
|
|
+ // the slice is not large enough => panic
|
|
+ unsafe {
|
|
+ vec.write_to_slice_aligned(
|
|
+ &mut aligned.data[2 * $id::<i32>::lanes()..]
|
|
+ )
|
|
+ };
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn write_to_slice_aligned_fail_align() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+ unsafe {
|
|
+ let mut aligned = A {
|
|
+ data: [null; 2 * $id::<i32>::lanes()],
|
|
+ };
|
|
+
|
|
+ // get a pointer to the front of data
|
|
+ let ptr = aligned.data.as_mut_ptr();
|
|
+ // offset pointer by one element
|
|
+ let ptr = ptr.wrapping_add(1);
|
|
+
|
|
+ if ptr.align_offset(
|
|
+ crate::mem::align_of::<$id<i32>>()
|
|
+ ) == 0 {
|
|
+ // the pointer is properly aligned, so
|
|
+ // write_to_slice_aligned won't fail here (e.g.
|
|
+ // this can happen for i128x1). So we panic to
|
|
+ // make the "should_fail" test pass:
|
|
+ panic!("ok");
|
|
+ }
|
|
+
|
|
+ // create a slice - this is safe, because the
|
|
+ // elements of the slice exist, are properly
|
|
+ // initialized, and properly aligned:
|
|
+ let s = slice::from_raw_parts_mut(
|
|
+ ptr, $id::<i32>::lanes()
|
|
+ );
|
|
+ // this should always panic because the slice
|
|
+ // alignment does not match the alignment
|
|
+ // requirements for the vector type:
|
|
+ let vec = $id::<i32>::splat(non_null);
|
|
+ vec.write_to_slice_aligned(s);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> crate::hash::Hash for $id<T> {
|
|
+ #[inline]
|
|
+ fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
|
|
+ let s: $usize_ty = unsafe { crate::mem::transmute(*self) };
|
|
+ s.hash(state)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _hash>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn hash() {
|
|
+ use crate::hash::{Hash, Hasher};
|
|
+ #[allow(deprecated)]
|
|
+ use crate::hash::{SipHasher13};
|
|
+
|
|
+ let values = [1_i32; $elem_count];
|
|
+
|
|
+ let mut vec: $id<i32> = Default::default();
|
|
+ let mut array = [
|
|
+ $id::<i32>::null().extract(0);
|
|
+ $elem_count
|
|
+ ];
|
|
+
|
|
+ for i in 0..$elem_count {
|
|
+ let ptr = unsafe {
|
|
+ crate::mem::transmute(
|
|
+ &values[i] as *const i32
|
|
+ )
|
|
+ };
|
|
+ vec = vec.replace(i, ptr);
|
|
+ array[i] = ptr;
|
|
+ }
|
|
+
|
|
+ #[allow(deprecated)]
|
|
+ let mut a_hash = SipHasher13::new();
|
|
+ let mut v_hash = a_hash.clone();
|
|
+ array.hash(&mut a_hash);
|
|
+ vec.hash(&mut v_hash);
|
|
+ assert_eq!(a_hash.finish(), v_hash.finish());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> $id<T> {
|
|
+ /// Calculates the offset from a pointer.
|
|
+ ///
|
|
+ /// `count` is in units of `T`; e.g. a count of `3` represents a
|
|
+ /// pointer offset of `3 * size_of::<T>()` bytes.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// If any of the following conditions are violated, the result is
|
|
+ /// Undefined Behavior:
|
|
+ ///
|
|
+ /// * Both the starting and resulting pointer must be either in
|
|
+ /// bounds or one byte past the end of an allocated object.
|
|
+ ///
|
|
+ /// * The computed offset, in bytes, cannot overflow an `isize`.
|
|
+ ///
|
|
+ /// * The offset being in bounds cannot rely on "wrapping around"
|
|
+ /// the address space. That is, the infinite-precision sum, in bytes
|
|
+ /// must fit in a `usize`.
|
|
+ ///
|
|
+ /// The compiler and standard library generally tries to ensure
|
|
+ /// allocations never reach a size where an offset is a concern. For
|
|
+ /// instance, `Vec` and `Box` ensure they never allocate more than
|
|
+ /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)`
|
|
+ /// is always safe.
|
|
+ ///
|
|
+ /// Most platforms fundamentally can't even construct such an
|
|
+ /// allocation. For instance, no known 64-bit platform can ever
|
|
+ /// serve a request for 263 bytes due to page-table limitations or
|
|
+ /// splitting the address space. However, some 32-bit and 16-bit
|
|
+ /// platforms may successfully serve a request for more than
|
|
+ /// `isize::MAX` bytes with things like Physical Address Extension.
|
|
+ /// As such, memory acquired directly from allocators or memory
|
|
+ /// mapped files may be too large to handle with this function.
|
|
+ ///
|
|
+ /// Consider using `wrapping_offset` instead if these constraints
|
|
+ /// are difficult to satisfy. The only advantage of this method is
|
|
+ /// that it enables more aggressive compiler optimizations.
|
|
+ #[inline]
|
|
+ pub unsafe fn offset(self, count: $isize_ty) -> Self {
|
|
+ // FIXME: should use LLVM's `add nsw nuw`
|
|
+ self.wrapping_offset(count)
|
|
+ }
|
|
+
|
|
+ /// Calculates the offset from a pointer using wrapping arithmetic.
|
|
+ ///
|
|
+ /// `count` is in units of `T`; e.g. a count of `3` represents a
|
|
+ /// pointer offset of `3 * size_of::<T>()` bytes.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// The resulting pointer does not need to be in bounds, but it is
|
|
+ /// potentially hazardous to dereference (which requires unsafe).
|
|
+ ///
|
|
+ /// Always use `.offset(count)` instead when possible, because
|
|
+ /// offset allows the compiler to optimize better.
|
|
+ #[inline]
|
|
+ pub fn wrapping_offset(self, count: $isize_ty) -> Self {
|
|
+ unsafe {
|
|
+ let x: $isize_ty = crate::mem::transmute(self);
|
|
+ // note: {+,*} currently performs a `wrapping_{add, mul}`
|
|
+ crate::mem::transmute(
|
|
+ x + (count * crate::mem::size_of::<T>() as isize)
|
|
+ )
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Calculates the distance between two pointers.
|
|
+ ///
|
|
+ /// The returned value is in units of `T`: the distance in bytes is
|
|
+ /// divided by `mem::size_of::<T>()`.
|
|
+ ///
|
|
+ /// This function is the inverse of offset.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// If any of the following conditions are violated, the result is
|
|
+ /// Undefined Behavior:
|
|
+ ///
|
|
+ /// * Both the starting and other pointer must be either in bounds
|
|
+ /// or one byte past the end of the same allocated object.
|
|
+ ///
|
|
+ /// * The distance between the pointers, in bytes, cannot overflow
|
|
+ /// an `isize`.
|
|
+ ///
|
|
+ /// * The distance between the pointers, in bytes, must be an exact
|
|
+ /// multiple of the size of `T`.
|
|
+ ///
|
|
+ /// * The distance being in bounds cannot rely on "wrapping around"
|
|
+ /// the address space.
|
|
+ ///
|
|
+ /// The compiler and standard library generally try to ensure
|
|
+ /// allocations never reach a size where an offset is a concern. For
|
|
+ /// instance, `Vec` and `Box` ensure they never allocate more than
|
|
+ /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())`
|
|
+ /// is always safe.
|
|
+ ///
|
|
+ /// Most platforms fundamentally can't even construct such an
|
|
+ /// allocation. For instance, no known 64-bit platform can ever
|
|
+ /// serve a request for 2<sup>63</sup> bytes due to page-table limitations or
|
|
+ /// splitting the address space. However, some 32-bit and 16-bit
|
|
+ /// platforms may successfully serve a request for more than
|
|
+ /// `isize::MAX` bytes with things like Physical Address Extension.
|
|
+ /// As such, memory acquired directly from allocators or memory
|
|
+ /// mapped files may be too large to handle with this function.
|
|
+ ///
|
|
+ /// Consider using `wrapping_offset_from` instead if these constraints
|
|
+ /// are difficult to satisfy. The only advantage of this method is
|
|
+ /// that it enables more aggressive compiler optimizations.
|
|
+ #[inline]
|
|
+ pub unsafe fn offset_from(self, origin: Self) -> $isize_ty {
|
|
+ // FIXME: should use LLVM's `sub nsw nuw`.
|
|
+ self.wrapping_offset_from(origin)
|
|
+ }
|
|
+
|
|
+ /// Calculates the distance between two pointers (`self - origin`).
|
|
+ ///
|
|
+ /// The returned value is in units of `T`: the distance in bytes is
|
|
+ /// divided by `mem::size_of::<T>()`.
|
|
+ ///
|
|
+ /// If the address difference between the two pointers is not a
|
|
+ /// multiple of `mem::size_of::<T>()` then the result of the
|
|
+ /// division is rounded towards zero.
|
|
+ ///
|
|
+ /// Though this method is safe for any two pointers, note that its
|
|
+ /// result will be mostly useless if the two pointers aren't into
|
|
+ /// the same allocated object, for example if they point to two
|
|
+ /// different local variables.
|
|
+ #[inline]
|
|
+ pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty {
|
|
+ let x: $isize_ty = unsafe { crate::mem::transmute(self) };
|
|
+ let y: $isize_ty = unsafe { crate::mem::transmute(origin) };
|
|
+ // `self - origin`; note: {-,/} currently perform wrapping_{sub, div}
|
|
+ (x - y) / (crate::mem::size_of::<T>() as isize)
|
|
+ }
|
|
+
|
|
+ /// Calculates the offset from a pointer (convenience for
|
|
+ /// `.offset(count as isize)`).
|
|
+ ///
|
|
+ /// `count` is in units of `T`; e.g. a count of 3 represents a
|
|
+ /// pointer offset of `3 * size_of::<T>()` bytes.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// If any of the following conditions are violated, the result is
|
|
+ /// Undefined Behavior:
|
|
+ ///
|
|
+ /// * Both the starting and resulting pointer must be either in
|
|
+ /// bounds or one byte past the end of an allocated object.
|
|
+ ///
|
|
+ /// * The computed offset, in bytes, cannot overflow an `isize`.
|
|
+ ///
|
|
+ /// * The offset being in bounds cannot rely on "wrapping around"
|
|
+ /// the address space. That is, the infinite-precision sum must fit
|
|
+ /// in a `usize`.
|
|
+ ///
|
|
+ /// The compiler and standard library generally try to ensure
|
|
+ /// allocations never reach a size where an offset is a concern. For
|
|
+ /// instance, `Vec` and `Box` ensure they never allocate more than
|
|
+ /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always
|
|
+ /// safe.
|
|
+ ///
|
|
+ /// Most platforms fundamentally can't even construct such an
|
|
+ /// allocation. For instance, no known 64-bit platform can ever
|
|
+ /// serve a request for 2<sup>63</sup> bytes due to page-table limitations or
|
|
+ /// splitting the address space. However, some 32-bit and 16-bit
|
|
+ /// platforms may successfully serve a request for more than
|
|
+ /// `isize::MAX` bytes with things like Physical Address Extension.
|
|
+ /// As such, memory acquired directly from allocators or memory
|
|
+ /// mapped files may be too large to handle with this function.
|
|
+ ///
|
|
+ /// Consider using `wrapping_offset` instead if these constraints
|
|
+ /// are difficult to satisfy. The only advantage of this method is
|
|
+ /// that it enables more aggressive compiler optimizations.
|
|
+ #[inline]
|
|
+ #[allow(clippy::should_implement_trait)]
|
|
+ pub unsafe fn add(self, count: $usize_ty) -> Self {
|
|
+ self.offset(count.cast())
|
|
+ }
|
|
+
|
|
+ /// Calculates the offset from a pointer (convenience for
|
|
+ /// `.offset((count as isize).wrapping_neg())`).
|
|
+ ///
|
|
+ /// `count` is in units of T; e.g. a `count` of 3 represents a
|
|
+ /// pointer offset of `3 * size_of::<T>()` bytes.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// If any of the following conditions are violated, the result is
|
|
+ /// Undefined Behavior:
|
|
+ ///
|
|
+ /// * Both the starting and resulting pointer must be either in
|
|
+ /// bounds or one byte past the end of an allocated object.
|
|
+ ///
|
|
+ /// * The computed offset cannot exceed `isize::MAX` **bytes**.
|
|
+ ///
|
|
+ /// * The offset being in bounds cannot rely on "wrapping around"
|
|
+ /// the address space. That is, the infinite-precision sum must fit
|
|
+ /// in a usize.
|
|
+ ///
|
|
+ /// The compiler and standard library generally try to ensure
|
|
+ /// allocations never reach a size where an offset is a concern. For
|
|
+ /// instance, `Vec` and `Box` ensure they never allocate more than
|
|
+ /// `isize::MAX` bytes, so
|
|
+ /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe.
|
|
+ ///
|
|
+ /// Most platforms fundamentally can't even construct such an
|
|
+ /// allocation. For instance, no known 64-bit platform can ever
|
|
+ /// serve a request for 2<sup>63</sup> bytes due to page-table
|
|
+ /// limitations or splitting the address space. However, some 32-bit
|
|
+ /// and 16-bit platforms may successfully serve a request for more
|
|
+ /// than `isize::MAX` bytes with things like Physical Address
|
|
+ /// Extension. As such, memory acquired directly from allocators or
|
|
+ /// memory mapped files *may* be too large to handle with this
|
|
+ /// function.
|
|
+ ///
|
|
+ /// Consider using `wrapping_offset` instead if these constraints
|
|
+ /// are difficult to satisfy. The only advantage of this method is
|
|
+ /// that it enables more aggressive compiler optimizations.
|
|
+ #[inline]
|
|
+ #[allow(clippy::should_implement_trait)]
|
|
+ pub unsafe fn sub(self, count: $usize_ty) -> Self {
|
|
+ let x: $isize_ty = count.cast();
|
|
+ // note: - is currently wrapping_neg
|
|
+ self.offset(-x)
|
|
+ }
|
|
+
|
|
+ /// Calculates the offset from a pointer using wrapping arithmetic.
|
|
+ /// (convenience for `.wrapping_offset(count as isize)`)
|
|
+ ///
|
|
+ /// `count` is in units of T; e.g. a `count` of 3 represents a
|
|
+ /// pointer offset of `3 * size_of::<T>()` bytes.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// The resulting pointer does not need to be in bounds, but it is
|
|
+ /// potentially hazardous to dereference (which requires `unsafe`).
|
|
+ ///
|
|
+ /// Always use `.add(count)` instead when possible, because `add`
|
|
+ /// allows the compiler to optimize better.
|
|
+ #[inline]
|
|
+ pub fn wrapping_add(self, count: $usize_ty) -> Self {
|
|
+ self.wrapping_offset(count.cast())
|
|
+ }
|
|
+
|
|
+ /// Calculates the offset from a pointer using wrapping arithmetic.
|
|
+ /// (convenience for `.wrapping_offset((count as
|
|
+ /// isize).wrapping_neg())`)
|
|
+ ///
|
|
+ /// `count` is in units of T; e.g. a `count` of 3 represents a
|
|
+ /// pointer offset of `3 * size_of::<T>()` bytes.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// The resulting pointer does not need to be in bounds, but it is
|
|
+ /// potentially hazardous to dereference (which requires `unsafe`).
|
|
+ ///
|
|
+ /// Always use `.sub(count)` instead when possible, because `sub`
|
|
+ /// allows the compiler to optimize better.
|
|
+ #[inline]
|
|
+ pub fn wrapping_sub(self, count: $usize_ty) -> Self {
|
|
+ let x: $isize_ty = count.cast();
|
|
+ self.wrapping_offset(-1 * x)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<T> $id<T> {
|
|
+ /// Shuffle vector elements according to `indices`.
|
|
+ #[inline]
|
|
+ pub fn shuffle1_dyn<I>(self, indices: I) -> Self
|
|
+ where
|
|
+ Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>,
|
|
+ {
|
|
+ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _shuffle1_dyn>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn shuffle1_dyn() {
|
|
+ let (null, non_null) = ptr_vals!($id<i32>);
|
|
+
|
|
+ // alternating = [non_null, null, non_null, null, ...]
|
|
+ let mut alternating = $id::<i32>::splat(null);
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ alternating = alternating.replace(i, non_null);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ type Indices = <$id<i32>
|
|
+ as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices;
|
|
+ // even = [0, 0, 2, 2, 4, 4, ..]
|
|
+ let even = {
|
|
+ let mut v = Indices::splat(0);
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ v = v.replace(i, (i as u8).into());
|
|
+ } else {
|
|
+ v = v.replace(i, (i as u8 - 1).into());
|
|
+ }
|
|
+ }
|
|
+ v
|
|
+ };
|
|
+ // odd = [1, 1, 3, 3, 5, 5, ...]
|
|
+ let odd = {
|
|
+ let mut v = Indices::splat(0);
|
|
+ for i in 0..$id::<i32>::lanes() {
|
|
+ if i % 2 != 0 {
|
|
+ v = v.replace(i, (i as u8).into());
|
|
+ } else {
|
|
+ v = v.replace(i, (i as u8 + 1).into());
|
|
+ }
|
|
+ }
|
|
+ v
|
|
+ };
|
|
+
|
|
+ assert_eq!(
|
|
+ alternating.shuffle1_dyn(even),
|
|
+ $id::<i32>::splat(non_null)
|
|
+ );
|
|
+ if $id::<i32>::lanes() > 1 {
|
|
+ assert_eq!(
|
|
+ alternating.shuffle1_dyn(odd),
|
|
+ $id::<i32>::splat(null)
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops.rs b/third_party/rust/packed_simd/src/api/ops.rs
|
|
new file mode 100644
|
|
index 000000000000..f71c98795da3
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops.rs
|
|
@@ -0,0 +1,32 @@
|
|
+//! Implementation of the `ops` traits
|
|
+#[macro_use]
|
|
+mod vector_mask_bitwise;
|
|
+#[macro_use]
|
|
+mod scalar_mask_bitwise;
|
|
+
|
|
+#[macro_use]
|
|
+mod vector_arithmetic;
|
|
+#[macro_use]
|
|
+mod scalar_arithmetic;
|
|
+
|
|
+#[macro_use]
|
|
+mod vector_bitwise;
|
|
+#[macro_use]
|
|
+mod scalar_bitwise;
|
|
+
|
|
+#[macro_use]
|
|
+mod vector_shifts;
|
|
+#[macro_use]
|
|
+mod scalar_shifts;
|
|
+
|
|
+#[macro_use]
|
|
+mod vector_rotates;
|
|
+
|
|
+#[macro_use]
|
|
+mod vector_neg;
|
|
+
|
|
+#[macro_use]
|
|
+mod vector_int_min_max;
|
|
+
|
|
+#[macro_use]
|
|
+mod vector_float_min_max;
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs
|
|
new file mode 100644
|
|
index 000000000000..da1a2037eaaf
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs
|
|
@@ -0,0 +1,203 @@
|
|
+//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations.
|
|
+
|
|
+macro_rules! impl_ops_scalar_arithmetic {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::ops::Add<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn add(self, other: $elem_ty) -> Self {
|
|
+ self + $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::Add<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn add(self, other: $id) -> $id {
|
|
+ $id::splat(self) + other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Sub<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn sub(self, other: $elem_ty) -> Self {
|
|
+ self - $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::Sub<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn sub(self, other: $id) -> $id {
|
|
+ $id::splat(self) - other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Mul<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn mul(self, other: $elem_ty) -> Self {
|
|
+ self * $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::Mul<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn mul(self, other: $id) -> $id {
|
|
+ $id::splat(self) * other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Div<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn div(self, other: $elem_ty) -> Self {
|
|
+ self / $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::Div<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn div(self, other: $id) -> $id {
|
|
+ $id::splat(self) / other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Rem<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn rem(self, other: $elem_ty) -> Self {
|
|
+ self % $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::Rem<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn rem(self, other: $id) -> $id {
|
|
+ $id::splat(self) % other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::AddAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn add_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self + other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::SubAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn sub_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self - other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::MulAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn mul_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self * other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::DivAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn div_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self / other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::RemAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn rem_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self % other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_scalar_arith>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn ops_scalar_arithmetic() {
|
|
+ let zi = 0 as $elem_ty;
|
|
+ let oi = 1 as $elem_ty;
|
|
+ let ti = 2 as $elem_ty;
|
|
+ let fi = 4 as $elem_ty;
|
|
+ let z = $id::splat(zi);
|
|
+ let o = $id::splat(oi);
|
|
+ let t = $id::splat(ti);
|
|
+ let f = $id::splat(fi);
|
|
+
|
|
+ // add
|
|
+ assert_eq!(zi + z, z);
|
|
+ assert_eq!(z + zi, z);
|
|
+ assert_eq!(oi + z, o);
|
|
+ assert_eq!(o + zi, o);
|
|
+ assert_eq!(ti + z, t);
|
|
+ assert_eq!(t + zi, t);
|
|
+ assert_eq!(ti + t, f);
|
|
+ assert_eq!(t + ti, f);
|
|
+ // sub
|
|
+ assert_eq!(zi - z, z);
|
|
+ assert_eq!(z - zi, z);
|
|
+ assert_eq!(oi - z, o);
|
|
+ assert_eq!(o - zi, o);
|
|
+ assert_eq!(ti - z, t);
|
|
+ assert_eq!(t - zi, t);
|
|
+ assert_eq!(fi - t, t);
|
|
+ assert_eq!(f - ti, t);
|
|
+ assert_eq!(f - o - o, t);
|
|
+ assert_eq!(f - oi - oi, t);
|
|
+ // mul
|
|
+ assert_eq!(zi * z, z);
|
|
+ assert_eq!(z * zi, z);
|
|
+ assert_eq!(zi * o, z);
|
|
+ assert_eq!(z * oi, z);
|
|
+ assert_eq!(zi * t, z);
|
|
+ assert_eq!(z * ti, z);
|
|
+ assert_eq!(oi * t, t);
|
|
+ assert_eq!(o * ti, t);
|
|
+ assert_eq!(ti * t, f);
|
|
+ assert_eq!(t * ti, f);
|
|
+ // div
|
|
+ assert_eq!(zi / o, z);
|
|
+ assert_eq!(z / oi, z);
|
|
+ assert_eq!(ti / o, t);
|
|
+ assert_eq!(t / oi, t);
|
|
+ assert_eq!(fi / o, f);
|
|
+ assert_eq!(f / oi, f);
|
|
+ assert_eq!(ti / t, o);
|
|
+ assert_eq!(t / ti, o);
|
|
+ assert_eq!(fi / t, t);
|
|
+ assert_eq!(f / ti, t);
|
|
+ // rem
|
|
+ assert_eq!(oi % o, z);
|
|
+ assert_eq!(o % oi, z);
|
|
+ assert_eq!(fi % t, z);
|
|
+ assert_eq!(f % ti, z);
|
|
+
|
|
+ {
|
|
+ let mut v = z;
|
|
+ assert_eq!(v, z);
|
|
+ v += oi; // add_assign
|
|
+ assert_eq!(v, o);
|
|
+ v -= oi; // sub_assign
|
|
+ assert_eq!(v, z);
|
|
+ v = t;
|
|
+ v *= oi; // mul_assign
|
|
+ assert_eq!(v, t);
|
|
+ v *= ti;
|
|
+ assert_eq!(v, f);
|
|
+ v /= oi; // div_assign
|
|
+ assert_eq!(v, f);
|
|
+ v /= ti;
|
|
+ assert_eq!(v, t);
|
|
+ v %= ti; // rem_assign
|
|
+ assert_eq!(v, z);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs
|
|
new file mode 100644
|
|
index 000000000000..88216769aec4
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs
|
|
@@ -0,0 +1,162 @@
|
|
+//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations.
|
|
+
|
|
+macro_rules! impl_ops_scalar_bitwise {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $test_tt:tt |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ impl crate::ops::BitXor<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitxor(self, other: $elem_ty) -> Self {
|
|
+ self ^ $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXor<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn bitxor(self, other: $id) -> $id {
|
|
+ $id::splat(self) ^ other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::BitAnd<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitand(self, other: $elem_ty) -> Self {
|
|
+ self & $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitAnd<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn bitand(self, other: $id) -> $id {
|
|
+ $id::splat(self) & other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::BitOr<$elem_ty> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitor(self, other: $elem_ty) -> Self {
|
|
+ self | $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOr<$id> for $elem_ty {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn bitor(self, other: $id) -> $id {
|
|
+ $id::splat(self) | other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::BitAndAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn bitand_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self & other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOrAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn bitor_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self | other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXorAssign<$elem_ty> for $id {
|
|
+ #[inline]
|
|
+ fn bitxor_assign(&mut self, other: $elem_ty) {
|
|
+ *self = *self ^ other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_scalar_bitwise>] {
|
|
+ use super::*;
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn ops_scalar_bitwise() {
|
|
+ let zi = 0 as $elem_ty;
|
|
+ let oi = 1 as $elem_ty;
|
|
+ let ti = 2 as $elem_ty;
|
|
+ let z = $id::splat(zi);
|
|
+ let o = $id::splat(oi);
|
|
+ let t = $id::splat(ti);
|
|
+
|
|
+ // BitAnd:
|
|
+ assert_eq!(oi & o, o);
|
|
+ assert_eq!(o & oi, o);
|
|
+ assert_eq!(oi & z, z);
|
|
+ assert_eq!(o & zi, z);
|
|
+ assert_eq!(zi & o, z);
|
|
+ assert_eq!(z & oi, z);
|
|
+ assert_eq!(zi & z, z);
|
|
+ assert_eq!(z & zi, z);
|
|
+
|
|
+ assert_eq!(ti & t, t);
|
|
+ assert_eq!(t & ti, t);
|
|
+ assert_eq!(ti & o, z);
|
|
+ assert_eq!(t & oi, z);
|
|
+ assert_eq!(oi & t, z);
|
|
+ assert_eq!(o & ti, z);
|
|
+
|
|
+ // BitOr:
|
|
+ assert_eq!(oi | o, o);
|
|
+ assert_eq!(o | oi, o);
|
|
+ assert_eq!(oi | z, o);
|
|
+ assert_eq!(o | zi, o);
|
|
+ assert_eq!(zi | o, o);
|
|
+ assert_eq!(z | oi, o);
|
|
+ assert_eq!(zi | z, z);
|
|
+ assert_eq!(z | zi, z);
|
|
+
|
|
+ assert_eq!(ti | t, t);
|
|
+ assert_eq!(t | ti, t);
|
|
+ assert_eq!(zi | t, t);
|
|
+ assert_eq!(z | ti, t);
|
|
+ assert_eq!(ti | z, t);
|
|
+ assert_eq!(t | zi, t);
|
|
+
|
|
+ // BitXOR:
|
|
+ assert_eq!(oi ^ o, z);
|
|
+ assert_eq!(o ^ oi, z);
|
|
+ assert_eq!(zi ^ z, z);
|
|
+ assert_eq!(z ^ zi, z);
|
|
+ assert_eq!(zi ^ o, o);
|
|
+ assert_eq!(z ^ oi, o);
|
|
+ assert_eq!(oi ^ z, o);
|
|
+ assert_eq!(o ^ zi, o);
|
|
+
|
|
+ assert_eq!(ti ^ t, z);
|
|
+ assert_eq!(t ^ ti, z);
|
|
+ assert_eq!(ti ^ z, t);
|
|
+ assert_eq!(t ^ zi, t);
|
|
+ assert_eq!(zi ^ t, t);
|
|
+ assert_eq!(z ^ ti, t);
|
|
+
|
|
+ {
|
|
+ // AndAssign:
|
|
+ let mut v = o;
|
|
+ v &= ti;
|
|
+ assert_eq!(v, z);
|
|
+ }
|
|
+ {
|
|
+ // OrAssign:
|
|
+ let mut v = z;
|
|
+ v |= oi;
|
|
+ assert_eq!(v, o);
|
|
+ }
|
|
+ {
|
|
+ // XORAssign:
|
|
+ let mut v = z;
|
|
+ v ^= oi;
|
|
+ assert_eq!(v, o);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs
|
|
new file mode 100644
|
|
index 000000000000..523a85207b6b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs
|
|
@@ -0,0 +1,140 @@
|
|
+//! Vertical (lane-wise) vector-scalar / scalar-vector mask bitwise operations.
|
|
+
|
|
+macro_rules! impl_ops_scalar_mask_bitwise {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $test_tt:tt |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ impl crate::ops::BitXor<bool> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitxor(self, other: bool) -> Self {
|
|
+ self ^ $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXor<$id> for bool {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn bitxor(self, other: $id) -> $id {
|
|
+ $id::splat(self) ^ other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::BitAnd<bool> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitand(self, other: bool) -> Self {
|
|
+ self & $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitAnd<$id> for bool {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn bitand(self, other: $id) -> $id {
|
|
+ $id::splat(self) & other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::BitOr<bool> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitor(self, other: bool) -> Self {
|
|
+ self | $id::splat(other)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOr<$id> for bool {
|
|
+ type Output = $id;
|
|
+ #[inline]
|
|
+ fn bitor(self, other: $id) -> $id {
|
|
+ $id::splat(self) | other
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::BitAndAssign<bool> for $id {
|
|
+ #[inline]
|
|
+ fn bitand_assign(&mut self, other: bool) {
|
|
+ *self = *self & other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOrAssign<bool> for $id {
|
|
+ #[inline]
|
|
+ fn bitor_assign(&mut self, other: bool) {
|
|
+ *self = *self | other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXorAssign<bool> for $id {
|
|
+ #[inline]
|
|
+ fn bitxor_assign(&mut self, other: bool) {
|
|
+ *self = *self ^ other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_scalar_mask_bitwise>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn ops_scalar_mask_bitwise() {
|
|
+ let ti = true;
|
|
+ let fi = false;
|
|
+ let t = $id::splat(ti);
|
|
+ let f = $id::splat(fi);
|
|
+ assert!(t != f);
|
|
+ assert!(!(t == f));
|
|
+
|
|
+ // BitAnd:
|
|
+ assert_eq!(ti & f, f);
|
|
+ assert_eq!(t & fi, f);
|
|
+ assert_eq!(fi & t, f);
|
|
+ assert_eq!(f & ti, f);
|
|
+ assert_eq!(ti & t, t);
|
|
+ assert_eq!(t & ti, t);
|
|
+ assert_eq!(fi & f, f);
|
|
+ assert_eq!(f & fi, f);
|
|
+
|
|
+ // BitOr:
|
|
+ assert_eq!(ti | f, t);
|
|
+ assert_eq!(t | fi, t);
|
|
+ assert_eq!(fi | t, t);
|
|
+ assert_eq!(f | ti, t);
|
|
+ assert_eq!(ti | t, t);
|
|
+ assert_eq!(t | ti, t);
|
|
+ assert_eq!(fi | f, f);
|
|
+ assert_eq!(f | fi, f);
|
|
+
|
|
+ // BitXOR:
|
|
+ assert_eq!(ti ^ f, t);
|
|
+ assert_eq!(t ^ fi, t);
|
|
+ assert_eq!(fi ^ t, t);
|
|
+ assert_eq!(f ^ ti, t);
|
|
+ assert_eq!(ti ^ t, f);
|
|
+ assert_eq!(t ^ ti, f);
|
|
+ assert_eq!(fi ^ f, f);
|
|
+ assert_eq!(f ^ fi, f);
|
|
+
|
|
+ {
|
|
+ // AndAssign:
|
|
+ let mut v = f;
|
|
+ v &= ti;
|
|
+ assert_eq!(v, f);
|
|
+ }
|
|
+ {
|
|
+ // OrAssign:
|
|
+ let mut v = f;
|
|
+ v |= ti;
|
|
+ assert_eq!(v, t);
|
|
+ }
|
|
+ {
|
|
+ // XORAssign:
|
|
+ let mut v = f;
|
|
+ v ^= ti;
|
|
+ assert_eq!(v, t);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs
|
|
new file mode 100644
|
|
index 000000000000..9c164ad56c0b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs
|
|
@@ -0,0 +1,107 @@
|
|
+//! Vertical (lane-wise) vector-scalar shift operations.
|
|
+
|
|
+macro_rules! impl_ops_scalar_shifts {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::ops::Shl<u32> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn shl(self, other: u32) -> Self {
|
|
+ self << $id::splat(other as $elem_ty)
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::Shr<u32> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn shr(self, other: u32) -> Self {
|
|
+ self >> $id::splat(other as $elem_ty)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::ShlAssign<u32> for $id {
|
|
+ #[inline]
|
|
+ fn shl_assign(&mut self, other: u32) {
|
|
+ *self = *self << other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::ShrAssign<u32> for $id {
|
|
+ #[inline]
|
|
+ fn shr_assign(&mut self, other: u32) {
|
|
+ *self = *self >> other;
|
|
+ }
|
|
+ }
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_scalar_shifts>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"),
|
|
+ allow(unreachable_code,
|
|
+ unused_variables,
|
|
+ unused_mut)
|
|
+ )]
|
|
+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
|
|
+ fn ops_scalar_shifts() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+
|
|
+ {
|
|
+ let zi = 0 as u32;
|
|
+ let oi = 1 as u32;
|
|
+ let ti = 2 as u32;
|
|
+ let maxi
|
|
+ = (mem::size_of::<$elem_ty>() * 8 - 1) as u32;
|
|
+
|
|
+ // shr
|
|
+ assert_eq!(z >> zi, z);
|
|
+ assert_eq!(z >> oi, z);
|
|
+ assert_eq!(z >> ti, z);
|
|
+ assert_eq!(z >> ti, z);
|
|
+
|
|
+ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ assert_eq!(o >> zi, o);
|
|
+ assert_eq!(t >> zi, t);
|
|
+ assert_eq!(f >> zi, f);
|
|
+ assert_eq!(f >> maxi, z);
|
|
+
|
|
+ assert_eq!(o >> oi, z);
|
|
+ assert_eq!(t >> oi, o);
|
|
+ assert_eq!(t >> ti, z);
|
|
+ assert_eq!(f >> oi, t);
|
|
+ assert_eq!(f >> ti, o);
|
|
+ assert_eq!(f >> maxi, z);
|
|
+
|
|
+ // shl
|
|
+ assert_eq!(z << zi, z);
|
|
+ assert_eq!(o << zi, o);
|
|
+ assert_eq!(t << zi, t);
|
|
+ assert_eq!(f << zi, f);
|
|
+ assert_eq!(f << maxi, z);
|
|
+
|
|
+ assert_eq!(o << oi, t);
|
|
+ assert_eq!(o << ti, f);
|
|
+ assert_eq!(t << oi, f);
|
|
+
|
|
+ { // shr_assign
|
|
+ let mut v = o;
|
|
+ v >>= oi;
|
|
+ assert_eq!(v, z);
|
|
+ }
|
|
+ { // shl_assign
|
|
+ let mut v = o;
|
|
+ v <<= oi;
|
|
+ assert_eq!(v, t);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs
|
|
new file mode 100644
|
|
index 000000000000..7057f52d0317
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs
|
|
@@ -0,0 +1,148 @@
|
|
+//! Vertical (lane-wise) vector-vector arithmetic operations.
|
|
+
|
|
+macro_rules! impl_ops_vector_arithmetic {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::ops::Add for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn add(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_add;
|
|
+ unsafe { Simd(simd_add(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Sub for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn sub(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_sub;
|
|
+ unsafe { Simd(simd_sub(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Mul for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn mul(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_mul;
|
|
+ unsafe { Simd(simd_mul(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Div for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn div(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_div;
|
|
+ unsafe { Simd(simd_div(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::Rem for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn rem(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_rem;
|
|
+ unsafe { Simd(simd_rem(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::AddAssign for $id {
|
|
+ #[inline]
|
|
+ fn add_assign(&mut self, other: Self) {
|
|
+ *self = *self + other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::SubAssign for $id {
|
|
+ #[inline]
|
|
+ fn sub_assign(&mut self, other: Self) {
|
|
+ *self = *self - other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::MulAssign for $id {
|
|
+ #[inline]
|
|
+ fn mul_assign(&mut self, other: Self) {
|
|
+ *self = *self * other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::DivAssign for $id {
|
|
+ #[inline]
|
|
+ fn div_assign(&mut self, other: Self) {
|
|
+ *self = *self / other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::ops::RemAssign for $id {
|
|
+ #[inline]
|
|
+ fn rem_assign(&mut self, other: Self) {
|
|
+ *self = *self % other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_vector_arith>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn ops_vector_arithmetic() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+
|
|
+ // add
|
|
+ assert_eq!(z + z, z);
|
|
+ assert_eq!(o + z, o);
|
|
+ assert_eq!(t + z, t);
|
|
+ assert_eq!(t + t, f);
|
|
+ // sub
|
|
+ assert_eq!(z - z, z);
|
|
+ assert_eq!(o - z, o);
|
|
+ assert_eq!(t - z, t);
|
|
+ assert_eq!(f - t, t);
|
|
+ assert_eq!(f - o - o, t);
|
|
+ // mul
|
|
+ assert_eq!(z * z, z);
|
|
+ assert_eq!(z * o, z);
|
|
+ assert_eq!(z * t, z);
|
|
+ assert_eq!(o * t, t);
|
|
+ assert_eq!(t * t, f);
|
|
+ // div
|
|
+ assert_eq!(z / o, z);
|
|
+ assert_eq!(t / o, t);
|
|
+ assert_eq!(f / o, f);
|
|
+ assert_eq!(t / t, o);
|
|
+ assert_eq!(f / t, t);
|
|
+ // rem
|
|
+ assert_eq!(o % o, z);
|
|
+ assert_eq!(f % t, z);
|
|
+
|
|
+ {
|
|
+ let mut v = z;
|
|
+ assert_eq!(v, z);
|
|
+ v += o; // add_assign
|
|
+ assert_eq!(v, o);
|
|
+ v -= o; // sub_assign
|
|
+ assert_eq!(v, z);
|
|
+ v = t;
|
|
+ v *= o; // mul_assign
|
|
+ assert_eq!(v, t);
|
|
+ v *= t;
|
|
+ assert_eq!(v, f);
|
|
+ v /= o; // div_assign
|
|
+ assert_eq!(v, f);
|
|
+ v /= t;
|
|
+ assert_eq!(v, t);
|
|
+ v %= t; // rem_assign
|
|
+ assert_eq!(v, z);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs
|
|
new file mode 100644
|
|
index 000000000000..7be9603fa261
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs
|
|
@@ -0,0 +1,129 @@
|
|
+//! Vertical (lane-wise) vector-vector bitwise operations.
|
|
+
|
|
+macro_rules! impl_ops_vector_bitwise {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $test_tt:tt |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ impl crate::ops::Not for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn not(self) -> Self {
|
|
+ Self::splat($true) ^ self
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXor for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitxor(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_xor;
|
|
+ unsafe { Simd(simd_xor(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitAnd for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitand(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_and;
|
|
+ unsafe { Simd(simd_and(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOr for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitor(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_or;
|
|
+ unsafe { Simd(simd_or(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitAndAssign for $id {
|
|
+ #[inline]
|
|
+ fn bitand_assign(&mut self, other: Self) {
|
|
+ *self = *self & other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOrAssign for $id {
|
|
+ #[inline]
|
|
+ fn bitor_assign(&mut self, other: Self) {
|
|
+ *self = *self | other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXorAssign for $id {
|
|
+ #[inline]
|
|
+ fn bitxor_assign(&mut self, other: Self) {
|
|
+ *self = *self ^ other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_vector_bitwise>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn ops_vector_bitwise() {
|
|
+
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let m = $id::splat(!z.extract(0));
|
|
+
|
|
+ // Not:
|
|
+ assert_eq!(!z, m);
|
|
+ assert_eq!(!m, z);
|
|
+
|
|
+ // BitAnd:
|
|
+ assert_eq!(o & o, o);
|
|
+ assert_eq!(o & z, z);
|
|
+ assert_eq!(z & o, z);
|
|
+ assert_eq!(z & z, z);
|
|
+
|
|
+ assert_eq!(t & t, t);
|
|
+ assert_eq!(t & o, z);
|
|
+ assert_eq!(o & t, z);
|
|
+
|
|
+ // BitOr:
|
|
+ assert_eq!(o | o, o);
|
|
+ assert_eq!(o | z, o);
|
|
+ assert_eq!(z | o, o);
|
|
+ assert_eq!(z | z, z);
|
|
+
|
|
+ assert_eq!(t | t, t);
|
|
+ assert_eq!(z | t, t);
|
|
+ assert_eq!(t | z, t);
|
|
+
|
|
+ // BitXOR:
|
|
+ assert_eq!(o ^ o, z);
|
|
+ assert_eq!(z ^ z, z);
|
|
+ assert_eq!(z ^ o, o);
|
|
+ assert_eq!(o ^ z, o);
|
|
+
|
|
+ assert_eq!(t ^ t, z);
|
|
+ assert_eq!(t ^ z, t);
|
|
+ assert_eq!(z ^ t, t);
|
|
+
|
|
+ {
|
|
+ // AndAssign:
|
|
+ let mut v = o;
|
|
+ v &= t;
|
|
+ assert_eq!(v, z);
|
|
+ }
|
|
+ {
|
|
+ // OrAssign:
|
|
+ let mut v = z;
|
|
+ v |= o;
|
|
+ assert_eq!(v, o);
|
|
+ }
|
|
+ {
|
|
+ // XORAssign:
|
|
+ let mut v = z;
|
|
+ v ^= o;
|
|
+ assert_eq!(v, o);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs
|
|
new file mode 100644
|
|
index 000000000000..4126e87042f5
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs
|
|
@@ -0,0 +1,69 @@
|
|
+//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors.
|
|
+
|
|
+macro_rules! impl_ops_vector_float_min_max {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Minimum of two vectors.
|
|
+ ///
|
|
+ /// Returns a new vector containing the minimum value of each of
|
|
+ /// the input vector lanes.
|
|
+ #[inline]
|
|
+ pub fn min(self, x: Self) -> Self {
|
|
+ use crate::llvm::simd_fmin;
|
|
+ unsafe { Simd(simd_fmin(self.0, x.0)) }
|
|
+ }
|
|
+
|
|
+ /// Maximum of two vectors.
|
|
+ ///
|
|
+ /// Returns a new vector containing the maximum value of each of
|
|
+ /// the input vector lanes.
|
|
+ #[inline]
|
|
+ pub fn max(self, x: Self) -> Self {
|
|
+ use crate::llvm::simd_fmax;
|
|
+ unsafe { Simd(simd_fmax(self.0, x.0)) }
|
|
+ }
|
|
+ }
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_vector_min_max>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn min_max() {
|
|
+ let n = crate::$elem_ty::NAN;
|
|
+ let o = $id::splat(1. as $elem_ty);
|
|
+ let t = $id::splat(2. as $elem_ty);
|
|
+
|
|
+ let mut m = o; // [1., 2., 1., 2., ...]
|
|
+ let mut on = o;
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ m = m.replace(i, 2. as $elem_ty);
|
|
+ on = on.replace(i, n);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ assert_eq!(o.min(t), o);
|
|
+ assert_eq!(t.min(o), o);
|
|
+ assert_eq!(m.min(o), o);
|
|
+ assert_eq!(o.min(m), o);
|
|
+ assert_eq!(m.min(t), m);
|
|
+ assert_eq!(t.min(m), m);
|
|
+
|
|
+ assert_eq!(o.max(t), t);
|
|
+ assert_eq!(t.max(o), t);
|
|
+ assert_eq!(m.max(o), m);
|
|
+ assert_eq!(o.max(m), m);
|
|
+ assert_eq!(m.max(t), t);
|
|
+ assert_eq!(t.max(m), t);
|
|
+
|
|
+ assert_eq!(on.min(o), o);
|
|
+ assert_eq!(o.min(on), o);
|
|
+ assert_eq!(on.max(o), o);
|
|
+ assert_eq!(o.max(on), o);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs
|
|
new file mode 100644
|
|
index 000000000000..36ea98e6bf32
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs
|
|
@@ -0,0 +1,57 @@
|
|
+//! Vertical (lane-wise) vector `min` and `max` for integer vectors.
|
|
+
|
|
+macro_rules! impl_ops_vector_int_min_max {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Minimum of two vectors.
|
|
+ ///
|
|
+ /// Returns a new vector containing the minimum value of each of
|
|
+ /// the input vector lanes.
|
|
+ #[inline]
|
|
+ pub fn min(self, x: Self) -> Self {
|
|
+ self.lt(x).select(self, x)
|
|
+ }
|
|
+
|
|
+ /// Maximum of two vectors.
|
|
+ ///
|
|
+ /// Returns a new vector containing the maximum value of each of
|
|
+ /// the input vector lanes.
|
|
+ #[inline]
|
|
+ pub fn max(self, x: Self) -> Self {
|
|
+ self.gt(x).select(self, x)
|
|
+ }
|
|
+ }
|
|
+ test_if!{$test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_vector_min_max>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn min_max() {
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+
|
|
+ let mut m = o;
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ m = m.replace(i, 2 as $elem_ty);
|
|
+ }
|
|
+ }
|
|
+ assert_eq!(o.min(t), o);
|
|
+ assert_eq!(t.min(o), o);
|
|
+ assert_eq!(m.min(o), o);
|
|
+ assert_eq!(o.min(m), o);
|
|
+ assert_eq!(m.min(t), m);
|
|
+ assert_eq!(t.min(m), m);
|
|
+
|
|
+ assert_eq!(o.max(t), t);
|
|
+ assert_eq!(t.max(o), t);
|
|
+ assert_eq!(m.max(o), m);
|
|
+ assert_eq!(o.max(m), m);
|
|
+ assert_eq!(m.max(t), t);
|
|
+ assert_eq!(t.max(m), t);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs
|
|
new file mode 100644
|
|
index 000000000000..295fc1ca81c9
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs
|
|
@@ -0,0 +1,116 @@
|
|
+//! Vertical (lane-wise) vector-vector bitwise operations.
|
|
+
|
|
+macro_rules! impl_ops_vector_mask_bitwise {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $test_tt:tt |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ impl crate::ops::Not for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn not(self) -> Self {
|
|
+ Self::splat($true) ^ self
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXor for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitxor(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_xor;
|
|
+ unsafe { Simd(simd_xor(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitAnd for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitand(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_and;
|
|
+ unsafe { Simd(simd_and(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOr for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn bitor(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_or;
|
|
+ unsafe { Simd(simd_or(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitAndAssign for $id {
|
|
+ #[inline]
|
|
+ fn bitand_assign(&mut self, other: Self) {
|
|
+ *self = *self & other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitOrAssign for $id {
|
|
+ #[inline]
|
|
+ fn bitor_assign(&mut self, other: Self) {
|
|
+ *self = *self | other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::BitXorAssign for $id {
|
|
+ #[inline]
|
|
+ fn bitxor_assign(&mut self, other: Self) {
|
|
+ *self = *self ^ other;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_vector_mask_bitwise>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn ops_vector_mask_bitwise() {
|
|
+ let t = $id::splat(true);
|
|
+ let f = $id::splat(false);
|
|
+ assert!(t != f);
|
|
+ assert!(!(t == f));
|
|
+
|
|
+ // Not:
|
|
+ assert_eq!(!t, f);
|
|
+ assert_eq!(t, !f);
|
|
+
|
|
+ // BitAnd:
|
|
+ assert_eq!(t & f, f);
|
|
+ assert_eq!(f & t, f);
|
|
+ assert_eq!(t & t, t);
|
|
+ assert_eq!(f & f, f);
|
|
+
|
|
+ // BitOr:
|
|
+ assert_eq!(t | f, t);
|
|
+ assert_eq!(f | t, t);
|
|
+ assert_eq!(t | t, t);
|
|
+ assert_eq!(f | f, f);
|
|
+
|
|
+ // BitXOR:
|
|
+ assert_eq!(t ^ f, t);
|
|
+ assert_eq!(f ^ t, t);
|
|
+ assert_eq!(t ^ t, f);
|
|
+ assert_eq!(f ^ f, f);
|
|
+
|
|
+ {
|
|
+ // AndAssign:
|
|
+ let mut v = f;
|
|
+ v &= t;
|
|
+ assert_eq!(v, f);
|
|
+ }
|
|
+ {
|
|
+ // OrAssign:
|
|
+ let mut v = f;
|
|
+ v |= t;
|
|
+ assert_eq!(v, t);
|
|
+ }
|
|
+ {
|
|
+ // XORAssign:
|
|
+ let mut v = f;
|
|
+ v ^= t;
|
|
+ assert_eq!(v, t);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_neg.rs b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs
|
|
new file mode 100644
|
|
index 000000000000..e2d91fd2fed6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs
|
|
@@ -0,0 +1,43 @@
|
|
+//! Vertical (lane-wise) vector `Neg`.
|
|
+
|
|
+macro_rules! impl_ops_vector_neg {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::ops::Neg for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn neg(self) -> Self {
|
|
+ Self::splat(-1 as $elem_ty) * self
|
|
+ }
|
|
+ }
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_vector_neg>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn neg() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+
|
|
+ let nz = $id::splat(-(0 as $elem_ty));
|
|
+ let no = $id::splat(-(1 as $elem_ty));
|
|
+ let nt = $id::splat(-(2 as $elem_ty));
|
|
+ let nf = $id::splat(-(4 as $elem_ty));
|
|
+
|
|
+ assert_eq!(-z, nz);
|
|
+ assert_eq!(-o, no);
|
|
+ assert_eq!(-t, nt);
|
|
+ assert_eq!(-f, nf);
|
|
+
|
|
+ assert_eq!(z, -nz);
|
|
+ assert_eq!(o, -no);
|
|
+ assert_eq!(t, -nt);
|
|
+ assert_eq!(f, -nf);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs
|
|
new file mode 100644
|
|
index 000000000000..6c794ecf4b93
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs
|
|
@@ -0,0 +1,90 @@
|
|
+//! Vertical (lane-wise) vector rotates operations.
|
|
+#![allow(unused)]
|
|
+
|
|
+macro_rules! impl_ops_vector_rotates {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Shifts the bits of each lane to the left by the specified
|
|
+ /// amount in the corresponding lane of `n`, wrapping the
|
|
+ /// truncated bits to the end of the resulting integer.
|
|
+ ///
|
|
+ /// Note: this is neither the same operation as `<<` nor equivalent
|
|
+ /// to `slice::rotate_left`.
|
|
+ #[inline]
|
|
+ pub fn rotate_left(self, n: $id) -> $id {
|
|
+ const LANE_WIDTH: $elem_ty =
|
|
+ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8;
|
|
+ // Protect against undefined behavior for over-long bit shifts
|
|
+ let n = n % LANE_WIDTH;
|
|
+ (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH))
|
|
+ }
|
|
+
|
|
+ /// Shifts the bits of each lane to the right by the specified
|
|
+ /// amount in the corresponding lane of `n`, wrapping the
|
|
+ /// truncated bits to the beginning of the resulting integer.
|
|
+ ///
|
|
+ /// Note: this is neither the same operation as `<<` nor equivalent
|
|
+ /// to `slice::rotate_left`.
|
|
+ #[inline]
|
|
+ pub fn rotate_right(self, n: $id) -> $id {
|
|
+ const LANE_WIDTH: $elem_ty =
|
|
+ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8;
|
|
+ // Protect against undefined behavior for over-long bit shifts
|
|
+ let n = n % LANE_WIDTH;
|
|
+ (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ // FIXME:
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/75
|
|
+ #[cfg(not(any(
|
|
+ target_arch = "s390x",
|
|
+ target_arch = "sparc64",
|
|
+ )))]
|
|
+ pub mod [<$id _ops_vector_rotate>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn rotate_ops() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+
|
|
+ let max = $id::splat(
|
|
+ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty);
|
|
+
|
|
+ // rotate_right
|
|
+ assert_eq!(z.rotate_right(z), z);
|
|
+ assert_eq!(z.rotate_right(o), z);
|
|
+ assert_eq!(z.rotate_right(t), z);
|
|
+
|
|
+ assert_eq!(o.rotate_right(z), o);
|
|
+ assert_eq!(t.rotate_right(z), t);
|
|
+ assert_eq!(f.rotate_right(z), f);
|
|
+ assert_eq!(f.rotate_right(max), f << 1);
|
|
+
|
|
+ assert_eq!(o.rotate_right(o), o << max);
|
|
+ assert_eq!(t.rotate_right(o), o);
|
|
+ assert_eq!(t.rotate_right(t), o << max);
|
|
+ assert_eq!(f.rotate_right(o), t);
|
|
+ assert_eq!(f.rotate_right(t), o);
|
|
+
|
|
+ // rotate_left
|
|
+ assert_eq!(z.rotate_left(z), z);
|
|
+ assert_eq!(o.rotate_left(z), o);
|
|
+ assert_eq!(t.rotate_left(z), t);
|
|
+ assert_eq!(f.rotate_left(z), f);
|
|
+ assert_eq!(f.rotate_left(max), t);
|
|
+
|
|
+ assert_eq!(o.rotate_left(o), t);
|
|
+ assert_eq!(o.rotate_left(t), f);
|
|
+ assert_eq!(t.rotate_left(o), f);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs
|
|
new file mode 100644
|
|
index 000000000000..22e1fbc0ec76
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs
|
|
@@ -0,0 +1,107 @@
|
|
+//! Vertical (lane-wise) vector-vector shifts operations.
|
|
+
|
|
+macro_rules! impl_ops_vector_shifts {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl crate::ops::Shl<$id> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn shl(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_shl;
|
|
+ unsafe { Simd(simd_shl(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::Shr<$id> for $id {
|
|
+ type Output = Self;
|
|
+ #[inline]
|
|
+ fn shr(self, other: Self) -> Self {
|
|
+ use crate::llvm::simd_shr;
|
|
+ unsafe { Simd(simd_shr(self.0, other.0)) }
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::ShlAssign<$id> for $id {
|
|
+ #[inline]
|
|
+ fn shl_assign(&mut self, other: Self) {
|
|
+ *self = *self << other;
|
|
+ }
|
|
+ }
|
|
+ impl crate::ops::ShrAssign<$id> for $id {
|
|
+ #[inline]
|
|
+ fn shr_assign(&mut self, other: Self) {
|
|
+ *self = *self >> other;
|
|
+ }
|
|
+ }
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _ops_vector_shifts>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"),
|
|
+ allow(unreachable_code,
|
|
+ unused_variables,
|
|
+ unused_mut)
|
|
+ )]
|
|
+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
|
|
+ fn ops_vector_shifts() {
|
|
+ let z = $id::splat(0 as $elem_ty);
|
|
+ let o = $id::splat(1 as $elem_ty);
|
|
+ let t = $id::splat(2 as $elem_ty);
|
|
+ let f = $id::splat(4 as $elem_ty);
|
|
+
|
|
+ let max =$id::splat(
|
|
+ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty
|
|
+ );
|
|
+
|
|
+ // shr
|
|
+ assert_eq!(z >> z, z);
|
|
+ assert_eq!(z >> o, z);
|
|
+ assert_eq!(z >> t, z);
|
|
+ assert_eq!(z >> t, z);
|
|
+
|
|
+ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] {
|
|
+ // FIXME: rust produces bad codegen for shifts:
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/13
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ assert_eq!(o >> z, o);
|
|
+ assert_eq!(t >> z, t);
|
|
+ assert_eq!(f >> z, f);
|
|
+ assert_eq!(f >> max, z);
|
|
+
|
|
+ assert_eq!(o >> o, z);
|
|
+ assert_eq!(t >> o, o);
|
|
+ assert_eq!(t >> t, z);
|
|
+ assert_eq!(f >> o, t);
|
|
+ assert_eq!(f >> t, o);
|
|
+ assert_eq!(f >> max, z);
|
|
+
|
|
+ // shl
|
|
+ assert_eq!(z << z, z);
|
|
+ assert_eq!(o << z, o);
|
|
+ assert_eq!(t << z, t);
|
|
+ assert_eq!(f << z, f);
|
|
+ assert_eq!(f << max, z);
|
|
+
|
|
+ assert_eq!(o << o, t);
|
|
+ assert_eq!(o << t, f);
|
|
+ assert_eq!(t << o, f);
|
|
+
|
|
+ {
|
|
+ // shr_assign
|
|
+ let mut v = o;
|
|
+ v >>= o;
|
|
+ assert_eq!(v, z);
|
|
+ }
|
|
+ {
|
|
+ // shl_assign
|
|
+ let mut v = o;
|
|
+ v <<= o;
|
|
+ assert_eq!(v, t);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/ptr.rs b/third_party/rust/packed_simd/src/api/ptr.rs
|
|
new file mode 100644
|
|
index 000000000000..d2e523a49faf
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ptr.rs
|
|
@@ -0,0 +1,4 @@
|
|
+//! Vector of pointers
|
|
+
|
|
+#[macro_use]
|
|
+mod gather_scatter;
|
|
diff --git a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs
|
|
new file mode 100644
|
|
index 000000000000..9d8e113bb44f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs
|
|
@@ -0,0 +1,241 @@
|
|
+//! Implements masked gather and scatters for vectors of pointers
|
|
+
|
|
+macro_rules! impl_ptr_read {
|
|
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident
|
|
+ | $test_tt:tt) => {
|
|
+ impl<T> $id<T>
|
|
+ where
|
|
+ [T; $elem_count]: sealed::SimdArray,
|
|
+ {
|
|
+ /// Reads selected vector elements from memory.
|
|
+ ///
|
|
+ /// Instantiates a new vector by reading the values from `self` for
|
|
+ /// those lanes whose `mask` is `true`, and using the elements of
|
|
+ /// `value` otherwise.
|
|
+ ///
|
|
+ /// No memory is accessed for those lanes of `self` whose `mask` is
|
|
+ /// `false`.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// This method is unsafe because it dereferences raw pointers. The
|
|
+ /// pointers must be aligned to `mem::align_of::<T>()`.
|
|
+ #[inline]
|
|
+ pub unsafe fn read<M>(
|
|
+ self, mask: Simd<[M; $elem_count]>,
|
|
+ value: Simd<[T; $elem_count]>,
|
|
+ ) -> Simd<[T; $elem_count]>
|
|
+ where
|
|
+ M: sealed::Mask,
|
|
+ [M; $elem_count]: sealed::SimdArray,
|
|
+ {
|
|
+ use crate::llvm::simd_gather;
|
|
+ Simd(simd_gather(value.0, self.0, mask.0))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ mod [<$id _read>] {
|
|
+ use super::*;
|
|
+ #[test]
|
|
+ fn read() {
|
|
+ let mut v = [0_i32; $elem_count];
|
|
+ for i in 0..$elem_count {
|
|
+ v[i] = i as i32;
|
|
+ }
|
|
+
|
|
+ let mut ptr = $id::<i32>::null();
|
|
+
|
|
+ for i in 0..$elem_count {
|
|
+ ptr = ptr.replace(i, unsafe {
|
|
+ crate::mem::transmute(&v[i] as *const i32)
|
|
+ });
|
|
+ }
|
|
+
|
|
+ // all mask elements are true:
|
|
+ let mask = $mask_ty::splat(true);
|
|
+ let def = Simd::<[i32; $elem_count]>::splat(42_i32);
|
|
+ let r: Simd<[i32; $elem_count]> = unsafe {
|
|
+ ptr.read(mask, def)
|
|
+ };
|
|
+ assert_eq!(
|
|
+ r,
|
|
+ Simd::<[i32; $elem_count]>::from_slice_unaligned(
|
|
+ &v
|
|
+ )
|
|
+ );
|
|
+
|
|
+ let mut mask = mask;
|
|
+ for i in 0..$elem_count {
|
|
+ if i % 2 != 0 {
|
|
+ mask = mask.replace(i, false);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // even mask elements are true, odd ones are false:
|
|
+ let r: Simd<[i32; $elem_count]> = unsafe {
|
|
+ ptr.read(mask, def)
|
|
+ };
|
|
+ let mut e = v;
|
|
+ for i in 0..$elem_count {
|
|
+ if i % 2 != 0 {
|
|
+ e[i] = 42;
|
|
+ }
|
|
+ }
|
|
+ assert_eq!(
|
|
+ r,
|
|
+ Simd::<[i32; $elem_count]>::from_slice_unaligned(
|
|
+ &e
|
|
+ )
|
|
+ );
|
|
+
|
|
+ // all mask elements are false:
|
|
+ let mask = $mask_ty::splat(false);
|
|
+ let def = Simd::<[i32; $elem_count]>::splat(42_i32);
|
|
+ let r: Simd<[i32; $elem_count]> = unsafe {
|
|
+ ptr.read(mask, def) }
|
|
+ ;
|
|
+ assert_eq!(r, def);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! impl_ptr_write {
|
|
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident
|
|
+ | $test_tt:tt) => {
|
|
+ impl<T> $id<T>
|
|
+ where
|
|
+ [T; $elem_count]: sealed::SimdArray,
|
|
+ {
|
|
+ /// Writes selected vector elements to memory.
|
|
+ ///
|
|
+ /// Writes the lanes of `values` for which the mask is `true` to
|
|
+ /// their corresponding memory addresses in `self`.
|
|
+ ///
|
|
+ /// No memory is accessed for those lanes of `self` whose `mask` is
|
|
+ /// `false`.
|
|
+ ///
|
|
+ /// Overlapping memory addresses of `self` are written to in order
|
|
+ /// from the lest-significant to the most-significant element.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// This method is unsafe because it dereferences raw pointers. The
|
|
+ /// pointers must be aligned to `mem::align_of::<T>()`.
|
|
+ #[inline]
|
|
+ pub unsafe fn write<M>(
|
|
+ self, mask: Simd<[M; $elem_count]>,
|
|
+ value: Simd<[T; $elem_count]>,
|
|
+ ) where
|
|
+ M: sealed::Mask,
|
|
+ [M; $elem_count]: sealed::SimdArray,
|
|
+ {
|
|
+ // FIXME:
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/85
|
|
+ #[cfg(not(target_arch = "mips"))]
|
|
+ {
|
|
+ use crate::llvm::simd_scatter;
|
|
+ simd_scatter(value.0, self.0, mask.0)
|
|
+ }
|
|
+ #[cfg(target_arch = "mips")]
|
|
+ {
|
|
+ let m_ptr =
|
|
+ &mask as *const Simd<[M; $elem_count]> as *const M;
|
|
+ for i in 0..$elem_count {
|
|
+ let m = ptr::read(m_ptr.add(i));
|
|
+ if m.test() {
|
|
+ let t_ptr = &self
|
|
+ as *const Simd<[*mut T; $elem_count]>
|
|
+ as *mut *mut T;
|
|
+ let v_ptr = &value as *const Simd<[T; $elem_count]>
|
|
+ as *const T;
|
|
+ ptr::write(
|
|
+ ptr::read(t_ptr.add(i)),
|
|
+ ptr::read(v_ptr.add(i)),
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ mod [<$id _write>] {
|
|
+ use super::*;
|
|
+ #[test]
|
|
+ fn write() {
|
|
+ // fourty_two = [42, 42, 42, ...]
|
|
+ let fourty_two
|
|
+ = Simd::<[i32; $elem_count]>::splat(42_i32);
|
|
+
|
|
+ // This test will write to this array
|
|
+ let mut arr = [0_i32; $elem_count];
|
|
+ for i in 0..$elem_count {
|
|
+ arr[i] = i as i32;
|
|
+ }
|
|
+ // arr = [0, 1, 2, ...]
|
|
+
|
|
+ let mut ptr = $id::<i32>::null();
|
|
+ for i in 0..$elem_count {
|
|
+ ptr = ptr.replace(i, unsafe {
|
|
+ crate::mem::transmute(arr.as_ptr().add(i))
|
|
+ });
|
|
+ }
|
|
+ // ptr = [&arr[0], &arr[1], ...]
|
|
+
|
|
+ // write `fourty_two` to all elements of `v`
|
|
+ {
|
|
+ let backup = arr;
|
|
+ unsafe {
|
|
+ ptr.write($mask_ty::splat(true), fourty_two)
|
|
+ };
|
|
+ assert_eq!(arr, [42_i32; $elem_count]);
|
|
+ arr = backup; // arr = [0, 1, 2, ...]
|
|
+ }
|
|
+
|
|
+ // write 42 to even elements of arr:
|
|
+ {
|
|
+ // set odd elements of the mask to false
|
|
+ let mut mask = $mask_ty::splat(true);
|
|
+ for i in 0..$elem_count {
|
|
+ if i % 2 != 0 {
|
|
+ mask = mask.replace(i, false);
|
|
+ }
|
|
+ }
|
|
+ // mask = [true, false, true, false, ...]
|
|
+
|
|
+ // expected result r = [42, 1, 42, 3, 42, 5, ...]
|
|
+ let mut r = arr;
|
|
+ for i in 0..$elem_count {
|
|
+ if i % 2 == 0 {
|
|
+ r[i] = 42;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let backup = arr;
|
|
+ unsafe { ptr.write(mask, fourty_two) };
|
|
+ assert_eq!(arr, r);
|
|
+ arr = backup; // arr = [0, 1, 2, 3, ...]
|
|
+ }
|
|
+
|
|
+ // write 42 to no elements of arr
|
|
+ {
|
|
+ let backup = arr;
|
|
+ unsafe {
|
|
+ ptr.write($mask_ty::splat(false), fourty_two)
|
|
+ };
|
|
+ assert_eq!(arr, backup);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/reductions.rs b/third_party/rust/packed_simd/src/api/reductions.rs
|
|
new file mode 100644
|
|
index 000000000000..54d2f0cc7f08
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/reductions.rs
|
|
@@ -0,0 +1,12 @@
|
|
+//! Reductions
|
|
+
|
|
+#[macro_use]
|
|
+mod float_arithmetic;
|
|
+#[macro_use]
|
|
+mod integer_arithmetic;
|
|
+#[macro_use]
|
|
+mod bitwise;
|
|
+#[macro_use]
|
|
+mod mask;
|
|
+#[macro_use]
|
|
+mod min_max;
|
|
diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs
|
|
new file mode 100644
|
|
index 000000000000..5bad4f474b16
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs
|
|
@@ -0,0 +1,151 @@
|
|
+//! Implements portable horizontal bitwise vector reductions.
|
|
+#![allow(unused)]
|
|
+
|
|
+macro_rules! impl_reduction_bitwise {
|
|
+ (
|
|
+ [$elem_ty:ident; $elem_count:expr]:
|
|
+ $id:ident | $ielem_ty:ident | $test_tt:tt |
|
|
+ ($convert:expr) |
|
|
+ ($true:expr, $false:expr)
|
|
+ ) => {
|
|
+ impl $id {
|
|
+ /// Lane-wise bitwise `and` of the vector elements.
|
|
+ ///
|
|
+ /// Note: if the vector has one lane, the first element of the
|
|
+ /// vector is returned.
|
|
+ #[inline]
|
|
+ pub fn and(self) -> $elem_ty {
|
|
+ #[cfg(not(target_arch = "aarch64"))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_and;
|
|
+ let r: $ielem_ty = unsafe { simd_reduce_and(self.0) };
|
|
+ $convert(r)
|
|
+ }
|
|
+ #[cfg(target_arch = "aarch64")]
|
|
+ {
|
|
+ // FIXME: broken on aarch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ let mut x = self.extract(0) as $elem_ty;
|
|
+ for i in 1..$id::lanes() {
|
|
+ x &= self.extract(i) as $elem_ty;
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise bitwise `or` of the vector elements.
|
|
+ ///
|
|
+ /// Note: if the vector has one lane, the first element of the
|
|
+ /// vector is returned.
|
|
+ #[inline]
|
|
+ pub fn or(self) -> $elem_ty {
|
|
+ #[cfg(not(target_arch = "aarch64"))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_or;
|
|
+ let r: $ielem_ty = unsafe { simd_reduce_or(self.0) };
|
|
+ $convert(r)
|
|
+ }
|
|
+ #[cfg(target_arch = "aarch64")]
|
|
+ {
|
|
+ // FIXME: broken on aarch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ let mut x = self.extract(0) as $elem_ty;
|
|
+ for i in 1..$id::lanes() {
|
|
+ x |= self.extract(i) as $elem_ty;
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Lane-wise bitwise `xor` of the vector elements.
|
|
+ ///
|
|
+ /// Note: if the vector has one lane, the first element of the
|
|
+ /// vector is returned.
|
|
+ #[inline]
|
|
+ pub fn xor(self) -> $elem_ty {
|
|
+ #[cfg(not(target_arch = "aarch64"))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_xor;
|
|
+ let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) };
|
|
+ $convert(r)
|
|
+ }
|
|
+ #[cfg(target_arch = "aarch64")]
|
|
+ {
|
|
+ // FIXME: broken on aarch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ let mut x = self.extract(0) as $elem_ty;
|
|
+ for i in 1..$id::lanes() {
|
|
+ x ^= self.extract(i) as $elem_ty;
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _reduction_bitwise>] {
|
|
+ use super::*;
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn and() {
|
|
+ let v = $id::splat($false);
|
|
+ assert_eq!(v.and(), $false);
|
|
+ let v = $id::splat($true);
|
|
+ assert_eq!(v.and(), $true);
|
|
+ let v = $id::splat($false);
|
|
+ let v = v.replace(0, $true);
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(v.and(), $false);
|
|
+ } else {
|
|
+ assert_eq!(v.and(), $true);
|
|
+ }
|
|
+ let v = $id::splat($true);
|
|
+ let v = v.replace(0, $false);
|
|
+ assert_eq!(v.and(), $false);
|
|
+
|
|
+ }
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn or() {
|
|
+ let v = $id::splat($false);
|
|
+ assert_eq!(v.or(), $false);
|
|
+ let v = $id::splat($true);
|
|
+ assert_eq!(v.or(), $true);
|
|
+ let v = $id::splat($false);
|
|
+ let v = v.replace(0, $true);
|
|
+ assert_eq!(v.or(), $true);
|
|
+ let v = $id::splat($true);
|
|
+ let v = v.replace(0, $false);
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(v.or(), $true);
|
|
+ } else {
|
|
+ assert_eq!(v.or(), $false);
|
|
+ }
|
|
+ }
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn xor() {
|
|
+ let v = $id::splat($false);
|
|
+ assert_eq!(v.xor(), $false);
|
|
+ let v = $id::splat($true);
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(v.xor(), $false);
|
|
+ } else {
|
|
+ assert_eq!(v.xor(), $true);
|
|
+ }
|
|
+ let v = $id::splat($false);
|
|
+ let v = v.replace(0, $true);
|
|
+ assert_eq!(v.xor(), $true);
|
|
+ let v = $id::splat($true);
|
|
+ let v = v.replace(0, $false);
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(v.xor(), $true);
|
|
+ } else {
|
|
+ assert_eq!(v.xor(), $false);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
|
|
new file mode 100644
|
|
index 000000000000..dd722ae25fdd
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
|
|
@@ -0,0 +1,312 @@
|
|
+//! Implements portable horizontal float vector arithmetic reductions.
|
|
+
|
|
+macro_rules! impl_reduction_float_arithmetic {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Horizontal sum of the vector elements.
|
|
+ ///
|
|
+ /// The intrinsic performs a tree-reduction of the vector elements.
|
|
+ /// That is, for an 8 element vector:
|
|
+ ///
|
|
+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
|
|
+ ///
|
|
+ /// If one of the vector elements is `NaN` the reduction returns
|
|
+ /// `NaN`. The resulting `NaN` is not required to be equal to any
|
|
+ /// of the `NaN`s in the vector.
|
|
+ #[inline]
|
|
+ pub fn sum(self) -> $elem_ty {
|
|
+ #[cfg(not(target_arch = "aarch64"))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_add_ordered;
|
|
+ unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) }
|
|
+ }
|
|
+ #[cfg(target_arch = "aarch64")]
|
|
+ {
|
|
+ // FIXME: broken on AArch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ let mut x = self.extract(0) as $elem_ty;
|
|
+ for i in 1..$id::lanes() {
|
|
+ x += self.extract(i) as $elem_ty;
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Horizontal product of the vector elements.
|
|
+ ///
|
|
+ /// The intrinsic performs a tree-reduction of the vector elements.
|
|
+ /// That is, for an 8 element vector:
|
|
+ ///
|
|
+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
|
|
+ ///
|
|
+ /// If one of the vector elements is `NaN` the reduction returns
|
|
+ /// `NaN`. The resulting `NaN` is not required to be equal to any
|
|
+ /// of the `NaN`s in the vector.
|
|
+ #[inline]
|
|
+ pub fn product(self) -> $elem_ty {
|
|
+ #[cfg(not(target_arch = "aarch64"))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_mul_ordered;
|
|
+ unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) }
|
|
+ }
|
|
+ #[cfg(target_arch = "aarch64")]
|
|
+ {
|
|
+ // FIXME: broken on AArch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ let mut x = self.extract(0) as $elem_ty;
|
|
+ for i in 1..$id::lanes() {
|
|
+ x *= self.extract(i) as $elem_ty;
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::iter::Sum for $id {
|
|
+ #[inline]
|
|
+ fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(0.), crate::ops::Add::add)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::iter::Product for $id {
|
|
+ #[inline]
|
|
+ fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(1.), crate::ops::Mul::mul)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<'a> crate::iter::Sum<&'a $id> for $id {
|
|
+ #[inline]
|
|
+ fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<'a> crate::iter::Product<&'a $id> for $id {
|
|
+ #[inline]
|
|
+ fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _reduction_float_arith>] {
|
|
+ use super::*;
|
|
+ fn alternating(x: usize) -> $id {
|
|
+ let mut v = $id::splat(1 as $elem_ty);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % x == 0 {
|
|
+ v = v.replace(i, 2 as $elem_ty);
|
|
+ }
|
|
+ }
|
|
+ v
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn sum() {
|
|
+ let v = $id::splat(0 as $elem_ty);
|
|
+ assert_eq!(v.sum(), 0 as $elem_ty);
|
|
+ let v = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(v.sum(), $id::lanes() as $elem_ty);
|
|
+ let v = alternating(2);
|
|
+ assert_eq!(
|
|
+ v.sum(),
|
|
+ ($id::lanes() / 2 + $id::lanes()) as $elem_ty
|
|
+ );
|
|
+ }
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn product() {
|
|
+ let v = $id::splat(0 as $elem_ty);
|
|
+ assert_eq!(v.product(), 0 as $elem_ty);
|
|
+ let v = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(v.product(), 1 as $elem_ty);
|
|
+ let f = match $id::lanes() {
|
|
+ 64 => 16,
|
|
+ 32 => 8,
|
|
+ 16 => 4,
|
|
+ _ => 2,
|
|
+ };
|
|
+ let v = alternating(f);
|
|
+ assert_eq!(
|
|
+ v.product(),
|
|
+ (2_usize.pow(($id::lanes() / f) as u32)
|
|
+ as $elem_ty)
|
|
+ );
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[allow(unreachable_code)]
|
|
+ #[allow(unused_mut)]
|
|
+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
|
|
+ fn sum_nan() {
|
|
+ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/6
|
|
+ return;
|
|
+
|
|
+ let n0 = crate::$elem_ty::NAN;
|
|
+ let v0 = $id::splat(-3.0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ let mut v = v0.replace(i, n0);
|
|
+ // If the vector contains a NaN the result is NaN:
|
|
+ assert!(
|
|
+ v.sum().is_nan(),
|
|
+ "nan at {} => {} | {:?}",
|
|
+ i,
|
|
+ v.sum(),
|
|
+ v
|
|
+ );
|
|
+ for j in 0..i {
|
|
+ v = v.replace(j, n0);
|
|
+ assert!(v.sum().is_nan());
|
|
+ }
|
|
+ }
|
|
+ let v = $id::splat(n0);
|
|
+ assert!(v.sum().is_nan(), "all nans | {:?}", v);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[allow(unreachable_code)]
|
|
+ #[allow(unused_mut)]
|
|
+ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
|
|
+ fn product_nan() {
|
|
+ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/6
|
|
+ return;
|
|
+
|
|
+ let n0 = crate::$elem_ty::NAN;
|
|
+ let v0 = $id::splat(-3.0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ let mut v = v0.replace(i, n0);
|
|
+ // If the vector contains a NaN the result is NaN:
|
|
+ assert!(
|
|
+ v.product().is_nan(),
|
|
+ "nan at {} => {} | {:?}",
|
|
+ i,
|
|
+ v.product(),
|
|
+ v
|
|
+ );
|
|
+ for j in 0..i {
|
|
+ v = v.replace(j, n0);
|
|
+ assert!(v.product().is_nan());
|
|
+ }
|
|
+ }
|
|
+ let v = $id::splat(n0);
|
|
+ assert!(v.product().is_nan(), "all nans | {:?}", v);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[allow(unused, dead_code)]
|
|
+ fn sum_roundoff() {
|
|
+ // Performs a tree-reduction
|
|
+ fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty {
|
|
+ assert!(!a.is_empty());
|
|
+ if a.len() == 1 {
|
|
+ a[0]
|
|
+ } else if a.len() == 2 {
|
|
+ a[0] + a[1]
|
|
+ } else {
|
|
+ let mid = a.len() / 2;
|
|
+ let (left, right) = a.split_at(mid);
|
|
+ tree_reduce_sum(left) + tree_reduce_sum(right)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let mut start = crate::$elem_ty::EPSILON;
|
|
+ let mut scalar_reduction = 0. as $elem_ty;
|
|
+
|
|
+ let mut v = $id::splat(0. as $elem_ty);
|
|
+ for i in 0..$id::lanes() {
|
|
+ let c = if i % 2 == 0 { 1e3 } else { -1. };
|
|
+ start *= 3.14 * c;
|
|
+ scalar_reduction += start;
|
|
+ v = v.replace(i, start);
|
|
+ }
|
|
+ let simd_reduction = v.sum();
|
|
+
|
|
+ let mut a = [0. as $elem_ty; $id::lanes()];
|
|
+ v.write_to_slice_unaligned(&mut a);
|
|
+ let tree_reduction = tree_reduce_sum(&a);
|
|
+
|
|
+ // tolerate 1 ULP difference:
|
|
+ let red_bits = simd_reduction.to_bits();
|
|
+ let tree_bits = tree_reduction.to_bits();
|
|
+ assert!(
|
|
+ if red_bits > tree_bits {
|
|
+ red_bits - tree_bits
|
|
+ } else {
|
|
+ tree_bits - red_bits
|
|
+ } < 2,
|
|
+ "vector: {:?} | simd_reduction: {:?} | \
|
|
+ tree_reduction: {} | scalar_reduction: {}",
|
|
+ v,
|
|
+ simd_reduction,
|
|
+ tree_reduction,
|
|
+ scalar_reduction
|
|
+ );
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[allow(unused, dead_code)]
|
|
+ fn product_roundoff() {
|
|
+ // Performs a tree-reduction
|
|
+ fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty {
|
|
+ assert!(!a.is_empty());
|
|
+ if a.len() == 1 {
|
|
+ a[0]
|
|
+ } else if a.len() == 2 {
|
|
+ a[0] * a[1]
|
|
+ } else {
|
|
+ let mid = a.len() / 2;
|
|
+ let (left, right) = a.split_at(mid);
|
|
+ tree_reduce_product(left)
|
|
+ * tree_reduce_product(right)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let mut start = crate::$elem_ty::EPSILON;
|
|
+ let mut scalar_reduction = 1. as $elem_ty;
|
|
+
|
|
+ let mut v = $id::splat(0. as $elem_ty);
|
|
+ for i in 0..$id::lanes() {
|
|
+ let c = if i % 2 == 0 { 1e3 } else { -1. };
|
|
+ start *= 3.14 * c;
|
|
+ scalar_reduction *= start;
|
|
+ v = v.replace(i, start);
|
|
+ }
|
|
+ let simd_reduction = v.product();
|
|
+
|
|
+ let mut a = [0. as $elem_ty; $id::lanes()];
|
|
+ v.write_to_slice_unaligned(&mut a);
|
|
+ let tree_reduction = tree_reduce_product(&a);
|
|
+
|
|
+ // tolerate 1 ULP difference:
|
|
+ let red_bits = simd_reduction.to_bits();
|
|
+ let tree_bits = tree_reduction.to_bits();
|
|
+ assert!(
|
|
+ if red_bits > tree_bits {
|
|
+ red_bits - tree_bits
|
|
+ } else {
|
|
+ tree_bits - red_bits
|
|
+ } < 2,
|
|
+ "vector: {:?} | simd_reduction: {:?} | \
|
|
+ tree_reduction: {} | scalar_reduction: {}",
|
|
+ v,
|
|
+ simd_reduction,
|
|
+ tree_reduction,
|
|
+ scalar_reduction
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs
|
|
new file mode 100644
|
|
index 000000000000..91dffad31032
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs
|
|
@@ -0,0 +1,197 @@
|
|
+//! Implements portable horizontal integer vector arithmetic reductions.
|
|
+
|
|
+macro_rules! impl_reduction_integer_arithmetic {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
|
|
+ | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Horizontal wrapping sum of the vector elements.
|
|
+ ///
|
|
+ /// The intrinsic performs a tree-reduction of the vector elements.
|
|
+ /// That is, for an 8 element vector:
|
|
+ ///
|
|
+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
|
|
+ ///
|
|
+ /// If an operation overflows it returns the mathematical result
|
|
+ /// modulo `2^n` where `n` is the number of bits of the element type.
|
|
+ #[inline]
|
|
+ pub fn wrapping_sum(self) -> $elem_ty {
|
|
+ #[cfg(not(target_arch = "aarch64"))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_add_ordered;
|
|
+ let v: $ielem_ty = unsafe {
|
|
+ simd_reduce_add_ordered(self.0, 0 as $ielem_ty)
|
|
+ };
|
|
+ v as $elem_ty
|
|
+ }
|
|
+ #[cfg(target_arch = "aarch64")]
|
|
+ {
|
|
+ // FIXME: broken on AArch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ let mut x = self.extract(0) as $elem_ty;
|
|
+ for i in 1..$id::lanes() {
|
|
+ x = x.wrapping_add(self.extract(i) as $elem_ty);
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Horizontal wrapping product of the vector elements.
|
|
+ ///
|
|
+ /// The intrinsic performs a tree-reduction of the vector elements.
|
|
+ /// That is, for an 8 element vector:
|
|
+ ///
|
|
+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
|
|
+ ///
|
|
+ /// If an operation overflows it returns the mathematical result
|
|
+ /// modulo `2^n` where `n` is the number of bits of the element type.
|
|
+ #[inline]
|
|
+ pub fn wrapping_product(self) -> $elem_ty {
|
|
+ #[cfg(not(target_arch = "aarch64"))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_mul_ordered;
|
|
+ let v: $ielem_ty = unsafe {
|
|
+ simd_reduce_mul_ordered(self.0, 1 as $ielem_ty)
|
|
+ };
|
|
+ v as $elem_ty
|
|
+ }
|
|
+ #[cfg(target_arch = "aarch64")]
|
|
+ {
|
|
+ // FIXME: broken on AArch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ let mut x = self.extract(0) as $elem_ty;
|
|
+ for i in 1..$id::lanes() {
|
|
+ x = x.wrapping_mul(self.extract(i) as $elem_ty);
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::iter::Sum for $id {
|
|
+ #[inline]
|
|
+ fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(0), crate::ops::Add::add)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::iter::Product for $id {
|
|
+ #[inline]
|
|
+ fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(1), crate::ops::Mul::mul)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<'a> crate::iter::Sum<&'a $id> for $id {
|
|
+ #[inline]
|
|
+ fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl<'a> crate::iter::Product<&'a $id> for $id {
|
|
+ #[inline]
|
|
+ fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
|
|
+ iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _reduction_int_arith>] {
|
|
+ use super::*;
|
|
+
|
|
+ fn alternating(x: usize) -> $id {
|
|
+ let mut v = $id::splat(1 as $elem_ty);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % x == 0 {
|
|
+ v = v.replace(i, 2 as $elem_ty);
|
|
+ }
|
|
+ }
|
|
+ v
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn wrapping_sum() {
|
|
+ let v = $id::splat(0 as $elem_ty);
|
|
+ assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
|
|
+ let v = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
|
|
+ let v = alternating(2);
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(
|
|
+ v.wrapping_sum(),
|
|
+ ($id::lanes() / 2 + $id::lanes()) as $elem_ty
|
|
+ );
|
|
+ } else {
|
|
+ assert_eq!(
|
|
+ v.wrapping_sum(),
|
|
+ 2 as $elem_ty
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn wrapping_sum_overflow() {
|
|
+ let start = $elem_ty::max_value()
|
|
+ - ($id::lanes() as $elem_ty / 2);
|
|
+
|
|
+ let v = $id::splat(start as $elem_ty);
|
|
+ let vwrapping_sum = v.wrapping_sum();
|
|
+
|
|
+ let mut wrapping_sum = start;
|
|
+ for _ in 1..$id::lanes() {
|
|
+ wrapping_sum = wrapping_sum.wrapping_add(start);
|
|
+ }
|
|
+ assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn wrapping_product() {
|
|
+ let v = $id::splat(0 as $elem_ty);
|
|
+ assert_eq!(v.wrapping_product(), 0 as $elem_ty);
|
|
+ let v = $id::splat(1 as $elem_ty);
|
|
+ assert_eq!(v.wrapping_product(), 1 as $elem_ty);
|
|
+ let f = match $id::lanes() {
|
|
+ 64 => 16,
|
|
+ 32 => 8,
|
|
+ 16 => 4,
|
|
+ _ => 2,
|
|
+ };
|
|
+ let v = alternating(f);
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(
|
|
+ v.wrapping_product(),
|
|
+ (2_usize.pow(($id::lanes() / f) as u32)
|
|
+ as $elem_ty)
|
|
+ );
|
|
+ } else {
|
|
+ assert_eq!(
|
|
+ v.wrapping_product(),
|
|
+ 2 as $elem_ty
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn wrapping_product_overflow() {
|
|
+ let start = $elem_ty::max_value()
|
|
+ - ($id::lanes() as $elem_ty / 2);
|
|
+
|
|
+ let v = $id::splat(start as $elem_ty);
|
|
+ let vmul = v.wrapping_product();
|
|
+
|
|
+ let mut mul = start;
|
|
+ for _ in 1..$id::lanes() {
|
|
+ mul = mul.wrapping_mul(start);
|
|
+ }
|
|
+ assert_eq!(mul, vmul, "v = {:?}", v);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs
|
|
new file mode 100644
|
|
index 000000000000..0dd6a84e7e8d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs
|
|
@@ -0,0 +1,89 @@
|
|
+//! Implements portable horizontal mask reductions.
|
|
+
|
|
+macro_rules! impl_reduction_mask {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Are `all` vector lanes `true`?
|
|
+ #[inline]
|
|
+ pub fn all(self) -> bool {
|
|
+ unsafe { crate::codegen::reductions::mask::All::all(self) }
|
|
+ }
|
|
+ /// Is `any` vector lane `true`?
|
|
+ #[inline]
|
|
+ pub fn any(self) -> bool {
|
|
+ unsafe { crate::codegen::reductions::mask::Any::any(self) }
|
|
+ }
|
|
+ /// Are `all` vector lanes `false`?
|
|
+ #[inline]
|
|
+ pub fn none(self) -> bool {
|
|
+ !self.any()
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _reduction>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn all() {
|
|
+ let a = $id::splat(true);
|
|
+ assert!(a.all());
|
|
+ let a = $id::splat(false);
|
|
+ assert!(!a.all());
|
|
+
|
|
+ if $id::lanes() > 1 {
|
|
+ for i in 0..$id::lanes() {
|
|
+ let mut a = $id::splat(true);
|
|
+ a = a.replace(i, false);
|
|
+ assert!(!a.all());
|
|
+ let mut a = $id::splat(false);
|
|
+ a = a.replace(i, true);
|
|
+ assert!(!a.all());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn any() {
|
|
+ let a = $id::splat(true);
|
|
+ assert!(a.any());
|
|
+ let a = $id::splat(false);
|
|
+ assert!(!a.any());
|
|
+
|
|
+ if $id::lanes() > 1 {
|
|
+ for i in 0..$id::lanes() {
|
|
+ let mut a = $id::splat(true);
|
|
+ a = a.replace(i, false);
|
|
+ assert!(a.any());
|
|
+ let mut a = $id::splat(false);
|
|
+ a = a.replace(i, true);
|
|
+ assert!(a.any());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn none() {
|
|
+ let a = $id::splat(true);
|
|
+ assert!(!a.none());
|
|
+ let a = $id::splat(false);
|
|
+ assert!(a.none());
|
|
+
|
|
+ if $id::lanes() > 1 {
|
|
+ for i in 0..$id::lanes() {
|
|
+ let mut a = $id::splat(true);
|
|
+ a = a.replace(i, false);
|
|
+ assert!(!a.none());
|
|
+ let mut a = $id::splat(false);
|
|
+ a = a.replace(i, true);
|
|
+ assert!(!a.none());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs
|
|
new file mode 100644
|
|
index 000000000000..c4d3aa10f15c
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs
|
|
@@ -0,0 +1,377 @@
|
|
+//! Implements portable horizontal vector min/max reductions.
|
|
+
|
|
+macro_rules! impl_reduction_min_max {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident
|
|
+ | $ielem_ty:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Largest vector element value.
|
|
+ #[inline]
|
|
+ pub fn max_element(self) -> $elem_ty {
|
|
+ #[cfg(not(any(
|
|
+ target_arch = "aarch64",
|
|
+ target_arch = "arm",
|
|
+ target_arch = "powerpc64",
|
|
+ target_arch = "wasm32",
|
|
+ )))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_max;
|
|
+ let v: $ielem_ty = unsafe { simd_reduce_max(self.0) };
|
|
+ v as $elem_ty
|
|
+ }
|
|
+ #[cfg(any(
|
|
+ target_arch = "aarch64",
|
|
+ target_arch = "arm",
|
|
+ target_arch = "powerpc64",
|
|
+ target_arch = "wasm32",
|
|
+ ))]
|
|
+ {
|
|
+ // FIXME: broken on AArch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ // FIXME: broken on WASM32
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/91
|
|
+ let mut x = self.extract(0);
|
|
+ for i in 1..$id::lanes() {
|
|
+ x = x.max(self.extract(i));
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Smallest vector element value.
|
|
+ #[inline]
|
|
+ pub fn min_element(self) -> $elem_ty {
|
|
+ #[cfg(not(any(
|
|
+ target_arch = "aarch64",
|
|
+ target_arch = "arm",
|
|
+ all(target_arch = "x86", not(target_feature = "sse2")),
|
|
+ target_arch = "powerpc64",
|
|
+ target_arch = "wasm32",
|
|
+ ),))]
|
|
+ {
|
|
+ use crate::llvm::simd_reduce_min;
|
|
+ let v: $ielem_ty = unsafe { simd_reduce_min(self.0) };
|
|
+ v as $elem_ty
|
|
+ }
|
|
+ #[cfg(any(
|
|
+ target_arch = "aarch64",
|
|
+ target_arch = "arm",
|
|
+ all(target_arch = "x86", not(target_feature = "sse2")),
|
|
+ target_arch = "powerpc64",
|
|
+ target_arch = "wasm32",
|
|
+ ))]
|
|
+ {
|
|
+ // FIXME: broken on AArch64
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
|
|
+ // FIXME: broken on i586-unknown-linux-gnu
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/22
|
|
+ // FIXME: broken on WASM32
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/91
|
|
+ let mut x = self.extract(0);
|
|
+ for i in 1..$id::lanes() {
|
|
+ x = x.min(self.extract(i));
|
|
+ }
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ test_if! {$test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _reduction_min_max>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ pub fn max_element() {
|
|
+ let v = $id::splat(0 as $elem_ty);
|
|
+ assert_eq!(v.max_element(), 0 as $elem_ty);
|
|
+ if $id::lanes() > 1 {
|
|
+ let v = v.replace(1, 1 as $elem_ty);
|
|
+ assert_eq!(v.max_element(), 1 as $elem_ty);
|
|
+ }
|
|
+ let v = v.replace(0, 2 as $elem_ty);
|
|
+ assert_eq!(v.max_element(), 2 as $elem_ty);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ pub fn min_element() {
|
|
+ let v = $id::splat(0 as $elem_ty);
|
|
+ assert_eq!(v.min_element(), 0 as $elem_ty);
|
|
+ if $id::lanes() > 1 {
|
|
+ let v = v.replace(1, 1 as $elem_ty);
|
|
+ assert_eq!(v.min_element(), 0 as $elem_ty);
|
|
+ }
|
|
+ let v = $id::splat(1 as $elem_ty);
|
|
+ let v = v.replace(0, 2 as $elem_ty);
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(v.min_element(), 1 as $elem_ty);
|
|
+ } else {
|
|
+ assert_eq!(v.min_element(), 2 as $elem_ty);
|
|
+ }
|
|
+ if $id::lanes() > 1 {
|
|
+ let v = $id::splat(2 as $elem_ty);
|
|
+ let v = v.replace(1, 1 as $elem_ty);
|
|
+ assert_eq!(v.min_element(), 1 as $elem_ty);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! test_reduction_float_min_max {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ test_if!{
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _reduction_min_max_nan>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn min_element_test() {
|
|
+ let n = crate::$elem_ty::NAN;
|
|
+
|
|
+ assert_eq!(n.min(-3.), -3.);
|
|
+ assert_eq!((-3. as $elem_ty).min(n), -3.);
|
|
+
|
|
+ let v0 = $id::splat(-3.);
|
|
+
|
|
+ let target_with_broken_last_lane_nan = !cfg!(any(
|
|
+ target_arch = "arm", target_arch = "aarch64",
|
|
+ all(target_arch = "x86",
|
|
+ not(target_feature = "sse2")
|
|
+ ),
|
|
+ target_arch = "powerpc64",
|
|
+ target_arch = "wasm32",
|
|
+ ));
|
|
+
|
|
+ // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
|
|
+ for i in 0..$id::lanes() {
|
|
+ // We replace the i-th element of the vector with
|
|
+ // `NaN`: [-3, -3, -3, NaN]
|
|
+ let mut v = v0.replace(i, n);
|
|
+
|
|
+ // If the NaN is in the last place, the LLVM
|
|
+ // implementation of these methods is broken on some
|
|
+ // targets:
|
|
+ if i == $id::lanes() - 1 &&
|
|
+ target_with_broken_last_lane_nan {
|
|
+ // FIXME:
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/5
|
|
+ //
|
|
+ // If there is a NaN, the result should always be
|
|
+ // the smallest element, but currently when the
|
|
+ // last element is NaN the current
|
|
+ // implementation incorrectly returns NaN.
|
|
+ //
|
|
+ // The targets mentioned above use different
|
|
+ // codegen that produces the correct result.
|
|
+ //
|
|
+ // These asserts detect if this behavior changes
|
|
+ assert!(v.min_element().is_nan(),
|
|
+ // FIXME: ^^^ should be -3.
|
|
+ "[A]: nan at {} => {} | {:?}",
|
|
+ i, v.min_element(), v);
|
|
+
|
|
+ // If we replace all the elements in the vector
|
|
+ // up-to the `i-th` lane with `NaN`s, the result
|
|
+ // is still always `-3.` unless all elements of
|
|
+ // the vector are `NaN`s:
|
|
+ //
|
|
+ // This is also broken:
|
|
+ for j in 0..i {
|
|
+ v = v.replace(j, n);
|
|
+ assert!(v.min_element().is_nan(),
|
|
+ // FIXME: ^^^ should be -3.
|
|
+ "[B]: nan at {} => {} | {:?}",
|
|
+ i, v.min_element(), v);
|
|
+ }
|
|
+
|
|
+ // We are done here, since we were in the last
|
|
+ // lane which is the last iteration of the loop.
|
|
+ break
|
|
+ }
|
|
+
|
|
+ // We are not in the last lane, and there is only
|
|
+ // one `NaN` in the vector.
|
|
+
|
|
+ // If the vector has one lane, the result is `NaN`:
|
|
+ if $id::lanes() == 1 {
|
|
+ assert!(v.min_element().is_nan(),
|
|
+ "[C]: all nans | v={:?} | min={} | \
|
|
+ is_nan: {}",
|
|
+ v, v.min_element(),
|
|
+ v.min_element().is_nan()
|
|
+ );
|
|
+
|
|
+ // And we are done, since the vector only has
|
|
+ // one lane anyways.
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ // The vector has more than one lane, since there is
|
|
+ // only one `NaN` in the vector, the result is
|
|
+ // always `-3`.
|
|
+ assert_eq!(v.min_element(), -3.,
|
|
+ "[D]: nan at {} => {} | {:?}",
|
|
+ i, v.min_element(), v);
|
|
+
|
|
+ // If we replace all the elements in the vector
|
|
+ // up-to the `i-th` lane with `NaN`s, the result is
|
|
+ // still always `-3.` unless all elements of the
|
|
+ // vector are `NaN`s:
|
|
+ for j in 0..i {
|
|
+ v = v.replace(j, n);
|
|
+
|
|
+ if i == $id::lanes() - 1 && j == i - 1 {
|
|
+ // All elements of the vector are `NaN`s,
|
|
+ // therefore the result is NaN as well.
|
|
+ //
|
|
+ // Note: the #lanes of the vector is > 1, so
|
|
+ // "i - 1" does not overflow.
|
|
+ assert!(v.min_element().is_nan(),
|
|
+ "[E]: all nans | v={:?} | min={} | \
|
|
+ is_nan: {}",
|
|
+ v, v.min_element(),
|
|
+ v.min_element().is_nan());
|
|
+ } else {
|
|
+ // There are non-`NaN` elements in the
|
|
+ // vector, therefore the result is `-3.`:
|
|
+ assert_eq!(v.min_element(), -3.,
|
|
+ "[F]: nan at {} => {} | {:?}",
|
|
+ i, v.min_element(), v);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // If the vector contains all NaNs the result is NaN:
|
|
+ assert!($id::splat(n).min_element().is_nan(),
|
|
+ "all nans | v={:?} | min={} | is_nan: {}",
|
|
+ $id::splat(n), $id::splat(n).min_element(),
|
|
+ $id::splat(n).min_element().is_nan());
|
|
+ }
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn max_element_test() {
|
|
+ let n = crate::$elem_ty::NAN;
|
|
+
|
|
+ assert_eq!(n.max(-3.), -3.);
|
|
+ assert_eq!((-3. as $elem_ty).max(n), -3.);
|
|
+
|
|
+ let v0 = $id::splat(-3.);
|
|
+
|
|
+ let target_with_broken_last_lane_nan = !cfg!(any(
|
|
+ target_arch = "arm", target_arch = "aarch64",
|
|
+ target_arch = "powerpc64", target_arch = "wasm32",
|
|
+ ));
|
|
+
|
|
+ // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
|
|
+ for i in 0..$id::lanes() {
|
|
+ // We replace the i-th element of the vector with
|
|
+ // `NaN`: [-3, -3, -3, NaN]
|
|
+ let mut v = v0.replace(i, n);
|
|
+
|
|
+ // If the NaN is in the last place, the LLVM
|
|
+ // implementation of these methods is broken on some
|
|
+ // targets:
|
|
+ if i == $id::lanes() - 1 &&
|
|
+ target_with_broken_last_lane_nan {
|
|
+ // FIXME:
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/5
|
|
+ //
|
|
+ // If there is a NaN, the result should
|
|
+ // always be the largest element, but currently
|
|
+ // when the last element is NaN the current
|
|
+ // implementation incorrectly returns NaN.
|
|
+ //
|
|
+ // The targets mentioned above use different
|
|
+ // codegen that produces the correct result.
|
|
+ //
|
|
+ // These asserts detect if this behavior
|
|
+ // changes
|
|
+ assert!(v.max_element().is_nan(),
|
|
+ // FIXME: ^^^ should be -3.
|
|
+ "[A]: nan at {} => {} | {:?}",
|
|
+ i, v.max_element(), v);
|
|
+
|
|
+ // If we replace all the elements in the vector
|
|
+ // up-to the `i-th` lane with `NaN`s, the result
|
|
+ // is still always `-3.` unless all elements of
|
|
+ // the vector are `NaN`s:
|
|
+ //
|
|
+ // This is also broken:
|
|
+ for j in 0..i {
|
|
+ v = v.replace(j, n);
|
|
+ assert!(v.max_element().is_nan(),
|
|
+ // FIXME: ^^^ should be -3.
|
|
+ "[B]: nan at {} => {} | {:?}",
|
|
+ i, v.max_element(), v);
|
|
+ }
|
|
+
|
|
+ // We are done here, since we were in the last
|
|
+ // lane which is the last iteration of the loop.
|
|
+ break
|
|
+ }
|
|
+
|
|
+ // We are not in the last lane, and there is only
|
|
+ // one `NaN` in the vector.
|
|
+
|
|
+ // If the vector has one lane, the result is `NaN`:
|
|
+ if $id::lanes() == 1 {
|
|
+ assert!(v.max_element().is_nan(),
|
|
+ "[C]: all nans | v={:?} | min={} | \
|
|
+ is_nan: {}",
|
|
+ v, v.max_element(),
|
|
+ v.max_element().is_nan());
|
|
+
|
|
+ // And we are done, since the vector only has
|
|
+ // one lane anyways.
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ // The vector has more than one lane, since there is
|
|
+ // only one `NaN` in the vector, the result is
|
|
+ // always `-3`.
|
|
+ assert_eq!(v.max_element(), -3.,
|
|
+ "[D]: nan at {} => {} | {:?}",
|
|
+ i, v.max_element(), v);
|
|
+
|
|
+ // If we replace all the elements in the vector
|
|
+ // up-to the `i-th` lane with `NaN`s, the result is
|
|
+ // still always `-3.` unless all elements of the
|
|
+ // vector are `NaN`s:
|
|
+ for j in 0..i {
|
|
+ v = v.replace(j, n);
|
|
+
|
|
+ if i == $id::lanes() - 1 && j == i - 1 {
|
|
+ // All elements of the vector are `NaN`s,
|
|
+ // therefore the result is NaN as well.
|
|
+ //
|
|
+ // Note: the #lanes of the vector is > 1, so
|
|
+ // "i - 1" does not overflow.
|
|
+ assert!(v.max_element().is_nan(),
|
|
+ "[E]: all nans | v={:?} | max={} | \
|
|
+ is_nan: {}",
|
|
+ v, v.max_element(),
|
|
+ v.max_element().is_nan());
|
|
+ } else {
|
|
+ // There are non-`NaN` elements in the
|
|
+ // vector, therefore the result is `-3.`:
|
|
+ assert_eq!(v.max_element(), -3.,
|
|
+ "[F]: nan at {} => {} | {:?}",
|
|
+ i, v.max_element(), v);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // If the vector contains all NaNs the result is NaN:
|
|
+ assert!($id::splat(n).max_element().is_nan(),
|
|
+ "all nans | v={:?} | max={} | is_nan: {}",
|
|
+ $id::splat(n), $id::splat(n).max_element(),
|
|
+ $id::splat(n).max_element().is_nan());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/select.rs b/third_party/rust/packed_simd/src/api/select.rs
|
|
new file mode 100644
|
|
index 000000000000..24525df56c73
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/select.rs
|
|
@@ -0,0 +1,75 @@
|
|
+//! Implements mask's `select`.
|
|
+
|
|
+/// Implements mask select method
|
|
+macro_rules! impl_select {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Selects elements of `a` and `b` using mask.
|
|
+ ///
|
|
+ /// The lanes of the result for which the mask is `true` contain
|
|
+ /// the values of `a`. The remaining lanes contain the values of
|
|
+ /// `b`.
|
|
+ #[inline]
|
|
+ pub fn select<T>(self, a: Simd<T>, b: Simd<T>) -> Simd<T>
|
|
+ where
|
|
+ T: sealed::SimdArray<
|
|
+ NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT,
|
|
+ >,
|
|
+ {
|
|
+ use crate::llvm::simd_select;
|
|
+ Simd(unsafe { simd_select(self.0, a.0, b.0) })
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_select!(bool, $id, $id, (false, true) | $test_tt);
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! test_select {
|
|
+ (
|
|
+ $elem_ty:ident,
|
|
+ $mask_ty:ident,
|
|
+ $vec_ty:ident,($small:expr, $large:expr) |
|
|
+ $test_tt:tt
|
|
+ ) => {
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$vec_ty _select>] {
|
|
+ use super::*;
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn select() {
|
|
+ let o = $small as $elem_ty;
|
|
+ let t = $large as $elem_ty;
|
|
+
|
|
+ let a = $vec_ty::splat(o);
|
|
+ let b = $vec_ty::splat(t);
|
|
+ let m = a.lt(b);
|
|
+ assert_eq!(m.select(a, b), a);
|
|
+
|
|
+ let m = b.lt(a);
|
|
+ assert_eq!(m.select(b, a), a);
|
|
+
|
|
+ let mut c = a;
|
|
+ let mut d = b;
|
|
+ let mut m_e = $mask_ty::splat(false);
|
|
+ for i in 0..$vec_ty::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ let c_tmp = c.extract(i);
|
|
+ c = c.replace(i, d.extract(i));
|
|
+ d = d.replace(i, c_tmp);
|
|
+ } else {
|
|
+ m_e = m_e.replace(i, true);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let m = c.lt(d);
|
|
+ assert_eq!(m_e, m);
|
|
+ assert_eq!(m.select(c, d), a);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/shuffle.rs b/third_party/rust/packed_simd/src/api/shuffle.rs
|
|
new file mode 100644
|
|
index 000000000000..13a7fae5fcee
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/shuffle.rs
|
|
@@ -0,0 +1,190 @@
|
|
+//! Implements portable vector shuffles with immediate indices.
|
|
+
|
|
+// FIXME: comprehensive tests
|
|
+// https://github.com/rust-lang-nursery/packed_simd/issues/20
|
|
+
|
|
+/// Shuffles vector elements.
|
|
+///
|
|
+/// This macro returns a new vector that contains a shuffle of the elements in
|
|
+/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1,
|
|
+/// [indices...])`) input vectors.
|
|
+///
|
|
+/// The type of `vec0` and `vec1` must be equal, and the element type of the
|
|
+/// resulting vector is the element type of the input vector.
|
|
+///
|
|
+/// The number of `indices` must be a power-of-two in range `[2, 64]`, since
|
|
+/// currently, the largest vector supported by the library has 64 lanes. The
|
|
+/// length of the resulting vector equals the number of indices provided.
|
|
+///
|
|
+/// The indices must be in range `[0, M * N)` where `M` is the number of input
|
|
+/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors.
|
|
+/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`,
|
|
+/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of
|
|
+/// `vec1`.
|
|
+///
|
|
+/// # Examples
|
|
+///
|
|
+/// Shuffling elements of two vectors:
|
|
+///
|
|
+/// ```
|
|
+/// # #[macro_use]
|
|
+/// # extern crate packed_simd;
|
|
+/// # use packed_simd::*;
|
|
+/// # fn main() {
|
|
+/// // Shuffle allows reordering the elements:
|
|
+/// let x = i32x4::new(1, 2, 3, 4);
|
|
+/// let y = i32x4::new(5, 6, 7, 8);
|
|
+/// let r = shuffle!(x, y, [4, 0, 5, 1]);
|
|
+/// assert_eq!(r, i32x4::new(5, 1, 6, 2));
|
|
+///
|
|
+/// // The resulting vector can also be smaller than the input:
|
|
+/// let r = shuffle!(x, y, [1, 6]);
|
|
+/// assert_eq!(r, i32x2::new(2, 7));
|
|
+///
|
|
+/// // Or larger:
|
|
+/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]);
|
|
+/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3));
|
|
+/// // At most 2 * the number of lanes in the input vector.
|
|
+/// # }
|
|
+/// ```
|
|
+///
|
|
+/// Shuffling elements of one vector:
|
|
+///
|
|
+/// ```
|
|
+/// # #[macro_use]
|
|
+/// # extern crate packed_simd;
|
|
+/// # use packed_simd::*;
|
|
+/// # fn main() {
|
|
+/// // Shuffle allows reordering the elements of a vector:
|
|
+/// let x = i32x4::new(1, 2, 3, 4);
|
|
+/// let r = shuffle!(x, [2, 1, 3, 0]);
|
|
+/// assert_eq!(r, i32x4::new(3, 2, 4, 1));
|
|
+///
|
|
+/// // The resulting vector can be smaller than the input:
|
|
+/// let r = shuffle!(x, [1, 3]);
|
|
+/// assert_eq!(r, i32x2::new(2, 4));
|
|
+///
|
|
+/// // Equal:
|
|
+/// let r = shuffle!(x, [1, 3, 2, 0]);
|
|
+/// assert_eq!(r, i32x4::new(2, 4, 3, 1));
|
|
+///
|
|
+/// // Or larger:
|
|
+/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]);
|
|
+/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3));
|
|
+/// // At most 2 * the number of lanes in the input vector.
|
|
+/// # }
|
|
+/// ```
|
|
+#[macro_export]
|
|
+macro_rules! shuffle {
|
|
+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{
|
|
+ #[allow(unused_unsafe)]
|
|
+ unsafe {
|
|
+ $crate::Simd($crate::__shuffle_vector2(
|
|
+ $vec0.0,
|
|
+ $vec1.0,
|
|
+ [$l0, $l1],
|
|
+ ))
|
|
+ }
|
|
+ }};
|
|
+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{
|
|
+ #[allow(unused_unsafe)]
|
|
+ unsafe {
|
|
+ $crate::Simd($crate::__shuffle_vector4(
|
|
+ $vec0.0,
|
|
+ $vec1.0,
|
|
+ [$l0, $l1, $l2, $l3],
|
|
+ ))
|
|
+ }
|
|
+ }};
|
|
+ ($vec0:expr, $vec1:expr,
|
|
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
|
|
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{
|
|
+ #[allow(unused_unsafe)]
|
|
+ unsafe {
|
|
+ $crate::Simd($crate::__shuffle_vector8(
|
|
+ $vec0.0,
|
|
+ $vec1.0,
|
|
+ [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7],
|
|
+ ))
|
|
+ }
|
|
+ }};
|
|
+ ($vec0:expr, $vec1:expr,
|
|
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
|
|
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
|
|
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
|
|
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{
|
|
+ #[allow(unused_unsafe)]
|
|
+ unsafe {
|
|
+ $crate::Simd($crate::__shuffle_vector16(
|
|
+ $vec0.0,
|
|
+ $vec1.0,
|
|
+ [
|
|
+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
|
|
+ $l11, $l12, $l13, $l14, $l15,
|
|
+ ],
|
|
+ ))
|
|
+ }
|
|
+ }};
|
|
+ ($vec0:expr, $vec1:expr,
|
|
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
|
|
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
|
|
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
|
|
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr,
|
|
+ $l16:expr, $l17:expr, $l18:expr, $l19:expr,
|
|
+ $l20:expr, $l21:expr, $l22:expr, $l23:expr,
|
|
+ $l24:expr, $l25:expr, $l26:expr, $l27:expr,
|
|
+ $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{
|
|
+ #[allow(unused_unsafe)]
|
|
+ unsafe {
|
|
+ $crate::Simd($crate::__shuffle_vector32(
|
|
+ $vec0.0,
|
|
+ $vec1.0,
|
|
+ [
|
|
+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
|
|
+ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19,
|
|
+ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28,
|
|
+ $l29, $l30, $l31,
|
|
+ ],
|
|
+ ))
|
|
+ }
|
|
+ }};
|
|
+ ($vec0:expr, $vec1:expr,
|
|
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
|
|
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
|
|
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
|
|
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr,
|
|
+ $l16:expr, $l17:expr, $l18:expr, $l19:expr,
|
|
+ $l20:expr, $l21:expr, $l22:expr, $l23:expr,
|
|
+ $l24:expr, $l25:expr, $l26:expr, $l27:expr,
|
|
+ $l28:expr, $l29:expr, $l30:expr, $l31:expr,
|
|
+ $l32:expr, $l33:expr, $l34:expr, $l35:expr,
|
|
+ $l36:expr, $l37:expr, $l38:expr, $l39:expr,
|
|
+ $l40:expr, $l41:expr, $l42:expr, $l43:expr,
|
|
+ $l44:expr, $l45:expr, $l46:expr, $l47:expr,
|
|
+ $l48:expr, $l49:expr, $l50:expr, $l51:expr,
|
|
+ $l52:expr, $l53:expr, $l54:expr, $l55:expr,
|
|
+ $l56:expr, $l57:expr, $l58:expr, $l59:expr,
|
|
+ $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{
|
|
+ #[allow(unused_unsafe)]
|
|
+ unsafe {
|
|
+ $crate::Simd($crate::__shuffle_vector64(
|
|
+ $vec0.0,
|
|
+ $vec1.0,
|
|
+ [
|
|
+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
|
|
+ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19,
|
|
+ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28,
|
|
+ $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37,
|
|
+ $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46,
|
|
+ $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, $l55,
|
|
+ $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63,
|
|
+ ],
|
|
+ ))
|
|
+ }
|
|
+ }};
|
|
+ ($vec:expr, [$($l:expr),*]) => {
|
|
+ match $vec {
|
|
+ v => shuffle!(v, v, [$($l),*])
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs
|
|
new file mode 100644
|
|
index 000000000000..64536be6cba1
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs
|
|
@@ -0,0 +1,159 @@
|
|
+//! Shuffle vector elements according to a dynamic vector of indices.
|
|
+
|
|
+macro_rules! impl_shuffle1_dyn {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Shuffle vector elements according to `indices`.
|
|
+ #[inline]
|
|
+ pub fn shuffle1_dyn<I>(self, indices: I) -> Self
|
|
+ where
|
|
+ Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>,
|
|
+ {
|
|
+ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices)
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! test_shuffle1_dyn {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _shuffle1_dyn>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn shuffle1_dyn() {
|
|
+ let increasing = {
|
|
+ let mut v = $id::splat(0 as $elem_ty);
|
|
+ for i in 0..$id::lanes() {
|
|
+ v = v.replace(i, i as $elem_ty);
|
|
+ }
|
|
+ v
|
|
+ };
|
|
+ let decreasing = {
|
|
+ let mut v = $id::splat(0 as $elem_ty);
|
|
+ for i in 0..$id::lanes() {
|
|
+ v = v.replace(
|
|
+ i,
|
|
+ ($id::lanes() - 1 - i) as $elem_ty
|
|
+ );
|
|
+ }
|
|
+ v
|
|
+ };
|
|
+
|
|
+ type Indices = <
|
|
+ $id as codegen::shuffle1_dyn::Shuffle1Dyn
|
|
+ >::Indices;
|
|
+ let increasing_ids: Indices = increasing.cast();
|
|
+ let decreasing_ids: Indices = decreasing.cast();
|
|
+
|
|
+ assert_eq!(
|
|
+ increasing.shuffle1_dyn(increasing_ids),
|
|
+ increasing,
|
|
+ "(i,i)=>i"
|
|
+ );
|
|
+ assert_eq!(
|
|
+ decreasing.shuffle1_dyn(increasing_ids),
|
|
+ decreasing,
|
|
+ "(d,i)=>d"
|
|
+ );
|
|
+ assert_eq!(
|
|
+ increasing.shuffle1_dyn(decreasing_ids),
|
|
+ decreasing,
|
|
+ "(i,d)=>d"
|
|
+ );
|
|
+ assert_eq!(
|
|
+ decreasing.shuffle1_dyn(decreasing_ids),
|
|
+ increasing,
|
|
+ "(d,d)=>i"
|
|
+ );
|
|
+
|
|
+ for i in 0..$id::lanes() {
|
|
+ let v_ids: Indices
|
|
+ = $id::splat(i as $elem_ty).cast();
|
|
+ assert_eq!(increasing.shuffle1_dyn(v_ids),
|
|
+ $id::splat(increasing.extract(i))
|
|
+ );
|
|
+ assert_eq!(decreasing.shuffle1_dyn(v_ids),
|
|
+ $id::splat(decreasing.extract(i))
|
|
+ );
|
|
+ assert_eq!(
|
|
+ $id::splat(i as $elem_ty)
|
|
+ .shuffle1_dyn(increasing_ids),
|
|
+ $id::splat(i as $elem_ty)
|
|
+ );
|
|
+ assert_eq!(
|
|
+ $id::splat(i as $elem_ty)
|
|
+ .shuffle1_dyn(decreasing_ids),
|
|
+ $id::splat(i as $elem_ty)
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! test_shuffle1_dyn_mask {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _shuffle1_dyn>] {
|
|
+ use super::*;
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn shuffle1_dyn() {
|
|
+ // alternating = [true, false, true, false, ...]
|
|
+ let mut alternating = $id::splat(false);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ alternating = alternating.replace(i, true);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ type Indices = <
|
|
+ $id as codegen::shuffle1_dyn::Shuffle1Dyn
|
|
+ >::Indices;
|
|
+ // even = [0, 0, 2, 2, 4, 4, ..]
|
|
+ let even = {
|
|
+ let mut v = Indices::splat(0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % 2 == 0 {
|
|
+ v = v.replace(i, (i as u8).into());
|
|
+ } else {
|
|
+ v = v.replace(i, (i as u8 - 1).into());
|
|
+ }
|
|
+ }
|
|
+ v
|
|
+ };
|
|
+ // odd = [1, 1, 3, 3, 5, 5, ...]
|
|
+ let odd = {
|
|
+ let mut v = Indices::splat(0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ if i % 2 != 0 {
|
|
+ v = v.replace(i, (i as u8).into());
|
|
+ } else {
|
|
+ v = v.replace(i, (i as u8 + 1).into());
|
|
+ }
|
|
+ }
|
|
+ v
|
|
+ };
|
|
+
|
|
+ assert_eq!(
|
|
+ alternating.shuffle1_dyn(even),
|
|
+ $id::splat(true)
|
|
+ );
|
|
+ if $id::lanes() > 1 {
|
|
+ assert_eq!(
|
|
+ alternating.shuffle1_dyn(odd),
|
|
+ $id::splat(false)
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/slice.rs b/third_party/rust/packed_simd/src/api/slice.rs
|
|
new file mode 100644
|
|
index 000000000000..526b848b5c06
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/slice.rs
|
|
@@ -0,0 +1,7 @@
|
|
+//! Slice from/to methods
|
|
+
|
|
+#[macro_use]
|
|
+mod from_slice;
|
|
+
|
|
+#[macro_use]
|
|
+mod write_to_slice;
|
|
diff --git a/third_party/rust/packed_simd/src/api/slice/from_slice.rs b/third_party/rust/packed_simd/src/api/slice/from_slice.rs
|
|
new file mode 100644
|
|
index 000000000000..109cd1f10b01
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs
|
|
@@ -0,0 +1,216 @@
|
|
+//! Implements methods to read a vector type from a slice.
|
|
+
|
|
+macro_rules! impl_slice_from_slice {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
|
|
+ /// to an `align_of::<Self>()` boundary.
|
|
+ #[inline]
|
|
+ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr = slice.get_unchecked(0) as *const $elem_ty;
|
|
+ assert_eq!(
|
|
+ target_ptr
|
|
+ .align_offset(crate::mem::align_of::<Self>()),
|
|
+ 0
|
|
+ );
|
|
+ Self::from_slice_aligned_unchecked(slice)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()`.
|
|
+ #[inline]
|
|
+ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ Self::from_slice_unaligned_unchecked(slice)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
|
|
+ /// to an `align_of::<Self>()` boundary, the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn from_slice_aligned_unchecked(
|
|
+ slice: &[$elem_ty],
|
|
+ ) -> Self {
|
|
+ debug_assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr = slice.get_unchecked(0) as *const $elem_ty;
|
|
+ debug_assert_eq!(
|
|
+ target_ptr.align_offset(crate::mem::align_of::<Self>()),
|
|
+ 0
|
|
+ );
|
|
+
|
|
+ #[allow(clippy::cast_ptr_alignment)]
|
|
+ *(target_ptr as *const Self)
|
|
+ }
|
|
+
|
|
+ /// Instantiates a new vector with the values of the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn from_slice_unaligned_unchecked(
|
|
+ slice: &[$elem_ty],
|
|
+ ) -> Self {
|
|
+ use crate::mem::size_of;
|
|
+ debug_assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr =
|
|
+ slice.get_unchecked(0) as *const $elem_ty as *const u8;
|
|
+ let mut x = Self::splat(0 as $elem_ty);
|
|
+ let self_ptr = &mut x as *mut Self as *mut u8;
|
|
+ crate::ptr::copy_nonoverlapping(
|
|
+ target_ptr,
|
|
+ self_ptr,
|
|
+ size_of::<Self>(),
|
|
+ );
|
|
+ x
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _slice_from_slice>] {
|
|
+ use super::*;
|
|
+ use crate::iter::Iterator;
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn from_slice_unaligned() {
|
|
+ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
|
|
+ unaligned[0] = 0 as $elem_ty;
|
|
+ let vec = $id::from_slice_unaligned(&unaligned[1..]);
|
|
+ for (index, &b) in unaligned.iter().enumerate() {
|
|
+ if index == 0 {
|
|
+ assert_eq!(b, 0 as $elem_ty);
|
|
+ } else {
|
|
+ assert_eq!(b, 42 as $elem_ty);
|
|
+ assert_eq!(b, vec.extract(index - 1));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn from_slice_unaligned_fail() {
|
|
+ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
|
|
+ unaligned[0] = 0 as $elem_ty;
|
|
+ // the slice is not large enough => panic
|
|
+ let _vec = $id::from_slice_unaligned(&unaligned[2..]);
|
|
+ }
|
|
+
|
|
+ union A {
|
|
+ data: [$elem_ty; 2 * $id::lanes()],
|
|
+ _vec: $id,
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn from_slice_aligned() {
|
|
+ let mut aligned = A {
|
|
+ data: [0 as $elem_ty; 2 * $id::lanes()],
|
|
+ };
|
|
+ for i in $id::lanes()..(2 * $id::lanes()) {
|
|
+ unsafe {
|
|
+ aligned.data[i] = 42 as $elem_ty;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ let vec = unsafe {
|
|
+ $id::from_slice_aligned(
|
|
+ &aligned.data[$id::lanes()..]
|
|
+ )
|
|
+ };
|
|
+ for (index, &b) in
|
|
+ unsafe { aligned.data.iter().enumerate() } {
|
|
+ if index < $id::lanes() {
|
|
+ assert_eq!(b, 0 as $elem_ty);
|
|
+ } else {
|
|
+ assert_eq!(b, 42 as $elem_ty);
|
|
+ assert_eq!(
|
|
+ b, vec.extract(index - $id::lanes())
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn from_slice_aligned_fail_lanes() {
|
|
+ let aligned = A {
|
|
+ data: [0 as $elem_ty; 2 * $id::lanes()],
|
|
+ };
|
|
+ let _vec = unsafe {
|
|
+ $id::from_slice_aligned(
|
|
+ &aligned.data[2 * $id::lanes()..]
|
|
+ )
|
|
+ };
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn from_slice_aligned_fail_align() {
|
|
+ unsafe {
|
|
+ let aligned = A {
|
|
+ data: [0 as $elem_ty; 2 * $id::lanes()],
|
|
+ };
|
|
+
|
|
+ // get a pointer to the front of data
|
|
+ let ptr: *const $elem_ty = aligned.data.as_ptr()
|
|
+ as *const $elem_ty;
|
|
+ // offset pointer by one element
|
|
+ let ptr = ptr.wrapping_add(1);
|
|
+
|
|
+ if ptr.align_offset(
|
|
+ crate::mem::align_of::<$id>()
|
|
+ ) == 0 {
|
|
+ // the pointer is properly aligned, so
|
|
+ // from_slice_aligned won't fail here (e.g. this
|
|
+ // can happen for i128x1). So we panic to make
|
|
+ // the "should_panic" test pass:
|
|
+ panic!("ok");
|
|
+ }
|
|
+
|
|
+ // create a slice - this is safe, because the
|
|
+ // elements of the slice exist, are properly
|
|
+ // initialized, and properly aligned:
|
|
+ let s: &[$elem_ty] = slice::from_raw_parts(
|
|
+ ptr, $id::lanes()
|
|
+ );
|
|
+ // this should always panic because the slice
|
|
+ // alignment does not match the alignment
|
|
+ // requirements for the vector type:
|
|
+ let _vec = $id::from_slice_aligned(s);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs
|
|
new file mode 100644
|
|
index 000000000000..fcb288da70fc
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs
|
|
@@ -0,0 +1,211 @@
|
|
+//! Implements methods to write a vector type to a slice.
|
|
+
|
|
+macro_rules! impl_slice_write_to_slice {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
|
|
+ /// aligned to an `align_of::<Self>()` boundary.
|
|
+ #[inline]
|
|
+ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr =
|
|
+ slice.get_unchecked_mut(0) as *mut $elem_ty;
|
|
+ assert_eq!(
|
|
+ target_ptr
|
|
+ .align_offset(crate::mem::align_of::<Self>()),
|
|
+ 0
|
|
+ );
|
|
+ self.write_to_slice_aligned_unchecked(slice);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Panics
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()`.
|
|
+ #[inline]
|
|
+ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) {
|
|
+ unsafe {
|
|
+ assert!(slice.len() >= $elem_count);
|
|
+ self.write_to_slice_unaligned_unchecked(slice);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
|
|
+ /// aligned to an `align_of::<Self>()` boundary, the behavior is
|
|
+ /// undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn write_to_slice_aligned_unchecked(
|
|
+ self, slice: &mut [$elem_ty],
|
|
+ ) {
|
|
+ debug_assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty;
|
|
+ debug_assert_eq!(
|
|
+ target_ptr.align_offset(crate::mem::align_of::<Self>()),
|
|
+ 0
|
|
+ );
|
|
+
|
|
+ #[allow(clippy::cast_ptr_alignment)]
|
|
+ #[allow(clippy::cast_ptr_alignment)]
|
|
+ #[allow(clippy::cast_ptr_alignment)]
|
|
+ #[allow(clippy::cast_ptr_alignment)]
|
|
+ *(target_ptr as *mut Self) = self;
|
|
+ }
|
|
+
|
|
+ /// Writes the values of the vector to the `slice`.
|
|
+ ///
|
|
+ /// # Precondition
|
|
+ ///
|
|
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
|
|
+ #[inline]
|
|
+ pub unsafe fn write_to_slice_unaligned_unchecked(
|
|
+ self, slice: &mut [$elem_ty],
|
|
+ ) {
|
|
+ debug_assert!(slice.len() >= $elem_count);
|
|
+ let target_ptr =
|
|
+ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8;
|
|
+ let self_ptr = &self as *const Self as *const u8;
|
|
+ crate::ptr::copy_nonoverlapping(
|
|
+ self_ptr,
|
|
+ target_ptr,
|
|
+ crate::mem::size_of::<Self>(),
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item! {
|
|
+ pub mod [<$id _slice_write_to_slice>] {
|
|
+ use super::*;
|
|
+ use crate::iter::Iterator;
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn write_to_slice_unaligned() {
|
|
+ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
|
|
+ let vec = $id::splat(42 as $elem_ty);
|
|
+ vec.write_to_slice_unaligned(&mut unaligned[1..]);
|
|
+ for (index, &b) in unaligned.iter().enumerate() {
|
|
+ if index == 0 {
|
|
+ assert_eq!(b, 0 as $elem_ty);
|
|
+ } else {
|
|
+ assert_eq!(b, 42 as $elem_ty);
|
|
+ assert_eq!(b, vec.extract(index - 1));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn write_to_slice_unaligned_fail() {
|
|
+ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
|
|
+ let vec = $id::splat(42 as $elem_ty);
|
|
+ vec.write_to_slice_unaligned(&mut unaligned[2..]);
|
|
+ }
|
|
+
|
|
+ union A {
|
|
+ data: [$elem_ty; 2 * $id::lanes()],
|
|
+ _vec: $id,
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn write_to_slice_aligned() {
|
|
+ let mut aligned = A {
|
|
+ data: [0 as $elem_ty; 2 * $id::lanes()],
|
|
+ };
|
|
+ let vec = $id::splat(42 as $elem_ty);
|
|
+ unsafe {
|
|
+ vec.write_to_slice_aligned(
|
|
+ &mut aligned.data[$id::lanes()..]
|
|
+ );
|
|
+ for (idx, &b) in aligned.data.iter().enumerate() {
|
|
+ if idx < $id::lanes() {
|
|
+ assert_eq!(b, 0 as $elem_ty);
|
|
+ } else {
|
|
+ assert_eq!(b, 42 as $elem_ty);
|
|
+ assert_eq!(
|
|
+ b, vec.extract(idx - $id::lanes())
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn write_to_slice_aligned_fail_lanes() {
|
|
+ let mut aligned = A {
|
|
+ data: [0 as $elem_ty; 2 * $id::lanes()],
|
|
+ };
|
|
+ let vec = $id::splat(42 as $elem_ty);
|
|
+ unsafe {
|
|
+ vec.write_to_slice_aligned(
|
|
+ &mut aligned.data[2 * $id::lanes()..]
|
|
+ )
|
|
+ };
|
|
+ }
|
|
+
|
|
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
|
|
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ #[cfg(not(target_arch = "wasm32"))]
|
|
+ #[test]
|
|
+ #[should_panic]
|
|
+ fn write_to_slice_aligned_fail_align() {
|
|
+ unsafe {
|
|
+ let mut aligned = A {
|
|
+ data: [0 as $elem_ty; 2 * $id::lanes()],
|
|
+ };
|
|
+
|
|
+ // get a pointer to the front of data
|
|
+ let ptr: *mut $elem_ty
|
|
+ = aligned.data.as_mut_ptr() as *mut $elem_ty;
|
|
+ // offset pointer by one element
|
|
+ let ptr = ptr.wrapping_add(1);
|
|
+
|
|
+ if ptr.align_offset(crate::mem::align_of::<$id>())
|
|
+ == 0 {
|
|
+ // the pointer is properly aligned, so
|
|
+ // write_to_slice_aligned won't fail here (e.g.
|
|
+ // this can happen for i128x1). So we panic to
|
|
+ // make the "should_panic" test pass:
|
|
+ panic!("ok");
|
|
+ }
|
|
+
|
|
+ // create a slice - this is safe, because the
|
|
+ // elements of the slice exist, are properly
|
|
+ // initialized, and properly aligned:
|
|
+ let s: &mut [$elem_ty]
|
|
+ = slice::from_raw_parts_mut(ptr, $id::lanes());
|
|
+ // this should always panic because the slice
|
|
+ // alignment does not match the alignment
|
|
+ // requirements for the vector type:
|
|
+ let vec = $id::splat(42 as $elem_ty);
|
|
+ vec.write_to_slice_aligned(s);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/api/swap_bytes.rs b/third_party/rust/packed_simd/src/api/swap_bytes.rs
|
|
new file mode 100644
|
|
index 000000000000..53bba25bd311
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/api/swap_bytes.rs
|
|
@@ -0,0 +1,192 @@
|
|
+//! Horizontal swap bytes
|
|
+
|
|
+macro_rules! impl_swap_bytes {
|
|
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
|
|
+ impl $id {
|
|
+ /// Reverses the byte order of the vector.
|
|
+ #[inline]
|
|
+ pub fn swap_bytes(self) -> Self {
|
|
+ super::codegen::swap_bytes::SwapBytes::swap_bytes(self)
|
|
+ }
|
|
+
|
|
+ /// Converts self to little endian from the target's endianness.
|
|
+ ///
|
|
+ /// On little endian this is a no-op. On big endian the bytes are
|
|
+ /// swapped.
|
|
+ #[inline]
|
|
+ pub fn to_le(self) -> Self {
|
|
+ #[cfg(target_endian = "little")]
|
|
+ {
|
|
+ self
|
|
+ }
|
|
+ #[cfg(not(target_endian = "little"))]
|
|
+ {
|
|
+ self.swap_bytes()
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Converts self to big endian from the target's endianness.
|
|
+ ///
|
|
+ /// On big endian this is a no-op. On little endian the bytes are
|
|
+ /// swapped.
|
|
+ #[inline]
|
|
+ pub fn to_be(self) -> Self {
|
|
+ #[cfg(target_endian = "big")]
|
|
+ {
|
|
+ self
|
|
+ }
|
|
+ #[cfg(not(target_endian = "big"))]
|
|
+ {
|
|
+ self.swap_bytes()
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Converts a vector from little endian to the target's endianness.
|
|
+ ///
|
|
+ /// On little endian this is a no-op. On big endian the bytes are
|
|
+ /// swapped.
|
|
+ #[inline]
|
|
+ pub fn from_le(x: Self) -> Self {
|
|
+ #[cfg(target_endian = "little")]
|
|
+ {
|
|
+ x
|
|
+ }
|
|
+ #[cfg(not(target_endian = "little"))]
|
|
+ {
|
|
+ x.swap_bytes()
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Converts a vector from big endian to the target's endianness.
|
|
+ ///
|
|
+ /// On big endian this is a no-op. On little endian the bytes are
|
|
+ /// swapped.
|
|
+ #[inline]
|
|
+ pub fn from_be(x: Self) -> Self {
|
|
+ #[cfg(target_endian = "big")]
|
|
+ {
|
|
+ x
|
|
+ }
|
|
+ #[cfg(not(target_endian = "big"))]
|
|
+ {
|
|
+ x.swap_bytes()
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ test_if! {
|
|
+ $test_tt:
|
|
+ paste::item_with_macros! {
|
|
+ pub mod [<$id _swap_bytes>] {
|
|
+ use super::*;
|
|
+
|
|
+ const BYTES: [u8; 64] = [
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ 16, 17, 18, 19, 20, 21, 22, 23,
|
|
+ 24, 25, 26, 27, 28, 29, 30, 31,
|
|
+ 32, 33, 34, 35, 36, 37, 38, 39,
|
|
+ 40, 41, 42, 43, 44, 45, 46, 47,
|
|
+ 48, 49, 50, 51, 52, 53, 54, 55,
|
|
+ 56, 57, 58, 59, 60, 61, 62, 63,
|
|
+ ];
|
|
+
|
|
+ macro_rules! swap {
|
|
+ ($func: ident) => {{
|
|
+ // catch possible future vectors wider than 512 bits (64 bytes)
|
|
+ assert!(mem::size_of::<$id>() <= 64);
|
|
+
|
|
+ let mut actual = BYTES;
|
|
+ let elems: &mut [$elem_ty] = unsafe {
|
|
+ slice::from_raw_parts_mut(
|
|
+ actual.as_mut_ptr() as *mut $elem_ty,
|
|
+ $id::lanes(),
|
|
+ )
|
|
+ };
|
|
+
|
|
+ let vec = $id::from_slice_unaligned(elems);
|
|
+ $id::$func(vec).write_to_slice_unaligned(elems);
|
|
+
|
|
+ actual
|
|
+ }};
|
|
+ }
|
|
+
|
|
+ macro_rules! test_swap {
|
|
+ ($func: ident) => {{
|
|
+ let actual = swap!($func);
|
|
+ let expected =
|
|
+ BYTES.iter().rev()
|
|
+ .skip(64 - crate::mem::size_of::<$id>());
|
|
+ assert!(actual.iter().zip(expected)
|
|
+ .all(|(x, y)| x == y));
|
|
+ }};
|
|
+ }
|
|
+
|
|
+ macro_rules! test_no_swap {
|
|
+ ($func: ident) => {{
|
|
+ let actual = swap!($func);
|
|
+ let expected = BYTES.iter()
|
|
+ .take(mem::size_of::<$id>());
|
|
+
|
|
+ assert!(actual.iter().zip(expected)
|
|
+ .all(|(x, y)| x == y));
|
|
+ }};
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn swap_bytes() {
|
|
+ test_swap!(swap_bytes);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn to_le() {
|
|
+ #[cfg(target_endian = "little")]
|
|
+ {
|
|
+ test_no_swap!(to_le);
|
|
+ }
|
|
+ #[cfg(not(target_endian = "little"))]
|
|
+ {
|
|
+ test_swap!(to_le);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn to_be() {
|
|
+ #[cfg(target_endian = "big")]
|
|
+ {
|
|
+ test_no_swap!(to_be);
|
|
+ }
|
|
+ #[cfg(not(target_endian = "big"))]
|
|
+ {
|
|
+ test_swap!(to_be);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn from_le() {
|
|
+ #[cfg(target_endian = "little")]
|
|
+ {
|
|
+ test_no_swap!(from_le);
|
|
+ }
|
|
+ #[cfg(not(target_endian = "little"))]
|
|
+ {
|
|
+ test_swap!(from_le);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+ fn from_be() {
|
|
+ #[cfg(target_endian = "big")]
|
|
+ {
|
|
+ test_no_swap!(from_be);
|
|
+ }
|
|
+ #[cfg(not(target_endian = "big"))]
|
|
+ {
|
|
+ test_swap!(from_be);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen.rs b/third_party/rust/packed_simd/src/codegen.rs
|
|
new file mode 100644
|
|
index 000000000000..b7ccd838603f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen.rs
|
|
@@ -0,0 +1,59 @@
|
|
+//! Code-generation utilities
|
|
+
|
|
+crate mod bit_manip;
|
|
+crate mod llvm;
|
|
+crate mod math;
|
|
+crate mod reductions;
|
|
+crate mod shuffle;
|
|
+crate mod shuffle1_dyn;
|
|
+crate mod swap_bytes;
|
|
+
|
|
+macro_rules! impl_simd_array {
|
|
+ ([$elem_ty:ident; $elem_count:expr]:
|
|
+ $tuple_id:ident | $($elem_tys:ident),*) => {
|
|
+ #[derive(Copy, Clone)]
|
|
+ #[repr(simd)]
|
|
+ pub struct $tuple_id($(crate $elem_tys),*);
|
|
+ //^^^^^^^ leaked through SimdArray
|
|
+
|
|
+ impl crate::sealed::SimdArray for [$elem_ty; $elem_count] {
|
|
+ type Tuple = $tuple_id;
|
|
+ type T = $elem_ty;
|
|
+ const N: usize = $elem_count;
|
|
+ type NT = [u32; $elem_count];
|
|
+ }
|
|
+
|
|
+ impl crate::sealed::Simd for $tuple_id {
|
|
+ type Element = $elem_ty;
|
|
+ const LANES: usize = $elem_count;
|
|
+ type LanesType = [u32; $elem_count];
|
|
+ }
|
|
+
|
|
+ }
|
|
+}
|
|
+
|
|
+crate mod pointer_sized_int;
|
|
+
|
|
+crate mod v16;
|
|
+crate use self::v16::*;
|
|
+
|
|
+crate mod v32;
|
|
+crate use self::v32::*;
|
|
+
|
|
+crate mod v64;
|
|
+crate use self::v64::*;
|
|
+
|
|
+crate mod v128;
|
|
+crate use self::v128::*;
|
|
+
|
|
+crate mod v256;
|
|
+crate use self::v256::*;
|
|
+
|
|
+crate mod v512;
|
|
+crate use self::v512::*;
|
|
+
|
|
+crate mod vSize;
|
|
+crate use self::vSize::*;
|
|
+
|
|
+crate mod vPtr;
|
|
+crate use self::vPtr::*;
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/bit_manip.rs b/third_party/rust/packed_simd/src/codegen/bit_manip.rs
|
|
new file mode 100644
|
|
index 000000000000..947266f5bce8
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/bit_manip.rs
|
|
@@ -0,0 +1,354 @@
|
|
+//! LLVM bit manipulation intrinsics.
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+#[allow(improper_ctypes, dead_code)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.ctlz.v2i8"]
|
|
+ fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
|
|
+ #[link_name = "llvm.ctlz.v4i8"]
|
|
+ fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
|
|
+ #[link_name = "llvm.ctlz.v8i8"]
|
|
+ fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
|
|
+ #[link_name = "llvm.ctlz.v16i8"]
|
|
+ fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
|
|
+ #[link_name = "llvm.ctlz.v32i8"]
|
|
+ fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
|
|
+ #[link_name = "llvm.ctlz.v64i8"]
|
|
+ fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
|
|
+
|
|
+ #[link_name = "llvm.ctlz.v2i16"]
|
|
+ fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
|
|
+ #[link_name = "llvm.ctlz.v4i16"]
|
|
+ fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
|
|
+ #[link_name = "llvm.ctlz.v8i16"]
|
|
+ fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
|
|
+ #[link_name = "llvm.ctlz.v16i16"]
|
|
+ fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
|
|
+ #[link_name = "llvm.ctlz.v32i16"]
|
|
+ fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
|
|
+
|
|
+ #[link_name = "llvm.ctlz.v2i32"]
|
|
+ fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
|
|
+ #[link_name = "llvm.ctlz.v4i32"]
|
|
+ fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
|
|
+ #[link_name = "llvm.ctlz.v8i32"]
|
|
+ fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
|
|
+ #[link_name = "llvm.ctlz.v16i32"]
|
|
+ fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
|
|
+
|
|
+ #[link_name = "llvm.ctlz.v2i64"]
|
|
+ fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
|
|
+ #[link_name = "llvm.ctlz.v4i64"]
|
|
+ fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
|
|
+ #[link_name = "llvm.ctlz.v8i64"]
|
|
+ fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
|
|
+
|
|
+ #[link_name = "llvm.ctlz.v1i128"]
|
|
+ fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
|
|
+ #[link_name = "llvm.ctlz.v2i128"]
|
|
+ fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
|
|
+ #[link_name = "llvm.ctlz.v4i128"]
|
|
+ fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
|
|
+
|
|
+ #[link_name = "llvm.cttz.v2i8"]
|
|
+ fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
|
|
+ #[link_name = "llvm.cttz.v4i8"]
|
|
+ fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
|
|
+ #[link_name = "llvm.cttz.v8i8"]
|
|
+ fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
|
|
+ #[link_name = "llvm.cttz.v16i8"]
|
|
+ fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
|
|
+ #[link_name = "llvm.cttz.v32i8"]
|
|
+ fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
|
|
+ #[link_name = "llvm.cttz.v64i8"]
|
|
+ fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
|
|
+
|
|
+ #[link_name = "llvm.cttz.v2i16"]
|
|
+ fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
|
|
+ #[link_name = "llvm.cttz.v4i16"]
|
|
+ fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
|
|
+ #[link_name = "llvm.cttz.v8i16"]
|
|
+ fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
|
|
+ #[link_name = "llvm.cttz.v16i16"]
|
|
+ fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
|
|
+ #[link_name = "llvm.cttz.v32i16"]
|
|
+ fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
|
|
+
|
|
+ #[link_name = "llvm.cttz.v2i32"]
|
|
+ fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
|
|
+ #[link_name = "llvm.cttz.v4i32"]
|
|
+ fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
|
|
+ #[link_name = "llvm.cttz.v8i32"]
|
|
+ fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
|
|
+ #[link_name = "llvm.cttz.v16i32"]
|
|
+ fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
|
|
+
|
|
+ #[link_name = "llvm.cttz.v2i64"]
|
|
+ fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
|
|
+ #[link_name = "llvm.cttz.v4i64"]
|
|
+ fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
|
|
+ #[link_name = "llvm.cttz.v8i64"]
|
|
+ fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
|
|
+
|
|
+ #[link_name = "llvm.cttz.v1i128"]
|
|
+ fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
|
|
+ #[link_name = "llvm.cttz.v2i128"]
|
|
+ fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
|
|
+ #[link_name = "llvm.cttz.v4i128"]
|
|
+ fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
|
|
+
|
|
+ #[link_name = "llvm.ctpop.v2i8"]
|
|
+ fn ctpop_u8x2(x: u8x2) -> u8x2;
|
|
+ #[link_name = "llvm.ctpop.v4i8"]
|
|
+ fn ctpop_u8x4(x: u8x4) -> u8x4;
|
|
+ #[link_name = "llvm.ctpop.v8i8"]
|
|
+ fn ctpop_u8x8(x: u8x8) -> u8x8;
|
|
+ #[link_name = "llvm.ctpop.v16i8"]
|
|
+ fn ctpop_u8x16(x: u8x16) -> u8x16;
|
|
+ #[link_name = "llvm.ctpop.v32i8"]
|
|
+ fn ctpop_u8x32(x: u8x32) -> u8x32;
|
|
+ #[link_name = "llvm.ctpop.v64i8"]
|
|
+ fn ctpop_u8x64(x: u8x64) -> u8x64;
|
|
+
|
|
+ #[link_name = "llvm.ctpop.v2i16"]
|
|
+ fn ctpop_u16x2(x: u16x2) -> u16x2;
|
|
+ #[link_name = "llvm.ctpop.v4i16"]
|
|
+ fn ctpop_u16x4(x: u16x4) -> u16x4;
|
|
+ #[link_name = "llvm.ctpop.v8i16"]
|
|
+ fn ctpop_u16x8(x: u16x8) -> u16x8;
|
|
+ #[link_name = "llvm.ctpop.v16i16"]
|
|
+ fn ctpop_u16x16(x: u16x16) -> u16x16;
|
|
+ #[link_name = "llvm.ctpop.v32i16"]
|
|
+ fn ctpop_u16x32(x: u16x32) -> u16x32;
|
|
+
|
|
+ #[link_name = "llvm.ctpop.v2i32"]
|
|
+ fn ctpop_u32x2(x: u32x2) -> u32x2;
|
|
+ #[link_name = "llvm.ctpop.v4i32"]
|
|
+ fn ctpop_u32x4(x: u32x4) -> u32x4;
|
|
+ #[link_name = "llvm.ctpop.v8i32"]
|
|
+ fn ctpop_u32x8(x: u32x8) -> u32x8;
|
|
+ #[link_name = "llvm.ctpop.v16i32"]
|
|
+ fn ctpop_u32x16(x: u32x16) -> u32x16;
|
|
+
|
|
+ #[link_name = "llvm.ctpop.v2i64"]
|
|
+ fn ctpop_u64x2(x: u64x2) -> u64x2;
|
|
+ #[link_name = "llvm.ctpop.v4i64"]
|
|
+ fn ctpop_u64x4(x: u64x4) -> u64x4;
|
|
+ #[link_name = "llvm.ctpop.v8i64"]
|
|
+ fn ctpop_u64x8(x: u64x8) -> u64x8;
|
|
+
|
|
+ #[link_name = "llvm.ctpop.v1i128"]
|
|
+ fn ctpop_u128x1(x: u128x1) -> u128x1;
|
|
+ #[link_name = "llvm.ctpop.v2i128"]
|
|
+ fn ctpop_u128x2(x: u128x2) -> u128x2;
|
|
+ #[link_name = "llvm.ctpop.v4i128"]
|
|
+ fn ctpop_u128x4(x: u128x4) -> u128x4;
|
|
+}
|
|
+
|
|
+crate trait BitManip {
|
|
+ fn ctpop(self) -> Self;
|
|
+ fn ctlz(self) -> Self;
|
|
+ fn cttz(self) -> Self;
|
|
+}
|
|
+
|
|
+macro_rules! impl_bit_manip {
|
|
+ (inner: $ty:ident, $scalar:ty, $uty:ident,
|
|
+ $ctpop:ident, $ctlz:ident, $cttz:ident) => {
|
|
+ // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192
|
|
+ #[cfg(target_arch = "s390x")]
|
|
+ impl_bit_manip! { scalar: $ty, $scalar }
|
|
+ #[cfg(not(target_arch = "s390x"))]
|
|
+ impl BitManip for $ty {
|
|
+ #[inline]
|
|
+ fn ctpop(self) -> Self {
|
|
+ let y: $uty = self.cast();
|
|
+ unsafe { $ctpop(y).cast() }
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn ctlz(self) -> Self {
|
|
+ let y: $uty = self.cast();
|
|
+ // the ctxx intrinsics need compile-time constant
|
|
+ // `is_zero_undef`
|
|
+ unsafe { $ctlz(y, false).cast() }
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn cttz(self) -> Self {
|
|
+ let y: $uty = self.cast();
|
|
+ unsafe { $cttz(y, false).cast() }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => {
|
|
+ #[cfg(target_arch = "s390x")]
|
|
+ impl_bit_manip! { scalar: $ty, $scalar }
|
|
+ #[cfg(not(target_arch = "s390x"))]
|
|
+ impl BitManip for $ty {
|
|
+ #[inline]
|
|
+ fn ctpop(self) -> Self {
|
|
+ let y: $uty = self.cast();
|
|
+ $uty::ctpop(y).cast()
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn ctlz(self) -> Self {
|
|
+ let y: $uty = self.cast();
|
|
+ $uty::ctlz(y).cast()
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn cttz(self) -> Self {
|
|
+ let y: $uty = self.cast();
|
|
+ $uty::cttz(y).cast()
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (scalar: $ty:ident, $scalar:ty) => {
|
|
+ impl BitManip for $ty {
|
|
+ #[inline]
|
|
+ fn ctpop(self) -> Self {
|
|
+ let mut ones = self;
|
|
+ for i in 0..Self::lanes() {
|
|
+ ones = ones
|
|
+ .replace(i, self.extract(i).count_ones() as $scalar);
|
|
+ }
|
|
+ ones
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn ctlz(self) -> Self {
|
|
+ let mut lz = self;
|
|
+ for i in 0..Self::lanes() {
|
|
+ lz = lz.replace(
|
|
+ i,
|
|
+ self.extract(i).leading_zeros() as $scalar,
|
|
+ );
|
|
+ }
|
|
+ lz
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn cttz(self) -> Self {
|
|
+ let mut tz = self;
|
|
+ for i in 0..Self::lanes() {
|
|
+ tz = tz.replace(
|
|
+ i,
|
|
+ self.extract(i).trailing_zeros() as $scalar,
|
|
+ );
|
|
+ }
|
|
+ tz
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty,
|
|
+ $ctpop:ident, $ctlz:ident, $cttz:ident) => {
|
|
+ impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz }
|
|
+ impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz }
|
|
+ };
|
|
+ (sized: $usize:ident, $uscalar:ty, $isize:ident,
|
|
+ $iscalar:ty, $ty:ident) => {
|
|
+ impl_bit_manip! { sized_inner: $usize, $uscalar, $ty }
|
|
+ impl_bit_manip! { sized_inner: $isize, $iscalar, $ty }
|
|
+ };
|
|
+}
|
|
+
|
|
+impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 }
|
|
+impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 }
|
|
+#[cfg(not(target_arch = "aarch64"))] // see below
|
|
+impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 }
|
|
+impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 }
|
|
+impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 }
|
|
+impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 }
|
|
+impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 }
|
|
+impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 }
|
|
+impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 }
|
|
+impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 }
|
|
+impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 }
|
|
+impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 }
|
|
+impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 }
|
|
+impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 }
|
|
+impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 }
|
|
+impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 }
|
|
+impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 }
|
|
+impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 }
|
|
+impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 }
|
|
+impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 }
|
|
+impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 }
|
|
+
|
|
+#[cfg(target_arch = "aarch64")]
|
|
+impl BitManip for u8x8 {
|
|
+ #[inline]
|
|
+ fn ctpop(self) -> Self {
|
|
+ let y: u8x8 = self.cast();
|
|
+ unsafe { ctpop_u8x8(y).cast() }
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn ctlz(self) -> Self {
|
|
+ let y: u8x8 = self.cast();
|
|
+ unsafe { ctlz_u8x8(y, false).cast() }
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn cttz(self) -> Self {
|
|
+ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
|
|
+ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
|
|
+ // intrinsics
|
|
+ let mut tz = self;
|
|
+ for i in 0..Self::lanes() {
|
|
+ tz = tz.replace(i, self.extract(i).trailing_zeros() as u8);
|
|
+ }
|
|
+ tz
|
|
+ }
|
|
+}
|
|
+#[cfg(target_arch = "aarch64")]
|
|
+impl BitManip for i8x8 {
|
|
+ #[inline]
|
|
+ fn ctpop(self) -> Self {
|
|
+ let y: u8x8 = self.cast();
|
|
+ unsafe { ctpop_u8x8(y).cast() }
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn ctlz(self) -> Self {
|
|
+ let y: u8x8 = self.cast();
|
|
+ unsafe { ctlz_u8x8(y, false).cast() }
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn cttz(self) -> Self {
|
|
+ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
|
|
+ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
|
|
+ // intrinsics
|
|
+ let mut tz = self;
|
|
+ for i in 0..Self::lanes() {
|
|
+ tz = tz.replace(i, self.extract(i).trailing_zeros() as i8);
|
|
+ }
|
|
+ tz
|
|
+ }
|
|
+}
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "8")] {
|
|
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 }
|
|
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 }
|
|
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 }
|
|
+ } else if #[cfg(target_pointer_width = "16")] {
|
|
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 }
|
|
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 }
|
|
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 }
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 }
|
|
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 }
|
|
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 }
|
|
+ } else if #[cfg(target_pointer_width = "64")] {
|
|
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 }
|
|
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 }
|
|
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 }
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/llvm.rs b/third_party/rust/packed_simd/src/codegen/llvm.rs
|
|
new file mode 100644
|
|
index 000000000000..91c2b0758dcf
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/llvm.rs
|
|
@@ -0,0 +1,99 @@
|
|
+//! LLVM's platform intrinsics
|
|
+#![allow(dead_code)]
|
|
+
|
|
+use crate::sealed::Shuffle;
|
|
+#[allow(unused_imports)] // FIXME: spurious warning?
|
|
+use crate::sealed::Simd;
|
|
+
|
|
+// Shuffle intrinsics: expanded in users' crates, therefore public.
|
|
+extern "platform-intrinsic" {
|
|
+ // FIXME: Passing this intrinsics an `idx` array with an index that is
|
|
+ // out-of-bounds will produce a monomorphization-time error.
|
|
+ // https://github.com/rust-lang-nursery/packed_simd/issues/21
|
|
+ pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U
|
|
+ where
|
|
+ T: Simd,
|
|
+ <T as Simd>::Element: Shuffle<[u32; 2], Output = U>;
|
|
+
|
|
+ pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U
|
|
+ where
|
|
+ T: Simd,
|
|
+ <T as Simd>::Element: Shuffle<[u32; 4], Output = U>;
|
|
+
|
|
+ pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U
|
|
+ where
|
|
+ T: Simd,
|
|
+ <T as Simd>::Element: Shuffle<[u32; 8], Output = U>;
|
|
+
|
|
+ pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U
|
|
+ where
|
|
+ T: Simd,
|
|
+ <T as Simd>::Element: Shuffle<[u32; 16], Output = U>;
|
|
+
|
|
+ pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U
|
|
+ where
|
|
+ T: Simd,
|
|
+ <T as Simd>::Element: Shuffle<[u32; 32], Output = U>;
|
|
+
|
|
+ pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U
|
|
+ where
|
|
+ T: Simd,
|
|
+ <T as Simd>::Element: Shuffle<[u32; 64], Output = U>;
|
|
+}
|
|
+
|
|
+pub use self::simd_shuffle16 as __shuffle_vector16;
|
|
+pub use self::simd_shuffle2 as __shuffle_vector2;
|
|
+pub use self::simd_shuffle32 as __shuffle_vector32;
|
|
+pub use self::simd_shuffle4 as __shuffle_vector4;
|
|
+pub use self::simd_shuffle64 as __shuffle_vector64;
|
|
+pub use self::simd_shuffle8 as __shuffle_vector8;
|
|
+
|
|
+extern "platform-intrinsic" {
|
|
+ crate fn simd_eq<T, U>(x: T, y: T) -> U;
|
|
+ crate fn simd_ne<T, U>(x: T, y: T) -> U;
|
|
+ crate fn simd_lt<T, U>(x: T, y: T) -> U;
|
|
+ crate fn simd_le<T, U>(x: T, y: T) -> U;
|
|
+ crate fn simd_gt<T, U>(x: T, y: T) -> U;
|
|
+ crate fn simd_ge<T, U>(x: T, y: T) -> U;
|
|
+
|
|
+ crate fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
|
|
+ crate fn simd_extract<T, U>(x: T, idx: u32) -> U;
|
|
+
|
|
+ crate fn simd_cast<T, U>(x: T) -> U;
|
|
+
|
|
+ crate fn simd_add<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_sub<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_mul<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_div<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_rem<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_shl<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_shr<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_and<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_or<T>(x: T, y: T) -> T;
|
|
+ crate fn simd_xor<T>(x: T, y: T) -> T;
|
|
+
|
|
+ crate fn simd_reduce_add_unordered<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
|
|
+ crate fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
|
|
+ crate fn simd_reduce_min<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_max<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_min_nanless<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_max_nanless<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_and<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_or<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_xor<T, U>(x: T) -> U;
|
|
+ crate fn simd_reduce_all<T>(x: T) -> bool;
|
|
+ crate fn simd_reduce_any<T>(x: T) -> bool;
|
|
+
|
|
+ crate fn simd_select<M, T>(m: M, a: T, b: T) -> T;
|
|
+
|
|
+ crate fn simd_fmin<T>(a: T, b: T) -> T;
|
|
+ crate fn simd_fmax<T>(a: T, b: T) -> T;
|
|
+
|
|
+ crate fn simd_fsqrt<T>(a: T) -> T;
|
|
+ crate fn simd_fma<T>(a: T, b: T, c: T) -> T;
|
|
+
|
|
+ crate fn simd_gather<T, P, M>(value: T, pointers: P, mask: M) -> T;
|
|
+ crate fn simd_scatter<T, P, M>(value: T, pointers: P, mask: M);
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math.rs b/third_party/rust/packed_simd/src/codegen/math.rs
|
|
new file mode 100644
|
|
index 000000000000..f3997c7f1135
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math.rs
|
|
@@ -0,0 +1,3 @@
|
|
+//! Vertical math operations
|
|
+
|
|
+crate mod float;
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float.rs b/third_party/rust/packed_simd/src/codegen/math/float.rs
|
|
new file mode 100644
|
|
index 000000000000..5e89bf6ae6b0
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float.rs
|
|
@@ -0,0 +1,18 @@
|
|
+//! Vertical floating-point math operations.
|
|
+#![allow(clippy::useless_transmute)]
|
|
+
|
|
+#[macro_use]
|
|
+crate mod macros;
|
|
+crate mod abs;
|
|
+crate mod cos;
|
|
+crate mod cos_pi;
|
|
+crate mod exp;
|
|
+crate mod ln;
|
|
+crate mod mul_add;
|
|
+crate mod mul_adde;
|
|
+crate mod powf;
|
|
+crate mod sin;
|
|
+crate mod sin_cos_pi;
|
|
+crate mod sin_pi;
|
|
+crate mod sqrt;
|
|
+crate mod sqrte;
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/abs.rs b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs
|
|
new file mode 100644
|
|
index 000000000000..bc4421f61de2
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs
|
|
@@ -0,0 +1,103 @@
|
|
+//! Vertical floating-point `fabs`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vectors fabs
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait Abs {
|
|
+ fn abs(self) -> Self;
|
|
+}
|
|
+
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.fabs.v2f32"]
|
|
+ fn fabs_v2f32(x: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.fabs.v4f32"]
|
|
+ fn fabs_v4f32(x: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.fabs.v8f32"]
|
|
+ fn fabs_v8f32(x: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.fabs.v16f32"]
|
|
+ fn fabs_v16f32(x: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit fabsgle elem vectors
|
|
+ #[link_name = "llvm.fabs.v1f64"]
|
|
+ fn fabs_v1f64(x: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.fabs.v2f64"]
|
|
+ fn fabs_v2f64(x: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.fabs.v4f64"]
|
|
+ fn fabs_v4f64(x: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.fabs.v8f64"]
|
|
+ fn fabs_v8f64(x: f64x8) -> f64x8;
|
|
+
|
|
+ #[link_name = "llvm.fabs.f32"]
|
|
+ fn fabs_f32(x: f32) -> f32;
|
|
+ #[link_name = "llvm.fabs.f64"]
|
|
+ fn fabs_f64(x: f64) -> f64;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(Abs, abs);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
|
|
+ impl_unary!(f32x4[f32; 4]: fabs_f32);
|
|
+ impl_unary!(f32x8[f32; 8]: fabs_f32);
|
|
+ impl_unary!(f32x16[f32; 16]: fabs_f32);
|
|
+
|
|
+ impl_unary!(f64x2[f64; 2]: fabs_f64);
|
|
+ impl_unary!(f64x4[f64; 4]: fabs_f64);
|
|
+ impl_unary!(f64x8[f64; 8]: fabs_f64);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_fabsf4_avx2128);
|
|
+ impl_unary!(f32x8: Sleef_fabsf8_avx2);
|
|
+ impl_unary!(f64x2: Sleef_fabsd2_avx2128);
|
|
+ impl_unary!(f64x4: Sleef_fabsd4_avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_fabsf4_sse4);
|
|
+ impl_unary!(f32x8: Sleef_fabsf8_avx);
|
|
+ impl_unary!(f64x2: Sleef_fabsd2_sse4);
|
|
+ impl_unary!(f64x4: Sleef_fabsd4_avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_fabsf4_sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4);
|
|
+ impl_unary!(f64x2: Sleef_fabsd2_sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4);
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
|
|
+ impl_unary!(f32x16: fabs_v16f32);
|
|
+ impl_unary!(f64x8: fabs_v8f64);
|
|
+
|
|
+ impl_unary!(f32x4: fabs_v4f32);
|
|
+ impl_unary!(f32x8: fabs_v8f32);
|
|
+ impl_unary!(f64x2: fabs_v2f64);
|
|
+ impl_unary!(f64x4: fabs_v4f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
|
|
+ impl_unary!(f32x4: fabs_v4f32);
|
|
+ impl_unary!(f32x8: fabs_v8f32);
|
|
+ impl_unary!(f32x16: fabs_v16f32);
|
|
+
|
|
+ impl_unary!(f64x2: fabs_v2f64);
|
|
+ impl_unary!(f64x4: fabs_v4f64);
|
|
+ impl_unary!(f64x8: fabs_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs
|
|
new file mode 100644
|
|
index 000000000000..50f6c16da255
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs
|
|
@@ -0,0 +1,103 @@
|
|
+//! Vertical floating-point `cos`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vector cos
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait Cos {
|
|
+ fn cos(self) -> Self;
|
|
+}
|
|
+
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.cos.v2f32"]
|
|
+ fn cos_v2f32(x: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.cos.v4f32"]
|
|
+ fn cos_v4f32(x: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.cos.v8f32"]
|
|
+ fn cos_v8f32(x: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.cos.v16f32"]
|
|
+ fn cos_v16f32(x: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit cosgle elem vectors
|
|
+ #[link_name = "llvm.cos.v1f64"]
|
|
+ fn cos_v1f64(x: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.cos.v2f64"]
|
|
+ fn cos_v2f64(x: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.cos.v4f64"]
|
|
+ fn cos_v4f64(x: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.cos.v8f64"]
|
|
+ fn cos_v8f64(x: f64x8) -> f64x8;
|
|
+
|
|
+ #[link_name = "llvm.cos.f32"]
|
|
+ fn cos_f32(x: f32) -> f32;
|
|
+ #[link_name = "llvm.cos.f64"]
|
|
+ fn cos_f64(x: f64) -> f64;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(Cos, cos);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ impl_unary!(f32x2[f32; 2]: cos_f32);
|
|
+ impl_unary!(f32x4[f32; 4]: cos_f32);
|
|
+ impl_unary!(f32x8[f32; 8]: cos_f32);
|
|
+ impl_unary!(f32x16[f32; 16]: cos_f32);
|
|
+
|
|
+ impl_unary!(f64x2[f64; 2]: cos_f64);
|
|
+ impl_unary!(f64x4[f64; 4]: cos_f64);
|
|
+ impl_unary!(f64x8[f64; 8]: cos_f64);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_cosf4_u10avx2128);
|
|
+ impl_unary!(f32x8: Sleef_cosf8_u10avx2);
|
|
+ impl_unary!(f64x2: Sleef_cosd2_u10avx2128);
|
|
+ impl_unary!(f64x4: Sleef_cosd4_u10avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_cosf4_u10sse4);
|
|
+ impl_unary!(f32x8: Sleef_cosf8_u10avx);
|
|
+ impl_unary!(f64x2: Sleef_cosd2_u10sse4);
|
|
+ impl_unary!(f64x4: Sleef_cosd4_u10avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_cosf4_u10sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4);
|
|
+ impl_unary!(f64x2: Sleef_cosd2_u10sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4);
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: cos_f32);
|
|
+ impl_unary!(f32x16: cos_v16f32);
|
|
+ impl_unary!(f64x8: cos_v8f64);
|
|
+
|
|
+ impl_unary!(f32x4: cos_v4f32);
|
|
+ impl_unary!(f32x8: cos_v8f32);
|
|
+ impl_unary!(f64x2: cos_v2f64);
|
|
+ impl_unary!(f64x4: cos_v4f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: cos_f32);
|
|
+ impl_unary!(f32x4: cos_v4f32);
|
|
+ impl_unary!(f32x8: cos_v8f32);
|
|
+ impl_unary!(f32x16: cos_v16f32);
|
|
+
|
|
+ impl_unary!(f64x2: cos_v2f64);
|
|
+ impl_unary!(f64x4: cos_v4f64);
|
|
+ impl_unary!(f64x8: cos_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs
|
|
new file mode 100644
|
|
index 000000000000..ebff5fd1c751
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs
|
|
@@ -0,0 +1,87 @@
|
|
+//! Vertical floating-point `cos`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vectors cos_pi
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait CosPi {
|
|
+ fn cos_pi(self) -> Self;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(CosPi, cos_pi);
|
|
+
|
|
+macro_rules! impl_def {
|
|
+ ($vid:ident, $PI:path) => {
|
|
+ impl CosPi for $vid {
|
|
+ #[inline]
|
|
+ fn cos_pi(self) -> Self {
|
|
+ (self * Self::splat($PI)).cos()
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+macro_rules! impl_def32 {
|
|
+ ($vid:ident) => {
|
|
+ impl_def!($vid, crate::f32::consts::PI);
|
|
+ };
|
|
+}
|
|
+macro_rules! impl_def64 {
|
|
+ ($vid:ident) => {
|
|
+ impl_def!($vid, crate::f64::consts::PI);
|
|
+ };
|
|
+}
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_cospif4_u05avx2128);
|
|
+ impl_unary!(f32x8: Sleef_cospif8_u05avx2);
|
|
+ impl_unary!(f64x2: Sleef_cospid2_u05avx2128);
|
|
+ impl_unary!(f64x4: Sleef_cospid4_u05avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_cospif4_u05sse4);
|
|
+ impl_unary!(f32x8: Sleef_cospif8_u05avx);
|
|
+ impl_unary!(f64x2: Sleef_cospid2_u05sse4);
|
|
+ impl_unary!(f64x4: Sleef_cospid4_u05avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_cospif4_u05sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4);
|
|
+ impl_unary!(f64x2: Sleef_cospid2_u05sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4);
|
|
+ } else {
|
|
+ impl_def32!(f32x2);
|
|
+ impl_def32!(f32x4);
|
|
+ impl_def32!(f32x8);
|
|
+ impl_def32!(f32x16);
|
|
+
|
|
+ impl_def64!(f64x2);
|
|
+ impl_def64!(f64x4);
|
|
+ impl_def64!(f64x8);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_def32!(f32x2);
|
|
+ impl_def32!(f32x4);
|
|
+ impl_def32!(f32x8);
|
|
+ impl_def32!(f32x16);
|
|
+
|
|
+ impl_def64!(f64x2);
|
|
+ impl_def64!(f64x4);
|
|
+ impl_def64!(f64x8);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/exp.rs b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs
|
|
new file mode 100644
|
|
index 000000000000..00d10e9fa644
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs
|
|
@@ -0,0 +1,112 @@
|
|
+//! Vertical floating-point `exp`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit expgle elem vectors misexpg
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait Exp {
|
|
+ fn exp(self) -> Self;
|
|
+}
|
|
+
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.exp.v2f32"]
|
|
+ fn exp_v2f32(x: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.exp.v4f32"]
|
|
+ fn exp_v4f32(x: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.exp.v8f32"]
|
|
+ fn exp_v8f32(x: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.exp.v16f32"]
|
|
+ fn exp_v16f32(x: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit expgle elem vectors
|
|
+ #[link_name = "llvm.exp.v1f64"]
|
|
+ fn exp_v1f64(x: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.exp.v2f64"]
|
|
+ fn exp_v2f64(x: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.exp.v4f64"]
|
|
+ fn exp_v4f64(x: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.exp.v8f64"]
|
|
+ fn exp_v8f64(x: f64x8) -> f64x8;
|
|
+
|
|
+ #[link_name = "llvm.exp.f32"]
|
|
+ fn exp_f32(x: f32) -> f32;
|
|
+ #[link_name = "llvm.exp.f64"]
|
|
+ fn exp_f64(x: f64) -> f64;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(Exp, exp);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ impl_unary!(f32x2[f32; 2]: exp_f32);
|
|
+ impl_unary!(f32x4[f32; 4]: exp_f32);
|
|
+ impl_unary!(f32x8[f32; 8]: exp_f32);
|
|
+ impl_unary!(f32x16[f32; 16]: exp_f32);
|
|
+
|
|
+ impl_unary!(f64x2[f64; 2]: exp_f64);
|
|
+ impl_unary!(f64x4[f64; 4]: exp_f64);
|
|
+ impl_unary!(f64x8[f64; 8]: exp_f64);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_expf4_u10avx2128);
|
|
+ impl_unary!(f32x8: Sleef_expf8_u10avx2);
|
|
+ impl_unary!(f64x2: Sleef_expd2_u10avx2128);
|
|
+ impl_unary!(f64x4: Sleef_expd4_u10avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_expf4_u10sse4);
|
|
+ impl_unary!(f32x8: Sleef_expf8_u10avx);
|
|
+ impl_unary!(f64x2: Sleef_expd2_u10sse4);
|
|
+ impl_unary!(f64x4: Sleef_expd4_u10avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_expf4_u10sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4);
|
|
+ impl_unary!(f64x2: Sleef_expd2_u10sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4);
|
|
+ } else if #[cfg(target_feature = "sse2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_expf4_u10sse2);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2);
|
|
+ impl_unary!(f64x2: Sleef_expd2_u10sse2);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2);
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: exp_f32);
|
|
+ impl_unary!(f32x16: exp_v16f32);
|
|
+ impl_unary!(f64x8: exp_v8f64);
|
|
+
|
|
+ impl_unary!(f32x4: exp_v4f32);
|
|
+ impl_unary!(f32x8: exp_v8f32);
|
|
+ impl_unary!(f64x2: exp_v2f64);
|
|
+ impl_unary!(f64x4: exp_v4f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: exp_f32);
|
|
+ impl_unary!(f32x4: exp_v4f32);
|
|
+ impl_unary!(f32x8: exp_v8f32);
|
|
+ impl_unary!(f32x16: exp_v16f32);
|
|
+
|
|
+ impl_unary!(f64x2: exp_v2f64);
|
|
+ impl_unary!(f64x4: exp_v4f64);
|
|
+ impl_unary!(f64x8: exp_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/ln.rs b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs
|
|
new file mode 100644
|
|
index 000000000000..88a5a6c6c158
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs
|
|
@@ -0,0 +1,112 @@
|
|
+//! Vertical floating-point `ln`
|
|
+#![allow(unused)]
|
|
+
|
|
+ // FIXME 64-bit single elem vectors missing
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait Ln {
|
|
+ fn ln(self) -> Self;
|
|
+}
|
|
+
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.log.v2f32"]
|
|
+ fn ln_v2f32(x: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.log.v4f32"]
|
|
+ fn ln_v4f32(x: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.log.v8f32"]
|
|
+ fn ln_v8f32(x: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.log.v16f32"]
|
|
+ fn ln_v16f32(x: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit lngle elem vectors
|
|
+ #[link_name = "llvm.log.v1f64"]
|
|
+ fn ln_v1f64(x: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.log.v2f64"]
|
|
+ fn ln_v2f64(x: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.log.v4f64"]
|
|
+ fn ln_v4f64(x: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.log.v8f64"]
|
|
+ fn ln_v8f64(x: f64x8) -> f64x8;
|
|
+
|
|
+ #[link_name = "llvm.log.f32"]
|
|
+ fn ln_f32(x: f32) -> f32;
|
|
+ #[link_name = "llvm.log.f64"]
|
|
+ fn ln_f64(x: f64) -> f64;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(Ln, ln);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ impl_unary!(f32x2[f32; 2]: ln_f32);
|
|
+ impl_unary!(f32x4[f32; 4]: ln_f32);
|
|
+ impl_unary!(f32x8[f32; 8]: ln_f32);
|
|
+ impl_unary!(f32x16[f32; 16]: ln_f32);
|
|
+
|
|
+ impl_unary!(f64x2[f64; 2]: ln_f64);
|
|
+ impl_unary!(f64x4[f64; 4]: ln_f64);
|
|
+ impl_unary!(f64x8[f64; 8]: ln_f64);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_logf4_u10avx2128);
|
|
+ impl_unary!(f32x8: Sleef_logf8_u10avx2);
|
|
+ impl_unary!(f64x2: Sleef_logd2_u10avx2128);
|
|
+ impl_unary!(f64x4: Sleef_logd4_u10avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_logf4_u10sse4);
|
|
+ impl_unary!(f32x8: Sleef_logf8_u10avx);
|
|
+ impl_unary!(f64x2: Sleef_logd2_u10sse4);
|
|
+ impl_unary!(f64x4: Sleef_logd4_u10avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_logf4_u10sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4);
|
|
+ impl_unary!(f64x2: Sleef_logd2_u10sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4);
|
|
+ } else if #[cfg(target_feature = "sse2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_logf4_u10sse2);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2);
|
|
+ impl_unary!(f64x2: Sleef_logd2_u10sse2);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2);
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: ln_f32);
|
|
+ impl_unary!(f32x16: ln_v16f32);
|
|
+ impl_unary!(f64x8: ln_v8f64);
|
|
+
|
|
+ impl_unary!(f32x4: ln_v4f32);
|
|
+ impl_unary!(f32x8: ln_v8f32);
|
|
+ impl_unary!(f64x2: ln_v2f64);
|
|
+ impl_unary!(f64x4: ln_v4f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: ln_f32);
|
|
+ impl_unary!(f32x4: ln_v4f32);
|
|
+ impl_unary!(f32x8: ln_v8f32);
|
|
+ impl_unary!(f32x16: ln_v16f32);
|
|
+
|
|
+ impl_unary!(f64x2: ln_v2f64);
|
|
+ impl_unary!(f64x4: ln_v4f64);
|
|
+ impl_unary!(f64x8: ln_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/macros.rs b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs
|
|
new file mode 100644
|
|
index 000000000000..02d0ca3f5c7a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs
|
|
@@ -0,0 +1,559 @@
|
|
+//! Utility macros
|
|
+#![allow(unused)]
|
|
+
|
|
+
|
|
/// Generates `impl $trait_id for $vec_id` whose single method `$trait_method`
/// forwards to the unary function `$fun`.
///
/// The leading keyword selects how the vector is handed to `$fun`:
///
/// * `vec`     - `$fun` is called once on the whole vector.
/// * `gen`     - like `vec`, for a generic `$fun` that receives `self.0`.
/// * `scalar`  - `$fun` is called once per element (`[$sid; $scount]`).
/// * `halves`  - `$fun` is called on each of the two halves (`$vech_id`).
/// * `quarter` - `$fun` is called on each of the four quarters (`$vecq_id`).
/// * `twice`   - `$fun` is called once on a vector twice as wide
///   (`$vect_id`); only the lower half of its result is returned.
///
/// All arms reinterpret bits through `transmute` or a local `union`, so the
/// caller must pair types whose sizes match the chosen arm.
macro_rules! impl_unary_ {
    // implementation mapping 1:1
    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    transmute($fun(transmute(self)))
                }
            }
        }
    };
    // implementation mapping 1:1 for when `$fun` is a generic function
    // like some of the fp math rustc intrinsics (e.g. `fn fun<T>(x: T) -> T`).
    (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    transmute($fun(self.0))
                }
            }
        }
    };
    // implementation calling the scalar `$fun` once per element:
    (scalar | $trait_id:ident, $trait_method:ident,
     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self) -> Self {
                unsafe {
                    union U {
                        vec: $vec_id,
                        scalars: [$sid; $scount],
                    }
                    let mut scalars = U { vec: self }.scalars;
                    for i in &mut scalars {
                        *i = $fun(*i);
                    }
                    U { scalars }.vec
                }
            }
        }
    };
    // implementation calling fun twice on each of the vector halves:
    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vech_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        halves: [$vech_id; 2],
                    }

                    let mut halves = U { vec: self }.halves;

                    *halves.get_unchecked_mut(0) =
                        transmute($fun(transmute(*halves.get_unchecked(0))));
                    *halves.get_unchecked_mut(1) =
                        transmute($fun(transmute(*halves.get_unchecked(1))));

                    U { halves }.vec
                }
            }
        }
    };
    // implementation calling fun four times on each of the vector quarters:
    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vecq_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        quarters: [$vecq_id; 4],
                    }

                    let mut quarters = U { vec: self }.quarters;

                    *quarters.get_unchecked_mut(0) =
                        transmute($fun(transmute(*quarters.get_unchecked(0))));
                    *quarters.get_unchecked_mut(1) =
                        transmute($fun(transmute(*quarters.get_unchecked(1))));
                    *quarters.get_unchecked_mut(2) =
                        transmute($fun(transmute(*quarters.get_unchecked(2))));
                    *quarters.get_unchecked_mut(3) =
                        transmute($fun(transmute(*quarters.get_unchecked(3))));

                    U { quarters }.vec
                }
            }
        }
    };
    // implementation calling fun once on a vector twice as large:
    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vect_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self) -> Self {
                unsafe {
                    use crate::mem::transmute;

                    union U {
                        vec: [$vec_id; 2],
                        twice: $vect_id,
                    }

                    // The upper half is padding whose result is discarded.
                    // Fill it with a copy of `self` so that `$fun` never
                    // reads uninitialized memory.
                    let twice = U { vec: [self, self] }.twice;
                    let twice = transmute($fun(transmute(twice)));

                    *(U { twice }.vec.get_unchecked(0))
                }
            }
        }
    };
}
|
|
+
|
|
/// Defines a local `impl_unary!` macro bound to `$trait_id::$trait_method`.
///
/// `impl_unary!` translates a compact table entry (`vector[strategy]: fun`)
/// into the matching `impl_unary_!` invocation.
macro_rules! gen_unary_impl_table {
    ($trait_id:ident, $trait_method:ident) => {
        macro_rules! impl_unary {
            // `v: f` - call `f` once on the whole vector.
            ($vid:ident: $fun:ident) => {
                impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun);
            };
            // `v[g]: f` - `f` is a generic function.
            ($vid:ident[g]: $fun:ident) => {
                impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun);
            };
            // `v[T; N]: f` - call the scalar `f` on each of the `N` elements.
            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
                impl_unary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
            };
            // NOTE(review): this forwards no `[$sid; $scount]`, which the
            // `scalar` arm of `impl_unary_!` requires - looks like it cannot
            // expand successfully; confirm before relying on `v[s]: f`.
            ($vid:ident[s]: $fun:ident) => {
                impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun);
            };
            // `v[h => H]: f` - call `f` on both halves of type `H`.
            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
                impl_unary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
            };
            // `v[q => Q]: f` - call `f` on all four quarters of type `Q`.
            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
                impl_unary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
            };
            // `v[t => T]: f` - call `f` once on the twice-as-wide type `T`.
            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
                impl_unary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
            };
        }
    };
}
|
|
+
|
|
/// Generates `impl $trait_id for $vec_id` whose method
/// `$trait_method(self, y, z)` forwards to the three-argument function `$fun`.
///
/// The leading keyword selects how the vectors are handed to `$fun`:
///
/// * `vec`     - `$fun` is called once on the whole vectors.
/// * `scalar`  - `$fun` is called once per element (`[$sid; $scount]`).
/// * `halves`  - `$fun` is called on each pair of halves (`$vech_id`).
/// * `quarter` - `$fun` is called on each group of quarters (`$vecq_id`).
/// * `twice`   - `$fun` is called once on vectors twice as wide
///   (`$vect_id`); only the lower half of its result is returned.
///
/// All arms reinterpret bits through `transmute` or a local `union`, so the
/// caller must pair types whose sizes match the chosen arm.
macro_rules! impl_tertiary_ {
    // implementation mapping 1:1
    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    transmute($fun(
                        transmute(self),
                        transmute(y),
                        transmute(z),
                    ))
                }
            }
        }
    };
    // implementation calling the scalar `$fun` once per element:
    (scalar | $trait_id:ident, $trait_method:ident,
     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    union U {
                        vec: $vec_id,
                        scalars: [$sid; $scount],
                    }
                    let mut x = U { vec: self }.scalars;
                    let y = U { vec: y }.scalars;
                    let z = U { vec: z }.scalars;
                    // Walk the three element arrays in lock-step, writing
                    // each result back into `x`.
                    for (xi, (yi, zi)) in
                        x.iter_mut().zip(y.iter().zip(z.iter()))
                    {
                        *xi = $fun(*xi, *yi, *zi);
                    }
                    U { scalars: x }.vec
                }
            }
        }
    };
    // implementation calling fun twice on each of the vector halves:
    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vech_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        halves: [$vech_id; 2],
                    }

                    let mut x_halves = U { vec: self }.halves;
                    let y_halves = U { vec: y }.halves;
                    let z_halves = U { vec: z }.halves;

                    *x_halves.get_unchecked_mut(0) = transmute($fun(
                        transmute(*x_halves.get_unchecked(0)),
                        transmute(*y_halves.get_unchecked(0)),
                        transmute(*z_halves.get_unchecked(0)),
                    ));
                    *x_halves.get_unchecked_mut(1) = transmute($fun(
                        transmute(*x_halves.get_unchecked(1)),
                        transmute(*y_halves.get_unchecked(1)),
                        transmute(*z_halves.get_unchecked(1)),
                    ));

                    U { halves: x_halves }.vec
                }
            }
        }
    };
    // implementation calling fun four times on each of the vector quarters:
    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vecq_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        quarters: [$vecq_id; 4],
                    }

                    let mut x_quarters = U { vec: self }.quarters;
                    let y_quarters = U { vec: y }.quarters;
                    let z_quarters = U { vec: z }.quarters;

                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(0)),
                        transmute(*y_quarters.get_unchecked(0)),
                        transmute(*z_quarters.get_unchecked(0)),
                    ));

                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(1)),
                        transmute(*y_quarters.get_unchecked(1)),
                        transmute(*z_quarters.get_unchecked(1)),
                    ));

                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(2)),
                        transmute(*y_quarters.get_unchecked(2)),
                        transmute(*z_quarters.get_unchecked(2)),
                    ));

                    *x_quarters.get_unchecked_mut(3) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(3)),
                        transmute(*y_quarters.get_unchecked(3)),
                        transmute(*z_quarters.get_unchecked(3)),
                    ));

                    U { quarters: x_quarters }.vec
                }
            }
        }
    };
    // implementation calling fun once on a vector twice as large:
    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vect_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;

                    union U {
                        vec: [$vec_id; 2],
                        twice: $vect_id,
                    }

                    // The upper halves are padding whose result is
                    // discarded. Fill them with copies of the operands so
                    // that `$fun` never reads uninitialized memory.
                    let x_twice = U { vec: [self, self] }.twice;
                    let y_twice = U { vec: [y, y] }.twice;
                    let z_twice = U { vec: [z, z] }.twice;
                    let twice: $vect_id = transmute($fun(
                        transmute(x_twice),
                        transmute(y_twice),
                        transmute(z_twice),
                    ));

                    *(U { twice }.vec.get_unchecked(0))
                }
            }
        }
    };
}
|
|
+
|
|
/// Defines a local `impl_tertiary!` macro bound to
/// `$trait_id::$trait_method`.
///
/// `impl_tertiary!` translates a compact table entry
/// (`vector[strategy]: fun`) into the matching `impl_tertiary_!` invocation.
macro_rules! gen_tertiary_impl_table {
    ($trait_id:ident, $trait_method:ident) => {
        macro_rules! impl_tertiary {
            // `v: f` - call `f` once on the whole vectors.
            ($vid:ident: $fun:ident) => {
                impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun);
            };
            // `v[T; N]: f` - call the scalar `f` on each of the `N` elements.
            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
                impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
            };
            // NOTE(review): this forwards no `[$sid; $scount]`, which the
            // `scalar` arm of `impl_tertiary_!` requires - looks like it
            // cannot expand successfully; confirm before relying on `v[s]: f`.
            ($vid:ident[s]: $fun:ident) => {
                impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun);
            };
            // `v[h => H]: f` - call `f` on both halves of type `H`.
            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
                impl_tertiary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
            };
            // `v[q => Q]: f` - call `f` on all four quarters of type `Q`.
            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
                impl_tertiary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
            };
            // `v[t => T]: f` - call `f` once on the twice-as-wide type `T`.
            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
                impl_tertiary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
            };
        }
    };
}
|
|
+
|
|
/// Generates `impl $trait_id for $vec_id` whose method
/// `$trait_method(self, y)` forwards to the two-argument function `$fun`.
///
/// The leading keyword selects how the vectors are handed to `$fun`:
///
/// * `vec`     - `$fun` is called once on the whole vectors.
/// * `scalar`  - `$fun` is called once per element (`[$sid; $scount]`).
/// * `halves`  - `$fun` is called on each pair of halves (`$vech_id`).
/// * `quarter` - `$fun` is called on each pair of quarters (`$vecq_id`).
/// * `twice`   - `$fun` is called once on vectors twice as wide
///   (`$vect_id`); only the lower half of its result is returned.
///
/// All arms reinterpret bits through `transmute` or a local `union`, so the
/// caller must pair types whose sizes match the chosen arm.
macro_rules! impl_binary_ {
    // implementation mapping 1:1
    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    transmute($fun(transmute(self), transmute(y)))
                }
            }
        }
    };
    // implementation calling the scalar `$fun` once per element:
    (scalar | $trait_id:ident, $trait_method:ident,
     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self) -> Self {
                unsafe {
                    union U {
                        vec: $vec_id,
                        scalars: [$sid; $scount],
                    }
                    let mut x = U { vec: self }.scalars;
                    let y = U { vec: y }.scalars;
                    for (xi, yi) in x.iter_mut().zip(y.iter()) {
                        *xi = $fun(*xi, *yi);
                    }
                    U { scalars: x }.vec
                }
            }
        }
    };
    // implementation calling fun twice on each of the vector halves:
    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vech_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        halves: [$vech_id; 2],
                    }

                    let mut x_halves = U { vec: self }.halves;
                    let y_halves = U { vec: y }.halves;

                    *x_halves.get_unchecked_mut(0) = transmute($fun(
                        transmute(*x_halves.get_unchecked(0)),
                        transmute(*y_halves.get_unchecked(0)),
                    ));
                    *x_halves.get_unchecked_mut(1) = transmute($fun(
                        transmute(*x_halves.get_unchecked(1)),
                        transmute(*y_halves.get_unchecked(1)),
                    ));

                    U { halves: x_halves }.vec
                }
            }
        }
    };
    // implementation calling fun four times on each of the vector quarters:
    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vecq_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        quarters: [$vecq_id; 4],
                    }

                    let mut x_quarters = U { vec: self }.quarters;
                    let y_quarters = U { vec: y }.quarters;

                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(0)),
                        transmute(*y_quarters.get_unchecked(0)),
                    ));

                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(1)),
                        transmute(*y_quarters.get_unchecked(1)),
                    ));

                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(2)),
                        transmute(*y_quarters.get_unchecked(2)),
                    ));

                    *x_quarters.get_unchecked_mut(3) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(3)),
                        transmute(*y_quarters.get_unchecked(3)),
                    ));

                    U { quarters: x_quarters }.vec
                }
            }
        }
    };
    // implementation calling fun once on a vector twice as large:
    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vect_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;

                    union U {
                        vec: [$vec_id; 2],
                        twice: $vect_id,
                    }

                    // The upper halves are padding whose result is
                    // discarded. Fill them with copies of the operands so
                    // that `$fun` never reads uninitialized memory.
                    let x_twice = U { vec: [self, self] }.twice;
                    let y_twice = U { vec: [y, y] }.twice;
                    let twice: $vect_id = transmute($fun(
                        transmute(x_twice),
                        transmute(y_twice),
                    ));

                    *(U { twice }.vec.get_unchecked(0))
                }
            }
        }
    };
}
|
|
+
|
|
/// Defines a local `impl_binary!` macro bound to `$trait_id::$trait_method`.
///
/// `impl_binary!` translates a compact table entry (`vector[strategy]: fun`)
/// into the matching `impl_binary_!` invocation.
macro_rules! gen_binary_impl_table {
    ($trait_id:ident, $trait_method:ident) => {
        macro_rules! impl_binary {
            // `v: f` - call `f` once on the whole vectors.
            ($vid:ident: $fun:ident) => {
                impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun);
            };
            // `v[T; N]: f` - call the scalar `f` on each of the `N` elements.
            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
                impl_binary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
            };
            // NOTE(review): this forwards no `[$sid; $scount]`, which the
            // `scalar` arm of `impl_binary_!` requires - looks like it cannot
            // expand successfully; confirm before relying on `v[s]: f`.
            ($vid:ident[s]: $fun:ident) => {
                impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun);
            };
            // `v[h => H]: f` - call `f` on both halves of type `H`.
            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
                impl_binary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
            };
            // `v[q => Q]: f` - call `f` on all four quarters of type `Q`.
            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
                impl_binary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
            };
            // `v[t => T]: f` - call `f` once on the twice-as-wide type `T`.
            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
                impl_binary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
            };
        }
    };
}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs
|
|
new file mode 100644
|
|
index 000000000000..f48a57dc46c6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs
|
|
@@ -0,0 +1,109 @@
|
|
+//! Vertical floating-point `mul_add`
|
|
+#![allow(unused)]
|
|
+use crate::*;
|
|
+
|
|
+// FIXME: 64-bit 1 element mul_add
|
|
+
|
|
+crate trait MulAdd {
|
|
+ fn mul_add(self, y: Self, z: Self) -> Self;
|
|
+}
|
|
+
|
|
+#[cfg(not(target_arch = "s390x"))]
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.fma.v2f32"]
|
|
+ fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.fma.v4f32"]
|
|
+ fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.fma.v8f32"]
|
|
+ fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.fma.v16f32"]
|
|
+ fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit single elem vectors
|
|
+ #[link_name = "llvm.fma.v1f64"]
|
|
+ fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.fma.v2f64"]
|
|
+ fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.fma.v4f64"]
|
|
+ fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.fma.v8f64"]
|
|
+ fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
|
|
+}
|
|
+
|
|
+gen_tertiary_impl_table!(MulAdd, mul_add);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ macro_rules! impl_broken {
|
|
+ ($id:ident) => {
|
|
+ impl MulAdd for $id {
|
|
+ #[inline]
|
|
+ fn mul_add(self, y: Self, z: Self) -> Self {
|
|
+ self * y + z
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ }
|
|
+
|
|
+ impl_broken!(f32x2);
|
|
+ impl_broken!(f32x4);
|
|
+ impl_broken!(f32x8);
|
|
+ impl_broken!(f32x16);
|
|
+
|
|
+ impl_broken!(f64x2);
|
|
+ impl_broken!(f64x4);
|
|
+ impl_broken!(f64x8);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
|
|
+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
|
|
+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);
|
|
+
|
|
+ impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
|
|
+ impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
|
|
+ impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
|
|
+ impl_tertiary!(f64x4: Sleef_fmad4_avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
|
|
+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
|
|
+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);
|
|
+
|
|
+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
|
|
+ impl_tertiary!(f32x8: Sleef_fmaf8_avx);
|
|
+ impl_tertiary!(f64x2: Sleef_fmad2_sse4);
|
|
+ impl_tertiary!(f64x4: Sleef_fmad4_avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
|
|
+ impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
|
|
+ impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);
|
|
+
|
|
+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
|
|
+ impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
|
|
+ impl_tertiary!(f64x2: Sleef_fmad2_sse4);
|
|
+ impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
|
|
+ } else {
|
|
+ impl_tertiary!(f32x2: fma_v2f32);
|
|
+ impl_tertiary!(f32x16: fma_v16f32);
|
|
+ impl_tertiary!(f64x8: fma_v8f64);
|
|
+
|
|
+ impl_tertiary!(f32x4: fma_v4f32);
|
|
+ impl_tertiary!(f32x8: fma_v8f32);
|
|
+ impl_tertiary!(f64x2: fma_v2f64);
|
|
+ impl_tertiary!(f64x4: fma_v4f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_tertiary!(f32x2: fma_v2f32);
|
|
+ impl_tertiary!(f32x4: fma_v4f32);
|
|
+ impl_tertiary!(f32x8: fma_v8f32);
|
|
+ impl_tertiary!(f32x16: fma_v16f32);
|
|
+ // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors
|
|
+ impl_tertiary!(f64x2: fma_v2f64);
|
|
+ impl_tertiary!(f64x4: fma_v4f64);
|
|
+ impl_tertiary!(f64x8: fma_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs
|
|
new file mode 100644
|
|
index 000000000000..8c41fb131d94
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs
|
|
@@ -0,0 +1,66 @@
|
|
+//! Approximation for floating-point `mul_add`
|
|
+use crate::*;
|
|
+
|
|
+// FIXME: 64-bit 1 element mul_adde
|
|
+
|
|
+crate trait MulAddE {
|
|
+ fn mul_adde(self, y: Self, z: Self) -> Self;
|
|
+}
|
|
+
|
|
+#[cfg(not(target_arch = "s390x"))]
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.fmuladd.v2f32"]
|
|
+ fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.fmuladd.v4f32"]
|
|
+ fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.fmuladd.v8f32"]
|
|
+ fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.fmuladd.v16f32"]
|
|
+ fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit single elem vectors
|
|
+ #[link_name = "llvm.fmuladd.v1f64"]
|
|
+ fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.fmuladd.v2f64"]
|
|
+ fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.fmuladd.v4f64"]
|
|
+ fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.fmuladd.v8f64"]
|
|
+ fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
|
|
+}
|
|
+
|
|
/// Generates `impl MulAddE for $vec`, forwarding `mul_adde` to `$intrinsic`.
///
/// On s390x the intrinsic is bypassed and the result is computed as
/// `self * y + z`.
macro_rules! impl_mul_adde {
    ($vec:ident : $intrinsic:ident) => {
        impl MulAddE for $vec {
            #[inline]
            fn mul_adde(self, y: Self, z: Self) -> Self {
                // Exactly one of the two blocks below survives `cfg`
                // stripping and becomes the function's tail expression.
                #[cfg(not(target_arch = "s390x"))]
                {
                    use crate::mem::transmute;
                    unsafe {
                        let combined = $intrinsic(
                            transmute(self),
                            transmute(y),
                            transmute(z),
                        );
                        transmute(combined)
                    }
                }
                #[cfg(target_arch = "s390x")]
                {
                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
                    self * y + z
                }
            }
        }
    };
}
|
|
+
|
|
+impl_mul_adde!(f32x2: fmuladd_v2f32);
|
|
+impl_mul_adde!(f32x4: fmuladd_v4f32);
|
|
+impl_mul_adde!(f32x8: fmuladd_v8f32);
|
|
+impl_mul_adde!(f32x16: fmuladd_v16f32);
|
|
+// impl_mul_adde!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors
|
|
+impl_mul_adde!(f64x2: fmuladd_v2f64);
|
|
+impl_mul_adde!(f64x4: fmuladd_v4f64);
|
|
+impl_mul_adde!(f64x8: fmuladd_v8f64);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/powf.rs b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs
|
|
new file mode 100644
|
|
index 000000000000..bc15067d73a3
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs
|
|
@@ -0,0 +1,112 @@
|
|
+//! Vertical floating-point `powf`
|
|
+#![allow(unused)]
|
|
+
|
|
+ // FIXME 64-bit single elem vectors missing
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait Powf {
|
|
+ fn powf(self, x: Self) -> Self;
|
|
+}
|
|
+
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.pow.v2f32"]
|
|
+ fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.pow.v4f32"]
|
|
+ fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.pow.v8f32"]
|
|
+ fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.pow.v16f32"]
|
|
+ fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit powfgle elem vectors
|
|
+ #[link_name = "llvm.pow.v1f64"]
|
|
+ fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.pow.v2f64"]
|
|
+ fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.pow.v4f64"]
|
|
+ fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.pow.v8f64"]
|
|
+ fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8;
|
|
+
|
|
+ #[link_name = "llvm.pow.f32"]
|
|
+ fn powf_f32(x: f32, y: f32) -> f32;
|
|
+ #[link_name = "llvm.pow.f64"]
|
|
+ fn powf_f64(x: f64, y: f64) -> f64;
|
|
+}
|
|
+
|
|
+gen_binary_impl_table!(Powf, powf);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ impl_binary!(f32x2[f32; 2]: powf_f32);
|
|
+ impl_binary!(f32x4[f32; 4]: powf_f32);
|
|
+ impl_binary!(f32x8[f32; 8]: powf_f32);
|
|
+ impl_binary!(f32x16[f32; 16]: powf_f32);
|
|
+
|
|
+ impl_binary!(f64x2[f64; 2]: powf_f64);
|
|
+ impl_binary!(f64x4[f64; 4]: powf_f64);
|
|
+ impl_binary!(f64x8[f64; 8]: powf_f64);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128);
|
|
+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2);
|
|
+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2);
|
|
+
|
|
+ impl_binary!(f32x4: Sleef_powf4_u10avx2128);
|
|
+ impl_binary!(f32x8: Sleef_powf8_u10avx2);
|
|
+ impl_binary!(f64x2: Sleef_powd2_u10avx2128);
|
|
+ impl_binary!(f64x4: Sleef_powd4_u10avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
|
|
+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx);
|
|
+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx);
|
|
+
|
|
+ impl_binary!(f32x4: Sleef_powf4_u10sse4);
|
|
+ impl_binary!(f32x8: Sleef_powf8_u10avx);
|
|
+ impl_binary!(f64x2: Sleef_powd2_u10sse4);
|
|
+ impl_binary!(f64x4: Sleef_powd4_u10avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
|
|
+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4);
|
|
+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4);
|
|
+
|
|
+ impl_binary!(f32x4: Sleef_powf4_u10sse4);
|
|
+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4);
|
|
+ impl_binary!(f64x2: Sleef_powd2_u10sse4);
|
|
+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4);
|
|
+ } else if #[cfg(target_feature = "sse2")] {
|
|
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2);
|
|
+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2);
|
|
+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2);
|
|
+
|
|
+ impl_binary!(f32x4: Sleef_powf4_u10sse2);
|
|
+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2);
|
|
+ impl_binary!(f64x2: Sleef_powd2_u10sse2);
|
|
+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2);
|
|
+ } else {
|
|
+ impl_binary!(f32x2[f32; 2]: powf_f32);
|
|
+ impl_binary!(f32x4: powf_v4f32);
|
|
+ impl_binary!(f32x8: powf_v8f32);
|
|
+ impl_binary!(f32x16: powf_v16f32);
|
|
+
|
|
+ impl_binary!(f64x2: powf_v2f64);
|
|
+ impl_binary!(f64x4: powf_v4f64);
|
|
+ impl_binary!(f64x8: powf_v8f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_binary!(f32x2[f32; 2]: powf_f32);
|
|
+ impl_binary!(f32x4: powf_v4f32);
|
|
+ impl_binary!(f32x8: powf_v8f32);
|
|
+ impl_binary!(f32x16: powf_v16f32);
|
|
+
|
|
+ impl_binary!(f64x2: powf_v2f64);
|
|
+ impl_binary!(f64x4: powf_v4f64);
|
|
+ impl_binary!(f64x8: powf_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs
|
|
new file mode 100644
|
|
index 000000000000..7b014d07da8d
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs
|
|
@@ -0,0 +1,103 @@
|
|
+//! Vertical floating-point `sin`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vectors sin
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait Sin {
|
|
+ fn sin(self) -> Self;
|
|
+}
|
|
+
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.sin.v2f32"]
|
|
+ fn sin_v2f32(x: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.sin.v4f32"]
|
|
+ fn sin_v4f32(x: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.sin.v8f32"]
|
|
+ fn sin_v8f32(x: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.sin.v16f32"]
|
|
+ fn sin_v16f32(x: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit single elem vectors
|
|
+ #[link_name = "llvm.sin.v1f64"]
|
|
+ fn sin_v1f64(x: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.sin.v2f64"]
|
|
+ fn sin_v2f64(x: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.sin.v4f64"]
|
|
+ fn sin_v4f64(x: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.sin.v8f64"]
|
|
+ fn sin_v8f64(x: f64x8) -> f64x8;
|
|
+
|
|
+ #[link_name = "llvm.sin.f32"]
|
|
+ fn sin_f32(x: f32) -> f32;
|
|
+ #[link_name = "llvm.sin.f64"]
|
|
+ fn sin_f64(x: f64) -> f64;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(Sin, sin);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ impl_unary!(f32x2[f32; 2]: sin_f32);
|
|
+ impl_unary!(f32x4[f32; 4]: sin_f32);
|
|
+ impl_unary!(f32x8[f32; 8]: sin_f32);
|
|
+ impl_unary!(f32x16[f32; 16]: sin_f32);
|
|
+
|
|
+ impl_unary!(f64x2[f64; 2]: sin_f64);
|
|
+ impl_unary!(f64x4[f64; 4]: sin_f64);
|
|
+ impl_unary!(f64x8[f64; 8]: sin_f64);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sinf4_u10avx2128);
|
|
+ impl_unary!(f32x8: Sleef_sinf8_u10avx2);
|
|
+ impl_unary!(f64x2: Sleef_sind2_u10avx2128);
|
|
+ impl_unary!(f64x4: Sleef_sind4_u10avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sinf4_u10sse4);
|
|
+ impl_unary!(f32x8: Sleef_sinf8_u10avx);
|
|
+ impl_unary!(f64x2: Sleef_sind2_u10sse4);
|
|
+ impl_unary!(f64x4: Sleef_sind4_u10avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sinf4_u10sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4);
|
|
+ impl_unary!(f64x2: Sleef_sind2_u10sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4);
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: sin_f32);
|
|
+ impl_unary!(f32x16: sin_v16f32);
|
|
+ impl_unary!(f64x8: sin_v8f64);
|
|
+
|
|
+ impl_unary!(f32x4: sin_v4f32);
|
|
+ impl_unary!(f32x8: sin_v8f32);
|
|
+ impl_unary!(f64x2: sin_v2f64);
|
|
+ impl_unary!(f64x4: sin_v4f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: sin_f32);
|
|
+ impl_unary!(f32x4: sin_v4f32);
|
|
+ impl_unary!(f32x8: sin_v8f32);
|
|
+ impl_unary!(f32x16: sin_v16f32);
|
|
+
|
|
+ impl_unary!(f64x2: sin_v2f64);
|
|
+ impl_unary!(f64x4: sin_v4f64);
|
|
+ impl_unary!(f64x8: sin_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs
|
|
new file mode 100644
|
|
index 000000000000..0f1249ec88f0
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs
|
|
@@ -0,0 +1,195 @@
|
|
+//! Vertical floating-point `sin_cos_pi`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vectors sin_cos_pi
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait SinCosPi: Sized {
|
|
+ type Output;
|
|
+ fn sin_cos_pi(self) -> Self::Output;
|
|
+}
|
|
+
|
|
+macro_rules! impl_def {
|
|
+ ($vid:ident, $PI:path) => {
|
|
+ impl SinCosPi for $vid {
|
|
+ type Output = (Self, Self);
|
|
+ #[inline]
|
|
+ fn sin_cos_pi(self) -> Self::Output {
|
|
+ let v = self * Self::splat($PI);
|
|
+ (v.sin(), v.cos())
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! impl_def32 {
|
|
+ ($vid:ident) => {
|
|
+ impl_def!($vid, crate::f32::consts::PI);
|
|
+ };
|
|
+}
|
|
+macro_rules! impl_def64 {
|
|
+ ($vid:ident) => {
|
|
+ impl_def!($vid, crate::f64::consts::PI);
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! impl_unary_t {
|
|
+ ($vid:ident: $fun:ident) => {
|
|
+ impl SinCosPi for $vid {
|
|
+ type Output = (Self, Self);
|
|
+ fn sin_cos_pi(self) -> Self::Output {
|
|
+ unsafe {
|
|
+ use crate::mem::transmute;
|
|
+ transmute($fun(transmute(self)))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { // widen: two $vid fill one $vid_t, $fun operates on $vid_t
|
|
+ impl SinCosPi for $vid {
|
|
+ type Output = (Self, Self);
|
|
+ fn sin_cos_pi(self) -> Self::Output {
|
|
+ unsafe {
|
|
+ use crate::mem::transmute;
|
|
+
|
|
+ union U {
|
|
+ vec: [$vid; 2],
|
|
+ twice: $vid_t,
|
|
+ }
|
|
+
|
|
+ let twice = U { vec: [self, self] }.twice; // pad with a copy of the input so no uninitialized lanes reach $fun
|
|
+ let twice = transmute($fun(transmute(twice)));
|
|
+
|
|
+ union R {
|
|
+ twice: ($vid_t, $vid_t),
|
|
+ vecs: ([$vid; 2], [$vid; 2]),
|
|
+ }
|
|
+ let r = R { twice }.vecs;
|
|
+ (*r.0.get_unchecked(0), *r.1.get_unchecked(0)) // (sin, cos): low half of each wide result; the high halves are padding
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
|
|
+ impl SinCosPi for $vid {
|
|
+ type Output = (Self, Self);
|
|
+ fn sin_cos_pi(self) -> Self::Output {
|
|
+ unsafe {
|
|
+ use crate::mem::transmute;
|
|
+
|
|
+ union U {
|
|
+ vec: $vid,
|
|
+ halves: [$vid_h; 2],
|
|
+ }
|
|
+
|
|
+ let halves = U { vec: self }.halves;
|
|
+
|
|
+ let res_0: ($vid_h, $vid_h) =
|
|
+ transmute($fun(transmute(*halves.get_unchecked(0))));
|
|
+ let res_1: ($vid_h, $vid_h) =
|
|
+ transmute($fun(transmute(*halves.get_unchecked(1))));
|
|
+
|
|
+ union R {
|
|
+ result: ($vid, $vid),
|
|
+ halves: ([$vid_h; 2], [$vid_h; 2]),
|
|
+ }
|
|
+ R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }
|
|
+ .result
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
|
|
+ impl SinCosPi for $vid {
|
|
+ type Output = (Self, Self);
|
|
+ fn sin_cos_pi(self) -> Self::Output {
|
|
+ unsafe {
|
|
+ use crate::mem::transmute;
|
|
+
|
|
+ union U {
|
|
+ vec: $vid,
|
|
+ quarters: [$vid_q; 4],
|
|
+ }
|
|
+
|
|
+ let quarters = U { vec: self }.quarters;
|
|
+
|
|
+ let res_0: ($vid_q, $vid_q) =
|
|
+ transmute($fun(transmute(*quarters.get_unchecked(0))));
|
|
+ let res_1: ($vid_q, $vid_q) =
|
|
+ transmute($fun(transmute(*quarters.get_unchecked(1))));
|
|
+ let res_2: ($vid_q, $vid_q) =
|
|
+ transmute($fun(transmute(*quarters.get_unchecked(2))));
|
|
+ let res_3: ($vid_q, $vid_q) =
|
|
+ transmute($fun(transmute(*quarters.get_unchecked(3))));
|
|
+
|
|
+ union R {
|
|
+ result: ($vid, $vid),
|
|
+ quarters: ([$vid_q; 4], [$vid_q; 4]),
|
|
+ }
|
|
+ R {
|
|
+ quarters: (
|
|
+ [res_0.0, res_1.0, res_2.0, res_3.0],
|
|
+ [res_0.1, res_1.1, res_2.1, res_3.1],
|
|
+ ),
|
|
+ }
|
|
+ .result
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
|
|
+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
|
|
+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);
|
|
+
|
|
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
|
|
+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
|
|
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
|
|
+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
|
|
+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
|
|
+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);
|
|
+
|
|
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
|
|
+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
|
|
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
|
|
+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
|
|
+ impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
|
|
+ impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);
|
|
+
|
|
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
|
|
+ impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
|
|
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
|
|
+ impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
|
|
+ } else {
|
|
+ impl_def32!(f32x2);
|
|
+ impl_def32!(f32x4);
|
|
+ impl_def32!(f32x8);
|
|
+ impl_def32!(f32x16);
|
|
+
|
|
+ impl_def64!(f64x2);
|
|
+ impl_def64!(f64x4);
|
|
+ impl_def64!(f64x8);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_def32!(f32x2);
|
|
+ impl_def32!(f32x4);
|
|
+ impl_def32!(f32x8);
|
|
+ impl_def32!(f32x16);
|
|
+
|
|
+ impl_def64!(f64x2);
|
|
+ impl_def64!(f64x4);
|
|
+ impl_def64!(f64x8);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs
|
|
new file mode 100644
|
|
index 000000000000..72df98c93c91
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs
|
|
@@ -0,0 +1,87 @@
|
|
+//! Vertical floating-point `sin_pi`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vectors sin_pi
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait SinPi {
|
|
+ fn sin_pi(self) -> Self;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(SinPi, sin_pi);
|
|
+
|
|
+macro_rules! impl_def {
|
|
+ ($vid:ident, $PI:path) => {
|
|
+ impl SinPi for $vid {
|
|
+ #[inline]
|
|
+ fn sin_pi(self) -> Self {
|
|
+ (self * Self::splat($PI)).sin()
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+macro_rules! impl_def32 {
|
|
+ ($vid:ident) => {
|
|
+ impl_def!($vid, crate::f32::consts::PI);
|
|
+ };
|
|
+}
|
|
+macro_rules! impl_def64 {
|
|
+ ($vid:ident) => {
|
|
+ impl_def!($vid, crate::f64::consts::PI);
|
|
+ };
|
|
+}
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sinpif4_u05avx2128);
|
|
+ impl_unary!(f32x8: Sleef_sinpif8_u05avx2);
|
|
+ impl_unary!(f64x2: Sleef_sinpid2_u05avx2128);
|
|
+ impl_unary!(f64x4: Sleef_sinpid4_u05avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
|
|
+ impl_unary!(f32x8: Sleef_sinpif8_u05avx);
|
|
+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
|
|
+ impl_unary!(f64x4: Sleef_sinpid4_u05avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4);
|
|
+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4);
|
|
+ } else {
|
|
+ impl_def32!(f32x2);
|
|
+ impl_def32!(f32x4);
|
|
+ impl_def32!(f32x8);
|
|
+ impl_def32!(f32x16);
|
|
+
|
|
+ impl_def64!(f64x2);
|
|
+ impl_def64!(f64x4);
|
|
+ impl_def64!(f64x8);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_def32!(f32x2);
|
|
+ impl_def32!(f32x4);
|
|
+ impl_def32!(f32x8);
|
|
+ impl_def32!(f32x16);
|
|
+
|
|
+ impl_def64!(f64x2);
|
|
+ impl_def64!(f64x4);
|
|
+ impl_def64!(f64x8);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs
|
|
new file mode 100644
|
|
index 000000000000..7ce31df62662
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs
|
|
@@ -0,0 +1,103 @@
|
|
+//! Vertical floating-point `sqrt`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vectors sqrt
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait Sqrt {
|
|
+ fn sqrt(self) -> Self;
|
|
+}
|
|
+
|
|
+#[allow(improper_ctypes)]
|
|
+extern "C" {
|
|
+ #[link_name = "llvm.sqrt.v2f32"]
|
|
+ fn sqrt_v2f32(x: f32x2) -> f32x2;
|
|
+ #[link_name = "llvm.sqrt.v4f32"]
|
|
+ fn sqrt_v4f32(x: f32x4) -> f32x4;
|
|
+ #[link_name = "llvm.sqrt.v8f32"]
|
|
+ fn sqrt_v8f32(x: f32x8) -> f32x8;
|
|
+ #[link_name = "llvm.sqrt.v16f32"]
|
|
+ fn sqrt_v16f32(x: f32x16) -> f32x16;
|
|
+ /* FIXME 64-bit single elem vectors
|
|
+ #[link_name = "llvm.sqrt.v1f64"]
|
|
+ fn sqrt_v1f64(x: f64x1) -> f64x1;
|
|
+ */
|
|
+ #[link_name = "llvm.sqrt.v2f64"]
|
|
+ fn sqrt_v2f64(x: f64x2) -> f64x2;
|
|
+ #[link_name = "llvm.sqrt.v4f64"]
|
|
+ fn sqrt_v4f64(x: f64x4) -> f64x4;
|
|
+ #[link_name = "llvm.sqrt.v8f64"]
|
|
+ fn sqrt_v8f64(x: f64x8) -> f64x8;
|
|
+
|
|
+ #[link_name = "llvm.sqrt.f32"]
|
|
+ fn sqrt_f32(x: f32) -> f32;
|
|
+ #[link_name = "llvm.sqrt.f64"]
|
|
+ fn sqrt_f64(x: f64) -> f64;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(Sqrt, sqrt);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_arch = "s390x")] {
|
|
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
|
|
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
|
|
+ impl_unary!(f32x4[f32; 4]: sqrt_f32);
|
|
+ impl_unary!(f32x8[f32; 8]: sqrt_f32);
|
|
+ impl_unary!(f32x16[f32; 16]: sqrt_f32);
|
|
+
|
|
+ impl_unary!(f64x2[f64; 2]: sqrt_f64);
|
|
+ impl_unary!(f64x4[f64; 4]: sqrt_f64);
|
|
+ impl_unary!(f64x8[f64; 8]: sqrt_f64);
|
|
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sqrtf4_avx2128);
|
|
+ impl_unary!(f32x8: Sleef_sqrtf8_avx2);
|
|
+ impl_unary!(f64x2: Sleef_sqrtd2_avx2128);
|
|
+ impl_unary!(f64x4: Sleef_sqrtd4_avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sqrtf4_sse4);
|
|
+ impl_unary!(f32x8: Sleef_sqrtf8_avx);
|
|
+ impl_unary!(f64x2: Sleef_sqrtd2_sse4);
|
|
+ impl_unary!(f64x4: Sleef_sqrtd4_avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sqrtf4_sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4);
|
|
+ impl_unary!(f64x2: Sleef_sqrtd2_sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4);
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
|
|
+ impl_unary!(f32x16: sqrt_v16f32);
|
|
+ impl_unary!(f64x8: sqrt_v8f64);
|
|
+
|
|
+ impl_unary!(f32x4: sqrt_v4f32);
|
|
+ impl_unary!(f32x8: sqrt_v8f32);
|
|
+ impl_unary!(f64x2: sqrt_v2f64);
|
|
+ impl_unary!(f64x4: sqrt_v4f64);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
|
|
+ impl_unary!(f32x4: sqrt_v4f32);
|
|
+ impl_unary!(f32x8: sqrt_v8f32);
|
|
+ impl_unary!(f32x16: sqrt_v16f32);
|
|
+
|
|
+ impl_unary!(f64x2: sqrt_v2f64);
|
|
+ impl_unary!(f64x4: sqrt_v4f64);
|
|
+ impl_unary!(f64x8: sqrt_v8f64);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs
|
|
new file mode 100644
|
|
index 000000000000..c1e379c34241
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs
|
|
@@ -0,0 +1,67 @@
|
|
+//! Vertical floating-point `sqrte`
|
|
+#![allow(unused)]
|
|
+
|
|
+// FIXME 64-bit 1 elem vectors sqrte
|
|
+
|
|
+use crate::llvm::simd_fsqrt;
|
|
+use crate::*;
|
|
+
|
|
+crate trait Sqrte {
|
|
+ fn sqrte(self) -> Self;
|
|
+}
|
|
+
|
|
+gen_unary_impl_table!(Sqrte, sqrte);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
|
|
+ use sleef_sys::*;
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128);
|
|
+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx2);
|
|
+ impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128);
|
|
+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx2);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
|
|
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx);
|
|
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
|
|
+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx);
|
|
+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
|
|
+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx);
|
|
+ } else if #[cfg(target_feature = "sse4.2")] {
|
|
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
|
|
+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4);
|
|
+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4);
|
|
+
|
|
+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
|
|
+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4);
|
|
+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
|
|
+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4);
|
|
+ } else {
|
|
+ impl_unary!(f32x2[g]: simd_fsqrt);
|
|
+ impl_unary!(f32x16[g]: simd_fsqrt);
|
|
+ impl_unary!(f64x8[g]: simd_fsqrt);
|
|
+
|
|
+ impl_unary!(f32x4[g]: simd_fsqrt);
|
|
+ impl_unary!(f32x8[g]: simd_fsqrt);
|
|
+ impl_unary!(f64x2[g]: simd_fsqrt);
|
|
+ impl_unary!(f64x4[g]: simd_fsqrt);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_unary!(f32x2[g]: simd_fsqrt);
|
|
+ impl_unary!(f32x4[g]: simd_fsqrt);
|
|
+ impl_unary!(f32x8[g]: simd_fsqrt);
|
|
+ impl_unary!(f32x16[g]: simd_fsqrt);
|
|
+
|
|
+ impl_unary!(f64x2[g]: simd_fsqrt);
|
|
+ impl_unary!(f64x4[g]: simd_fsqrt);
|
|
+ impl_unary!(f64x8[g]: simd_fsqrt);
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs
|
|
new file mode 100644
|
|
index 000000000000..39f493d3b17f
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs
|
|
@@ -0,0 +1,28 @@
|
|
+//! Provides `isize` and `usize`
|
|
+
|
|
+use cfg_if::cfg_if;
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "8")] {
|
|
+ crate type isize_ = i8;
|
|
+ crate type usize_ = u8;
|
|
+ } else if #[cfg(target_pointer_width = "16")] {
|
|
+ crate type isize_ = i16;
|
|
+ crate type usize_ = u16;
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ crate type isize_ = i32;
|
|
+ crate type usize_ = u32;
|
|
+
|
|
+ } else if #[cfg(target_pointer_width = "64")] {
|
|
+ crate type isize_ = i64;
|
|
+ crate type usize_ = u64;
|
|
+ } else if #[cfg(target_pointer_width = "64")] {
|
|
+ crate type isize_ = i64;
|
|
+ crate type usize_ = u64;
|
|
+ } else if #[cfg(target_pointer_width = "128")] {
|
|
+ crate type isize_ = i128;
|
|
+ crate type usize_ = u128;
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions.rs b/third_party/rust/packed_simd/src/codegen/reductions.rs
|
|
new file mode 100644
|
|
index 000000000000..7be4f5fabbea
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions.rs
|
|
@@ -0,0 +1 @@
|
|
+crate mod mask;
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs
|
|
new file mode 100644
|
|
index 000000000000..97260c6d4e03
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs
|
|
@@ -0,0 +1,69 @@
|
|
+//! Code generation workaround for `all()` mask horizontal reduction.
|
|
+//!
|
|
+//! Works around [LLVM bug 36702].
|
|
+//!
|
|
+//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702
|
|
+#![allow(unused_macros)]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait All: crate::marker::Sized {
|
|
+ unsafe fn all(self) -> bool;
|
|
+}
|
|
+
|
|
+crate trait Any: crate::marker::Sized {
|
|
+ unsafe fn any(self) -> bool;
|
|
+}
|
|
+
|
|
+#[macro_use]
|
|
+mod fallback_impl;
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
|
+ #[macro_use]
|
|
+ mod x86;
|
|
+ } else if #[cfg(all(target_arch = "arm", target_feature = "v7",
|
|
+ target_feature = "neon",
|
|
+ any(feature = "core_arch", libcore_neon)))] {
|
|
+ #[macro_use]
|
|
+ mod arm;
|
|
+ } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
|
|
+ #[macro_use]
|
|
+ mod aarch64;
|
|
+ } else {
|
|
+ #[macro_use]
|
|
+ mod fallback;
|
|
+ }
|
|
+}
|
|
+
|
|
+impl_mask_reductions!(m8x2);
|
|
+impl_mask_reductions!(m8x4);
|
|
+impl_mask_reductions!(m8x8);
|
|
+impl_mask_reductions!(m8x16);
|
|
+impl_mask_reductions!(m8x32);
|
|
+impl_mask_reductions!(m8x64);
|
|
+
|
|
+impl_mask_reductions!(m16x2);
|
|
+impl_mask_reductions!(m16x4);
|
|
+impl_mask_reductions!(m16x8);
|
|
+impl_mask_reductions!(m16x16);
|
|
+impl_mask_reductions!(m16x32);
|
|
+
|
|
+impl_mask_reductions!(m32x2);
|
|
+impl_mask_reductions!(m32x4);
|
|
+impl_mask_reductions!(m32x8);
|
|
+impl_mask_reductions!(m32x16);
|
|
+
|
|
+// FIXME: 64-bit single element vector
|
|
+// impl_mask_reductions!(m64x1);
|
|
+impl_mask_reductions!(m64x2);
|
|
+impl_mask_reductions!(m64x4);
|
|
+impl_mask_reductions!(m64x8);
|
|
+
|
|
+impl_mask_reductions!(m128x1);
|
|
+impl_mask_reductions!(m128x2);
|
|
+impl_mask_reductions!(m128x4);
|
|
+
|
|
+impl_mask_reductions!(msizex2);
|
|
+impl_mask_reductions!(msizex4);
|
|
+impl_mask_reductions!(msizex8);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
|
|
new file mode 100644
|
|
index 000000000000..e9586eace1ff
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
|
|
@@ -0,0 +1,71 @@
|
|
+//! Mask reductions implementation for `aarch64` targets
|
|
+
|
|
+/// 128-bit wide vectors
|
|
+macro_rules! aarch64_128_neon_impl {
|
|
+ ($id:ident, $vmin:ident, $vmax:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "neon")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ use crate::arch::aarch64::$vmin;
|
|
+ $vmin(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "neon")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ use crate::arch::aarch64::$vmax;
|
|
+ $vmax(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/// 64-bit wide vectors
|
|
+macro_rules! aarch64_64_neon_impl {
|
|
+ ($id:ident, $vec128:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "neon")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ // Duplicates the 64-bit vector into a 128-bit one and
|
|
+ // calls all on that.
|
|
+ union U {
|
|
+ halves: ($id, $id),
|
|
+ vec: $vec128,
|
|
+ }
|
|
+ U {
|
|
+ halves: (self, self),
|
|
+ }.vec.all()
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "neon")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ union U {
|
|
+ halves: ($id, $id),
|
|
+ vec: $vec128,
|
|
+ }
|
|
+ U {
|
|
+ halves: (self, self),
|
|
+ }.vec.any()
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// Mask reduction implementation for `aarch64` targets
|
|
+macro_rules! impl_mask_reductions {
|
|
+ // 64-bit wide masks
|
|
+ (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); };
|
|
+ (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); };
|
|
+ (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); };
|
|
+ // 128-bit wide masks
|
|
+ (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); };
|
|
+ (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); };
|
|
+ (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); };
|
|
+ // Fallback to LLVM's default code-generation:
|
|
+ ($id:ident) => { fallback_impl!($id); };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs
|
|
new file mode 100644
|
|
index 000000000000..1987af7a9676
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs
|
|
@@ -0,0 +1,54 @@
|
|
+//! Mask reductions implementation for `arm` targets
|
|
+
|
|
+/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with
|
|
+/// more than two elements.
|
|
+macro_rules! arm_128_v7_neon_impl {
|
|
+ ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "v7,neon")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ use crate::arch::arm::$vpmin;
|
|
+ use crate::mem::transmute;
|
|
+ union U {
|
|
+ halves: ($half, $half),
|
|
+ vec: $id,
|
|
+ }
|
|
+ let halves = U { vec: self }.halves;
|
|
+ let h: $half = transmute($vpmin(
|
|
+ transmute(halves.0),
|
|
+ transmute(halves.1),
|
|
+ ));
|
|
+ h.all()
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "v7,neon")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ use crate::arch::arm::$vpmax;
|
|
+ use crate::mem::transmute;
|
|
+ union U {
|
|
+ halves: ($half, $half),
|
|
+ vec: $id,
|
|
+ }
|
|
+ let halves = U { vec: self }.halves;
|
|
+ let h: $half = transmute($vpmax(
|
|
+ transmute(halves.0),
|
|
+ transmute(halves.1),
|
|
+ ));
|
|
+ h.any()
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// Mask reduction implementation for `arm` targets
|
|
+macro_rules! impl_mask_reductions {
|
|
+ // 128-bit wide masks
|
|
+ (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); };
|
|
+ (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); };
|
|
+ (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); };
|
|
+ // Fallback to LLVM's default code-generation:
|
|
+ ($id:ident) => { fallback_impl!($id); };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs
|
|
new file mode 100644
|
|
index 000000000000..25e5c813abca
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs
|
|
@@ -0,0 +1,6 @@
|
|
+//! Default mask reduction implementations.
|
|
+
|
|
+/// Default mask reduction implementation
|
|
+macro_rules! impl_mask_reductions {
|
|
+ ($id:ident) => { fallback_impl!($id); };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs
|
|
new file mode 100644
|
|
index 000000000000..0d246e2fdab6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs
|
|
@@ -0,0 +1,237 @@
|
|
+//! Default implementation of a mask reduction for any target.
|
|
+
|
|
+macro_rules! fallback_to_other_impl {
|
|
+ ($id:ident, $other:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ let m: $other = crate::mem::transmute(self);
|
|
+ m.all()
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ let m: $other = crate::mem::transmute(self);
|
|
+ m.any()
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// Fallback implementation.
|
|
+macro_rules! fallback_impl {
|
|
+ // 16-bit wide masks:
|
|
+ (m8x2) => {
|
|
+ impl All for m8x2 {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ let i: u16 = crate::mem::transmute(self);
|
|
+ i == u16::max_value()
|
|
+ }
|
|
+ }
|
|
+ impl Any for m8x2 {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ let i: u16 = crate::mem::transmute(self);
|
|
+ i != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ // 32-bit wide masks
|
|
+ (m8x4) => {
|
|
+ impl All for m8x4 {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ let i: u32 = crate::mem::transmute(self);
|
|
+ i == u32::max_value()
|
|
+ }
|
|
+ }
|
|
+ impl Any for m8x4 {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ let i: u32 = crate::mem::transmute(self);
|
|
+ i != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (m16x2) => {
|
|
+ fallback_to_other_impl!(m16x2, m8x4);
|
|
+ };
|
|
+ // 64-bit wide masks:
|
|
+ (m8x8) => {
|
|
+ impl All for m8x8 {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ let i: u64 = crate::mem::transmute(self);
|
|
+ i == u64::max_value()
|
|
+ }
|
|
+ }
|
|
+ impl Any for m8x8 {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ let i: u64 = crate::mem::transmute(self);
|
|
+ i != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (m16x4) => {
|
|
+ fallback_to_other_impl!(m16x4, m8x8);
|
|
+ };
|
|
+ (m32x2) => {
|
|
+ fallback_to_other_impl!(m32x2, m16x4);
|
|
+ };
|
|
+ // FIXME: m64x1 mask
|
|
+ // 128-bit wide masks:
|
|
+ (m8x16) => {
|
|
+ impl All for m8x16 {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ let i: u128 = crate::mem::transmute(self);
|
|
+ i == u128::max_value()
|
|
+ }
|
|
+ }
|
|
+ impl Any for m8x16 {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ let i: u128 = crate::mem::transmute(self);
|
|
+ i != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (m16x8) => {
|
|
+ fallback_to_other_impl!(m16x8, m8x16);
|
|
+ };
|
|
+ (m32x4) => {
|
|
+ fallback_to_other_impl!(m32x4, m16x8);
|
|
+ };
|
|
+ (m64x2) => {
|
|
+ fallback_to_other_impl!(m64x2, m32x4);
|
|
+ };
|
|
+ (m128x1) => {
|
|
+ fallback_to_other_impl!(m128x1, m64x2);
|
|
+ };
|
|
+ // 256-bit wide masks
|
|
+ (m8x32) => {
|
|
+ impl All for m8x32 {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ let i: [u128; 2] = crate::mem::transmute(self);
|
|
+ let o: [u128; 2] = [u128::max_value(); 2];
|
|
+ i == o
|
|
+ }
|
|
+ }
|
|
+ impl Any for m8x32 {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ let i: [u128; 2] = crate::mem::transmute(self);
|
|
+ let o: [u128; 2] = [0; 2];
|
|
+ i != o
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (m16x16) => {
|
|
+ fallback_to_other_impl!(m16x16, m8x32);
|
|
+ };
|
|
+ (m32x8) => {
|
|
+ fallback_to_other_impl!(m32x8, m16x16);
|
|
+ };
|
|
+ (m64x4) => {
|
|
+ fallback_to_other_impl!(m64x4, m32x8);
|
|
+ };
|
|
+ (m128x2) => {
|
|
+ fallback_to_other_impl!(m128x2, m64x4);
|
|
+ };
|
|
+ // 512-bit wide masks
|
|
+ (m8x64) => {
|
|
+ impl All for m8x64 {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ let i: [u128; 4] = crate::mem::transmute(self);
|
|
+ let o: [u128; 4] = [u128::max_value(); 4];
|
|
+ i == o
|
|
+ }
|
|
+ }
|
|
+ impl Any for m8x64 {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ let i: [u128; 4] = crate::mem::transmute(self);
|
|
+ let o: [u128; 4] = [0; 4];
|
|
+ i != o
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (m16x32) => {
|
|
+ fallback_to_other_impl!(m16x32, m8x64);
|
|
+ };
|
|
+ (m32x16) => {
|
|
+ fallback_to_other_impl!(m32x16, m16x32);
|
|
+ };
|
|
+ (m64x8) => {
|
|
+ fallback_to_other_impl!(m64x8, m32x16);
|
|
+ };
|
|
+ (m128x4) => {
|
|
+ fallback_to_other_impl!(m128x4, m64x8);
|
|
+ };
|
|
+ // Masks with pointer-sized elements
|
|
+ (msizex2) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "64")] {
|
|
+ fallback_to_other_impl!(msizex2, m64x2);
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ fallback_to_other_impl!(msizex2, m32x2);
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (msizex4) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "64")] {
|
|
+ fallback_to_other_impl!(msizex4, m64x4);
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ fallback_to_other_impl!(msizex4, m32x4);
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (msizex8) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "64")] {
|
|
+ fallback_to_other_impl!(msizex8, m64x8);
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ fallback_to_other_impl!(msizex8, m32x8);
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! recurse_half {
|
|
+ ($vid:ident, $vid_h:ident) => {
|
|
+ impl All for $vid {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ union U {
|
|
+ halves: ($vid_h, $vid_h),
|
|
+ vec: $vid,
|
|
+ }
|
|
+ let halves = U { vec: self }.halves;
|
|
+ halves.0.all() && halves.1.all()
|
|
+ }
|
|
+ }
|
|
+ impl Any for $vid {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ union U {
|
|
+ halves: ($vid_h, $vid_h),
|
|
+ vec: $vid,
|
|
+ }
|
|
+ let halves = U { vec: self }.halves;
|
|
+ halves.0.any() || halves.1.any()
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
|
|
new file mode 100644
|
|
index 000000000000..2ae4ed81c416
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
|
|
@@ -0,0 +1,194 @@
|
|
+//! Mask reductions implementation for `x86` and `x86_64` targets
|
|
+
|
|
+#[cfg(target_feature = "sse")]
|
|
+#[macro_use]
|
|
+mod sse;
|
|
+
|
|
+#[cfg(target_feature = "sse2")]
|
|
+#[macro_use]
|
|
+mod sse2;
|
|
+
|
|
+#[cfg(target_feature = "avx")]
|
|
+#[macro_use]
|
|
+mod avx;
|
|
+
|
|
+#[cfg(target_feature = "avx2")]
|
|
+#[macro_use]
|
|
+mod avx2;
|
|
+
|
|
+/// x86 64-bit m8x8 implementation
|
|
+macro_rules! x86_m8x8_impl {
|
|
+ ($id:ident) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] {
|
|
+ x86_m8x8_sse_impl!($id);
|
|
+ } else {
|
|
+ fallback_impl!($id);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// x86 128-bit m8x16 implementation
|
|
+macro_rules! x86_m8x16_impl {
|
|
+ ($id:ident) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "sse2")] {
|
|
+ x86_m8x16_sse2_impl!($id);
|
|
+ } else {
|
|
+ fallback_impl!($id);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// x86 128-bit m32x4 implementation
|
|
+macro_rules! x86_m32x4_impl {
|
|
+ ($id:ident) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "sse")] {
|
|
+ x86_m32x4_sse_impl!($id);
|
|
+ } else {
|
|
+ fallback_impl!($id);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// x86 128-bit m64x2 implementation
|
|
+macro_rules! x86_m64x2_impl {
|
|
+ ($id:ident) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "sse2")] {
|
|
+ x86_m64x2_sse2_impl!($id);
|
|
+ } else if #[cfg(target_feature = "sse")] {
|
|
+ x86_m32x4_sse_impl!($id);
|
|
+ } else {
|
|
+ fallback_impl!($id);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// x86 256-bit m8x32 implementation
|
|
+macro_rules! x86_m8x32_impl {
|
|
+ ($id:ident, $half_id:ident) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx2")] {
|
|
+ x86_m8x32_avx2_impl!($id);
|
|
+ } else if #[cfg(target_feature = "avx")] {
|
|
+ x86_m8x32_avx_impl!($id);
|
|
+ } else if #[cfg(target_feature = "sse2")] {
|
|
+ recurse_half!($id, $half_id);
|
|
+ } else {
|
|
+ fallback_impl!($id);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// x86 256-bit m32x8 implementation
|
|
+macro_rules! x86_m32x8_impl {
|
|
+ ($id:ident, $half_id:ident) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx")] {
|
|
+ x86_m32x8_avx_impl!($id);
|
|
+ } else if #[cfg(target_feature = "sse")] {
|
|
+ recurse_half!($id, $half_id);
|
|
+ } else {
|
|
+ fallback_impl!($id);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// x86 256-bit m64x4 implementation
|
|
+macro_rules! x86_m64x4_impl {
|
|
+ ($id:ident, $half_id:ident) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_feature = "avx")] {
|
|
+ x86_m64x4_avx_impl!($id);
|
|
+ } else if #[cfg(target_feature = "sse")] {
|
|
+ recurse_half!($id, $half_id);
|
|
+ } else {
|
|
+ fallback_impl!($id);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// Fallback implementation.
|
|
+macro_rules! x86_intr_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ use crate::llvm::simd_reduce_all;
|
|
+ simd_reduce_all(self.0)
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ use crate::llvm::simd_reduce_any;
|
|
+ simd_reduce_any(self.0)
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// Mask reduction implementation for `x86` and `x86_64` targets
|
|
+macro_rules! impl_mask_reductions {
|
|
+ // 64-bit wide masks
|
|
+ (m8x8) => { x86_m8x8_impl!(m8x8); };
|
|
+ (m16x4) => { x86_m8x8_impl!(m16x4); };
|
|
+ (m32x2) => { x86_m8x8_impl!(m32x2); };
|
|
+ // 128-bit wide masks
|
|
+ (m8x16) => { x86_m8x16_impl!(m8x16); };
|
|
+ (m16x8) => { x86_m8x16_impl!(m16x8); };
|
|
+ (m32x4) => { x86_m32x4_impl!(m32x4); };
|
|
+ (m64x2) => { x86_m64x2_impl!(m64x2); };
|
|
+ (m128x1) => { x86_intr_impl!(m128x1); };
|
|
+ // 256-bit wide masks:
|
|
+ (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); };
|
|
+ (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); };
|
|
+ (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); };
|
|
+ (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); };
|
|
+ (m128x2) => { x86_intr_impl!(m128x2); };
|
|
+ (msizex2) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "64")] {
|
|
+ fallback_to_other_impl!(msizex2, m64x2);
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ fallback_to_other_impl!(msizex2, m32x2);
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (msizex4) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "64")] {
|
|
+ fallback_to_other_impl!(msizex4, m64x4);
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ fallback_to_other_impl!(msizex4, m32x4);
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (msizex8) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "64")] {
|
|
+ fallback_to_other_impl!(msizex8, m64x8);
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ fallback_to_other_impl!(msizex8, m32x8);
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+
|
|
+ // Fallback to LLVM's default code-generation:
|
|
+ ($id:ident) => { fallback_impl!($id); };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
|
|
new file mode 100644
|
|
index 000000000000..d18736fb0399
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
|
|
@@ -0,0 +1,101 @@
|
|
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX`
|
|
+
|
|
+/// `x86`/`x86_64` 256-bit `AVX` implementation
|
|
+/// FIXME: it might be faster here to do two `_mm_movmask_epi8`
|
|
+#[cfg(target_feature = "avx")]
|
|
+macro_rules! x86_m8x32_avx_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "avx")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_testc_si256;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_testc_si256;
|
|
+ _mm256_testc_si256(
|
|
+ crate::mem::transmute(self),
|
|
+ crate::mem::transmute($id::splat(true)),
|
|
+ ) != 0
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "avx")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_testz_si256;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_testz_si256;
|
|
+ _mm256_testz_si256(
|
|
+ crate::mem::transmute(self),
|
|
+ crate::mem::transmute(self),
|
|
+ ) == 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation
|
|
+macro_rules! x86_m32x8_avx_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_movemask_ps;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_movemask_ps;
|
|
+ // _mm256_movemask_ps(a) creates a 8bit mask containing the
|
|
+ // most significant bit of each lane of `a`. If all bits are
|
|
+ // set, then all 8 lanes of the mask are true.
|
|
+ _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_movemask_ps;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_movemask_ps;
|
|
+
|
|
+ _mm256_movemask_ps(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation
|
|
+macro_rules! x86_m64x4_avx_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_movemask_pd;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_movemask_pd;
|
|
+ // _mm256_movemask_pd(a) creates a 4bit mask containing the
|
|
+ // most significant bit of each lane of `a`. If all bits are
|
|
+ // set, then all 4 lanes of the mask are true.
|
|
+ _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_movemask_pd;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_movemask_pd;
|
|
+
|
|
+ _mm256_movemask_pd(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs
|
|
new file mode 100644
|
|
index 000000000000..d37d02342092
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs
|
|
@@ -0,0 +1,35 @@
|
|
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`.
|
|
+#![allow(unused)]
|
|
+
|
|
+/// x86/x86_64 256-bit m8x32 AVX2 implementation
|
|
+macro_rules! x86_m8x32_avx2_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse2")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_movemask_epi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_movemask_epi8;
|
|
+ // _mm256_movemask_epi8(a) creates a 32bit mask containing the
|
|
+ // most significant bit of each byte of `a`. If all
|
|
+ // bits are set, then all 32 lanes of the mask are
|
|
+ // true.
|
|
+ _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse2")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm256_movemask_epi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm256_movemask_epi8;
|
|
+
|
|
+ _mm256_movemask_epi8(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
|
|
new file mode 100644
|
|
index 000000000000..7482f9430a14
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
|
|
@@ -0,0 +1,68 @@
|
|
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`.
|
|
+#![allow(unused)]
|
|
+
|
|
+/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation
|
|
+macro_rules! x86_m32x4_sse_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_ps;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_ps;
|
|
+ // _mm_movemask_ps(a) creates a 4bit mask containing the
|
|
+ // most significant bit of each lane of `a`. If all
|
|
+ // bits are set, then all 4 lanes of the mask are
|
|
+ // true.
|
|
+ _mm_movemask_ps(crate::mem::transmute(self))
|
|
+ == 0b_1111_i32
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_ps;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_ps;
|
|
+
|
|
+ _mm_movemask_ps(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! x86_m8x8_sse_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_pi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_pi8;
|
|
+ // _mm_movemask_pi8(a) creates an 8bit mask containing the most
|
|
+ // significant bit of each byte of `a`. If all bits are set,
|
|
+ // then all 8 lanes of the mask are true.
|
|
+ _mm_movemask_pi8(crate::mem::transmute(self))
|
|
+ == u8::max_value() as i32
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_pi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_pi8;
|
|
+
|
|
+ _mm_movemask_pi8(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs
|
|
new file mode 100644
|
|
index 000000000000..a99c606f5268
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs
|
|
@@ -0,0 +1,70 @@
|
|
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`.
|
|
+#![allow(unused)]
|
|
+
|
|
+/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation
|
|
+macro_rules! x86_m64x2_sse2_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_pd;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_pd;
|
|
+ // _mm_movemask_pd(a) creates a 2bit mask containing the
|
|
+ // most significant bit of each lane of `a`. If all
|
|
+ // bits are set, then all 2 lanes of the mask are
|
|
+ // true.
|
|
+ _mm_movemask_pd(crate::mem::transmute(self))
|
|
+ == 0b_11_i32
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_pd;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_pd;
|
|
+
|
|
+ _mm_movemask_pd(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation
|
|
+macro_rules! x86_m8x16_sse2_impl {
|
|
+ ($id:ident) => {
|
|
+ impl All for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse2")]
|
|
+ unsafe fn all(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_epi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_epi8;
|
|
+ // _mm_movemask_epi8(a) creates a 16bit mask containing the
|
|
+ // most significant bit of each byte of `a`. If all
|
|
+ // bits are set, then all 16 lanes of the mask are
|
|
+ // true.
|
|
+ _mm_movemask_epi8(crate::mem::transmute(self))
|
|
+ == i32::from(u16::max_value())
|
|
+ }
|
|
+ }
|
|
+ impl Any for $id {
|
|
+ #[inline]
|
|
+ #[target_feature(enable = "sse2")]
|
|
+ unsafe fn any(self) -> bool {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_movemask_epi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_movemask_epi8;
|
|
+
|
|
+ _mm_movemask_epi8(crate::mem::transmute(self)) != 0
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/shuffle.rs b/third_party/rust/packed_simd/src/codegen/shuffle.rs
|
|
new file mode 100644
|
|
index 000000000000..35a9db905339
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/shuffle.rs
|
|
@@ -0,0 +1,302 @@
|
|
+//! Implementations of the `ShuffleResult` trait for the different numbers of
|
|
+//! lanes and vector element types.
|
|
+
|
|
+use crate::masks::*;
|
|
+use crate::sealed::Shuffle;
|
|
+
|
|
+impl Shuffle<[u32; 2]> for i8 {
|
|
+ type Output = crate::codegen::i8x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for i8 {
|
|
+ type Output = crate::codegen::i8x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for i8 {
|
|
+ type Output = crate::codegen::i8x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for i8 {
|
|
+ type Output = crate::codegen::i8x16;
|
|
+}
|
|
+impl Shuffle<[u32; 32]> for i8 {
|
|
+ type Output = crate::codegen::i8x32;
|
|
+}
|
|
+impl Shuffle<[u32; 64]> for i8 {
|
|
+ type Output = crate::codegen::i8x64;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for u8 {
|
|
+ type Output = crate::codegen::u8x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for u8 {
|
|
+ type Output = crate::codegen::u8x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for u8 {
|
|
+ type Output = crate::codegen::u8x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for u8 {
|
|
+ type Output = crate::codegen::u8x16;
|
|
+}
|
|
+impl Shuffle<[u32; 32]> for u8 {
|
|
+ type Output = crate::codegen::u8x32;
|
|
+}
|
|
+impl Shuffle<[u32; 64]> for u8 {
|
|
+ type Output = crate::codegen::u8x64;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for m8 {
|
|
+ type Output = crate::codegen::m8x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for m8 {
|
|
+ type Output = crate::codegen::m8x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for m8 {
|
|
+ type Output = crate::codegen::m8x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for m8 {
|
|
+ type Output = crate::codegen::m8x16;
|
|
+}
|
|
+impl Shuffle<[u32; 32]> for m8 {
|
|
+ type Output = crate::codegen::m8x32;
|
|
+}
|
|
+impl Shuffle<[u32; 64]> for m8 {
|
|
+ type Output = crate::codegen::m8x64;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for i16 {
|
|
+ type Output = crate::codegen::i16x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for i16 {
|
|
+ type Output = crate::codegen::i16x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for i16 {
|
|
+ type Output = crate::codegen::i16x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for i16 {
|
|
+ type Output = crate::codegen::i16x16;
|
|
+}
|
|
+impl Shuffle<[u32; 32]> for i16 {
|
|
+ type Output = crate::codegen::i16x32;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for u16 {
|
|
+ type Output = crate::codegen::u16x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for u16 {
|
|
+ type Output = crate::codegen::u16x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for u16 {
|
|
+ type Output = crate::codegen::u16x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for u16 {
|
|
+ type Output = crate::codegen::u16x16;
|
|
+}
|
|
+impl Shuffle<[u32; 32]> for u16 {
|
|
+ type Output = crate::codegen::u16x32;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for m16 {
|
|
+ type Output = crate::codegen::m16x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for m16 {
|
|
+ type Output = crate::codegen::m16x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for m16 {
|
|
+ type Output = crate::codegen::m16x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for m16 {
|
|
+ type Output = crate::codegen::m16x16;
|
|
+}
|
|
+impl Shuffle<[u32; 32]> for m16 {
|
|
+ type Output = crate::codegen::m16x32;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for i32 {
|
|
+ type Output = crate::codegen::i32x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for i32 {
|
|
+ type Output = crate::codegen::i32x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for i32 {
|
|
+ type Output = crate::codegen::i32x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for i32 {
|
|
+ type Output = crate::codegen::i32x16;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for u32 {
|
|
+ type Output = crate::codegen::u32x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for u32 {
|
|
+ type Output = crate::codegen::u32x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for u32 {
|
|
+ type Output = crate::codegen::u32x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for u32 {
|
|
+ type Output = crate::codegen::u32x16;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for f32 {
|
|
+ type Output = crate::codegen::f32x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for f32 {
|
|
+ type Output = crate::codegen::f32x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for f32 {
|
|
+ type Output = crate::codegen::f32x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for f32 {
|
|
+ type Output = crate::codegen::f32x16;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for m32 {
|
|
+ type Output = crate::codegen::m32x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for m32 {
|
|
+ type Output = crate::codegen::m32x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for m32 {
|
|
+ type Output = crate::codegen::m32x8;
|
|
+}
|
|
+impl Shuffle<[u32; 16]> for m32 {
|
|
+ type Output = crate::codegen::m32x16;
|
|
+}
|
|
+
|
|
+/* FIXME: 64-bit single element vector
|
|
+impl Shuffle<[u32; 1]> for i64 {
|
|
+ type Output = crate::codegen::i64x1;
|
|
+}
|
|
+*/
|
|
+impl Shuffle<[u32; 2]> for i64 {
|
|
+ type Output = crate::codegen::i64x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for i64 {
|
|
+ type Output = crate::codegen::i64x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for i64 {
|
|
+ type Output = crate::codegen::i64x8;
|
|
+}
|
|
+
|
|
+/* FIXME: 64-bit single element vector
|
|
+impl Shuffle<[u32; 1]> for u64 {
|
|
+ type Output = crate::codegen::u64x1;
|
|
+}
|
|
+*/
|
|
+impl Shuffle<[u32; 2]> for u64 {
|
|
+ type Output = crate::codegen::u64x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for u64 {
|
|
+ type Output = crate::codegen::u64x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for u64 {
|
|
+ type Output = crate::codegen::u64x8;
|
|
+}
|
|
+
|
|
+/* FIXME: 64-bit single element vector
|
|
+impl Shuffle<[u32; 1]> for f64 {
|
|
+ type Output = crate::codegen::f64x1;
|
|
+}
|
|
+*/
|
|
+impl Shuffle<[u32; 2]> for f64 {
|
|
+ type Output = crate::codegen::f64x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for f64 {
|
|
+ type Output = crate::codegen::f64x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for f64 {
|
|
+ type Output = crate::codegen::f64x8;
|
|
+}
|
|
+
|
|
+/* FIXME: 64-bit single element vector
|
|
+impl Shuffle<[u32; 1]> for m64 {
|
|
+ type Output = crate::codegen::m64x1;
|
|
+}
|
|
+*/
|
|
+impl Shuffle<[u32; 2]> for m64 {
|
|
+ type Output = crate::codegen::m64x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for m64 {
|
|
+ type Output = crate::codegen::m64x4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for m64 {
|
|
+ type Output = crate::codegen::m64x8;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for isize {
|
|
+ type Output = crate::codegen::isizex2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for isize {
|
|
+ type Output = crate::codegen::isizex4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for isize {
|
|
+ type Output = crate::codegen::isizex8;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for usize {
|
|
+ type Output = crate::codegen::usizex2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for usize {
|
|
+ type Output = crate::codegen::usizex4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for usize {
|
|
+ type Output = crate::codegen::usizex8;
|
|
+}
|
|
+
|
|
+impl<T> Shuffle<[u32; 2]> for *const T {
|
|
+ type Output = crate::codegen::cptrx2<T>;
|
|
+}
|
|
+impl<T> Shuffle<[u32; 4]> for *const T {
|
|
+ type Output = crate::codegen::cptrx4<T>;
|
|
+}
|
|
+impl<T> Shuffle<[u32; 8]> for *const T {
|
|
+ type Output = crate::codegen::cptrx8<T>;
|
|
+}
|
|
+
|
|
+impl<T> Shuffle<[u32; 2]> for *mut T {
|
|
+ type Output = crate::codegen::mptrx2<T>;
|
|
+}
|
|
+impl<T> Shuffle<[u32; 4]> for *mut T {
|
|
+ type Output = crate::codegen::mptrx4<T>;
|
|
+}
|
|
+impl<T> Shuffle<[u32; 8]> for *mut T {
|
|
+ type Output = crate::codegen::mptrx8<T>;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 2]> for msize {
|
|
+ type Output = crate::codegen::msizex2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for msize {
|
|
+ type Output = crate::codegen::msizex4;
|
|
+}
|
|
+impl Shuffle<[u32; 8]> for msize {
|
|
+ type Output = crate::codegen::msizex8;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 1]> for i128 {
|
|
+ type Output = crate::codegen::i128x1;
|
|
+}
|
|
+impl Shuffle<[u32; 2]> for i128 {
|
|
+ type Output = crate::codegen::i128x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for i128 {
|
|
+ type Output = crate::codegen::i128x4;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 1]> for u128 {
|
|
+ type Output = crate::codegen::u128x1;
|
|
+}
|
|
+impl Shuffle<[u32; 2]> for u128 {
|
|
+ type Output = crate::codegen::u128x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for u128 {
|
|
+ type Output = crate::codegen::u128x4;
|
|
+}
|
|
+
|
|
+impl Shuffle<[u32; 1]> for m128 {
|
|
+ type Output = crate::codegen::m128x1;
|
|
+}
|
|
+impl Shuffle<[u32; 2]> for m128 {
|
|
+ type Output = crate::codegen::m128x2;
|
|
+}
|
|
+impl Shuffle<[u32; 4]> for m128 {
|
|
+ type Output = crate::codegen::m128x4;
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
|
|
new file mode 100644
|
|
index 000000000000..1e9f5816371a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
|
|
@@ -0,0 +1,432 @@
|
|
+//! Shuffle vector lanes with run-time indices.
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+pub trait Shuffle1Dyn {
|
|
+ type Indices;
|
|
+ fn shuffle1_dyn(self, _: Self::Indices) -> Self;
|
|
+}
|
|
+
|
|
+// Fallback implementation
|
|
+macro_rules! impl_fallback {
|
|
+ ($id:ident) => {
|
|
+ impl Shuffle1Dyn for $id {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ let mut result = Self::splat(0);
|
|
+ for i in 0..$id::lanes() {
|
|
+ result = result
|
|
+ .replace(i, self.extract(indices.extract(i) as usize));
|
|
+ }
|
|
+ result
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+macro_rules! impl_shuffle1_dyn {
|
|
+ (u8x8) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
|
|
+ target_feature = "ssse3"))] {
|
|
+ impl Shuffle1Dyn for u8x8 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_shuffle_pi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_shuffle_pi8;
|
|
+
|
|
+ unsafe {
|
|
+ crate::mem::transmute(
|
|
+ _mm_shuffle_pi8(
|
|
+ crate::mem::transmute(self.0),
|
|
+ crate::mem::transmute(indices.0)
|
|
+ )
|
|
+ )
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else if #[cfg(all(
|
|
+ any(
|
|
+ all(target_aarch = "aarch64", target_feature = "neon"),
|
|
+ all(target_aarch = "arm", target_feature = "v7",
|
|
+ target_feature = "neon")
|
|
+ ),
|
|
+ any(feature = "core_arch", libcore_neon)
|
|
+ )
|
|
+ )] {
|
|
+ impl Shuffle1Dyn for u8x8 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ #[cfg(targt_arch = "aarch64")]
|
|
+ use crate::arch::aarch64::vtbl1_u8;
|
|
+ #[cfg(targt_arch = "arm")]
|
|
+ use crate::arch::arm::vtbl1_u8;
|
|
+
|
|
+ // This is safe because the binary is compiled with
|
|
+ // neon enabled at compile-time and can therefore only
|
|
+ // run on CPUs that have it enabled.
|
|
+ unsafe {
|
|
+ Simd(mem::transmute(
|
|
+ vtbl1_u8(mem::transmute(self.0),
|
|
+ crate::mem::transmute(indices.0))
|
|
+ ))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_fallback!(u8x8);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (u8x16) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
|
|
+ target_feature = "ssse3"))] {
|
|
+ impl Shuffle1Dyn for u8x16 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::_mm_shuffle_epi8;
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::_mm_shuffle_epi8;
|
|
+ // This is safe because the binary is compiled with
|
|
+ // ssse3 enabled at compile-time and can therefore only
|
|
+ // run on CPUs that have it enabled.
|
|
+ unsafe {
|
|
+ Simd(mem::transmute(
|
|
+ _mm_shuffle_epi8(mem::transmute(self.0),
|
|
+ crate::mem::transmute(indices))
|
|
+ ))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else if #[cfg(all(target_aarch = "aarch64", target_feature = "neon",
|
|
+ any(feature = "core_arch", libcore_neon)))] {
|
|
+ impl Shuffle1Dyn for u8x16 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ use crate::arch::aarch64::vqtbl1q_u8;
|
|
+
|
|
+ // This is safe because the binary is compiled with
|
|
+ // neon enabled at compile-time and can therefore only
|
|
+ // run on CPUs that have it enabled.
|
|
+ unsafe {
|
|
+ Simd(mem::transmute(
|
|
+ vqtbl1q_u8(mem::transmute(self.0),
|
|
+ crate::mem::transmute(indices.0))
|
|
+ ))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else if #[cfg(all(target_aarch = "arm", target_feature = "v7",
|
|
+ target_feature = "neon",
|
|
+ any(feature = "core_arch", libcore_neon)))] {
|
|
+ impl Shuffle1Dyn for u8x16 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ use crate::arch::arm::vtbl2_u8;
|
|
+
|
|
+ // This is safe because the binary is compiled with
|
|
+ // neon enabled at compile-time and can therefore only
|
|
+ // run on CPUs that have it enabled.
|
|
+ unsafe {
|
|
+ union U {
|
|
+ j: u8x16,
|
|
+ s: (u8x8, u8x8),
|
|
+ }
|
|
+
|
|
+ let (i0, i1) = U { j: y }.s;
|
|
+
|
|
+ let r0 = vtbl2_u8(
|
|
+ mem::transmute(x),
|
|
+ crate::mem::transmute(i0)
|
|
+ );
|
|
+ let r1 = vtbl2_u8(
|
|
+ mem::transmute(x),
|
|
+ crate::mem::transmute(i1)
|
|
+ );
|
|
+
|
|
+ let r = U { s: (r0, r1) }.j;
|
|
+
|
|
+ Simd(mem::transmute(r))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl_fallback!(u8x16);
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (u16x8) => {
|
|
+ impl Shuffle1Dyn for u16x8 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ let indices: u8x8 = (indices * 2).cast();
|
|
+ let indices: u8x16 = shuffle!(
|
|
+ indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7]
|
|
+ );
|
|
+ let v = u8x16::new(
|
|
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
|
|
+ );
|
|
+ let indices = indices + v;
|
|
+ unsafe {
|
|
+ let s: u8x16 =crate::mem::transmute(self);
|
|
+ crate::mem::transmute(s.shuffle1_dyn(indices))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (u32x4) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
|
|
+ target_feature = "avx"))] {
|
|
+ impl Shuffle1Dyn for u32x4 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::{_mm_permutevar_ps};
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::{_mm_permutevar_ps};
|
|
+
|
|
+ unsafe {
|
|
+ crate::mem::transmute(
|
|
+ _mm_permutevar_ps(
|
|
+ crate::mem::transmute(self.0),
|
|
+ crate::mem::transmute(indices.0)
|
|
+ )
|
|
+ )
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl Shuffle1Dyn for u32x4 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ let indices: u8x4 = (indices * 4).cast();
|
|
+ let indices: u8x16 = shuffle!(
|
|
+ indices,
|
|
+ [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
|
|
+ );
|
|
+ let v = u8x16::new(
|
|
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
|
|
+ );
|
|
+ let indices = indices + v;
|
|
+ unsafe {
|
|
+ let s: u8x16 =crate::mem::transmute(self);
|
|
+ crate::mem::transmute(s.shuffle1_dyn(indices))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (u64x2) => {
|
|
+ cfg_if! {
|
|
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
|
|
+ target_feature = "avx"))] {
|
|
+ impl Shuffle1Dyn for u64x2 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ #[cfg(target_arch = "x86")]
|
|
+ use crate::arch::x86::{_mm_permutevar_pd};
|
|
+ #[cfg(target_arch = "x86_64")]
|
|
+ use crate::arch::x86_64::{_mm_permutevar_pd};
|
|
+ // _mm_permutevar_pd uses the _second_ bit of each
|
|
+ // element to perform the selection, that is: 0b00 => 0,
|
|
+ // 0b10 => 1:
|
|
+ let indices = indices << 1;
|
|
+ unsafe {
|
|
+ crate::mem::transmute(
|
|
+ _mm_permutevar_pd(
|
|
+ crate::mem::transmute(self),
|
|
+ crate::mem::transmute(indices)
|
|
+ )
|
|
+ )
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ impl Shuffle1Dyn for u64x2 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ let indices: u8x2 = (indices * 8).cast();
|
|
+ let indices: u8x16 = shuffle!(
|
|
+ indices,
|
|
+ [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
|
|
+ );
|
|
+ let v = u8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
|
|
+ );
|
|
+ let indices = indices + v;
|
|
+ unsafe {
|
|
+ let s: u8x16 =crate::mem::transmute(self);
|
|
+ crate::mem::transmute(s.shuffle1_dyn(indices))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ (u128x1) => {
|
|
+ impl Shuffle1Dyn for u128x1 {
|
|
+ type Indices = Self;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, _indices: Self::Indices) -> Self {
|
|
+ self
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+ ($id:ident) => { impl_fallback!($id); }
|
|
+}
|
|
+
|
|
+impl_shuffle1_dyn!(u8x2);
|
|
+impl_shuffle1_dyn!(u8x4);
|
|
+impl_shuffle1_dyn!(u8x8);
|
|
+impl_shuffle1_dyn!(u8x16);
|
|
+impl_shuffle1_dyn!(u8x32);
|
|
+impl_shuffle1_dyn!(u8x64);
|
|
+
|
|
+impl_shuffle1_dyn!(u16x2);
|
|
+impl_shuffle1_dyn!(u16x4);
|
|
+impl_shuffle1_dyn!(u16x8);
|
|
+impl_shuffle1_dyn!(u16x16);
|
|
+impl_shuffle1_dyn!(u16x32);
|
|
+
|
|
+impl_shuffle1_dyn!(u32x2);
|
|
+impl_shuffle1_dyn!(u32x4);
|
|
+impl_shuffle1_dyn!(u32x8);
|
|
+impl_shuffle1_dyn!(u32x16);
|
|
+
|
|
+impl_shuffle1_dyn!(u64x2);
|
|
+impl_shuffle1_dyn!(u64x4);
|
|
+impl_shuffle1_dyn!(u64x8);
|
|
+
|
|
+impl_shuffle1_dyn!(usizex2);
|
|
+impl_shuffle1_dyn!(usizex4);
|
|
+impl_shuffle1_dyn!(usizex8);
|
|
+
|
|
+impl_shuffle1_dyn!(u128x1);
|
|
+impl_shuffle1_dyn!(u128x2);
|
|
+impl_shuffle1_dyn!(u128x4);
|
|
+
|
|
+// Implementation for non-unsigned vector types
|
|
+macro_rules! impl_shuffle1_dyn_non_u {
|
|
+ ($id:ident, $uid:ident) => {
|
|
+ impl Shuffle1Dyn for $id {
|
|
+ type Indices = $uid;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ unsafe {
|
|
+ let u: $uid = crate::mem::transmute(self);
|
|
+ crate::mem::transmute(u.shuffle1_dyn(indices))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(i8x2, u8x2);
|
|
+impl_shuffle1_dyn_non_u!(i8x4, u8x4);
|
|
+impl_shuffle1_dyn_non_u!(i8x8, u8x8);
|
|
+impl_shuffle1_dyn_non_u!(i8x16, u8x16);
|
|
+impl_shuffle1_dyn_non_u!(i8x32, u8x32);
|
|
+impl_shuffle1_dyn_non_u!(i8x64, u8x64);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(i16x2, u16x2);
|
|
+impl_shuffle1_dyn_non_u!(i16x4, u16x4);
|
|
+impl_shuffle1_dyn_non_u!(i16x8, u16x8);
|
|
+impl_shuffle1_dyn_non_u!(i16x16, u16x16);
|
|
+impl_shuffle1_dyn_non_u!(i16x32, u16x32);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(i32x2, u32x2);
|
|
+impl_shuffle1_dyn_non_u!(i32x4, u32x4);
|
|
+impl_shuffle1_dyn_non_u!(i32x8, u32x8);
|
|
+impl_shuffle1_dyn_non_u!(i32x16, u32x16);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(i64x2, u64x2);
|
|
+impl_shuffle1_dyn_non_u!(i64x4, u64x4);
|
|
+impl_shuffle1_dyn_non_u!(i64x8, u64x8);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(isizex2, usizex2);
|
|
+impl_shuffle1_dyn_non_u!(isizex4, usizex4);
|
|
+impl_shuffle1_dyn_non_u!(isizex8, usizex8);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(i128x1, u128x1);
|
|
+impl_shuffle1_dyn_non_u!(i128x2, u128x2);
|
|
+impl_shuffle1_dyn_non_u!(i128x4, u128x4);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(m8x2, u8x2);
|
|
+impl_shuffle1_dyn_non_u!(m8x4, u8x4);
|
|
+impl_shuffle1_dyn_non_u!(m8x8, u8x8);
|
|
+impl_shuffle1_dyn_non_u!(m8x16, u8x16);
|
|
+impl_shuffle1_dyn_non_u!(m8x32, u8x32);
|
|
+impl_shuffle1_dyn_non_u!(m8x64, u8x64);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(m16x2, u16x2);
|
|
+impl_shuffle1_dyn_non_u!(m16x4, u16x4);
|
|
+impl_shuffle1_dyn_non_u!(m16x8, u16x8);
|
|
+impl_shuffle1_dyn_non_u!(m16x16, u16x16);
|
|
+impl_shuffle1_dyn_non_u!(m16x32, u16x32);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(m32x2, u32x2);
|
|
+impl_shuffle1_dyn_non_u!(m32x4, u32x4);
|
|
+impl_shuffle1_dyn_non_u!(m32x8, u32x8);
|
|
+impl_shuffle1_dyn_non_u!(m32x16, u32x16);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(m64x2, u64x2);
|
|
+impl_shuffle1_dyn_non_u!(m64x4, u64x4);
|
|
+impl_shuffle1_dyn_non_u!(m64x8, u64x8);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(msizex2, usizex2);
|
|
+impl_shuffle1_dyn_non_u!(msizex4, usizex4);
|
|
+impl_shuffle1_dyn_non_u!(msizex8, usizex8);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(m128x1, u128x1);
|
|
+impl_shuffle1_dyn_non_u!(m128x2, u128x2);
|
|
+impl_shuffle1_dyn_non_u!(m128x4, u128x4);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(f32x2, u32x2);
|
|
+impl_shuffle1_dyn_non_u!(f32x4, u32x4);
|
|
+impl_shuffle1_dyn_non_u!(f32x8, u32x8);
|
|
+impl_shuffle1_dyn_non_u!(f32x16, u32x16);
|
|
+
|
|
+impl_shuffle1_dyn_non_u!(f64x2, u64x2);
|
|
+impl_shuffle1_dyn_non_u!(f64x4, u64x4);
|
|
+impl_shuffle1_dyn_non_u!(f64x8, u64x8);
|
|
+
|
|
+// Implementation for pointer vector types
|
|
+macro_rules! impl_shuffle1_dyn_ptr {
|
|
+ ($id:ident, $uid:ident) => {
|
|
+ impl<T> Shuffle1Dyn for $id<T> {
|
|
+ type Indices = $uid;
|
|
+ #[inline]
|
|
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
|
|
+ unsafe {
|
|
+ let u: $uid = crate::mem::transmute(self);
|
|
+ crate::mem::transmute(u.shuffle1_dyn(indices))
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+impl_shuffle1_dyn_ptr!(cptrx2, usizex2);
|
|
+impl_shuffle1_dyn_ptr!(cptrx4, usizex4);
|
|
+impl_shuffle1_dyn_ptr!(cptrx8, usizex8);
|
|
+
|
|
+impl_shuffle1_dyn_ptr!(mptrx2, usizex2);
|
|
+impl_shuffle1_dyn_ptr!(mptrx4, usizex4);
|
|
+impl_shuffle1_dyn_ptr!(mptrx8, usizex8);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/swap_bytes.rs b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs
|
|
new file mode 100644
|
|
index 000000000000..b435fb5da120
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs
|
|
@@ -0,0 +1,189 @@
|
|
+//! Horizontal byte swaps: reverse the byte order of a whole vector.
|
|
+
|
|
+// FIXME: investigate using `llvm.bswap`
|
|
+// https://github.com/rust-lang-nursery/packed_simd/issues/19
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+crate trait SwapBytes {
|
|
+ fn swap_bytes(self) -> Self;
|
|
+}
|
|
+
|
|
+macro_rules! impl_swap_bytes {
|
|
+ (v16: $($id:ident,)+) => {
|
|
+ $(
|
|
+ impl SwapBytes for $id {
|
|
+ #[inline]
|
|
+ fn swap_bytes(self) -> Self {
|
|
+ unsafe { shuffle!(self, [1, 0]) }
|
|
+ }
|
|
+ }
|
|
+ )+
|
|
+ };
|
|
+ (v32: $($id:ident,)+) => {
|
|
+ $(
|
|
+ impl SwapBytes for $id {
|
|
+ #[inline]
|
|
+ #[allow(clippy::useless_transmute)]
|
|
+ fn swap_bytes(self) -> Self {
|
|
+ unsafe {
|
|
+ let bytes: u8x4 = crate::mem::transmute(self);
|
|
+ let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]);
|
|
+ crate::mem::transmute(result)
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ )+
|
|
+ };
|
|
+ (v64: $($id:ident,)+) => {
|
|
+ $(
|
|
+ impl SwapBytes for $id {
|
|
+ #[inline]
|
|
+ #[allow(clippy::useless_transmute)]
|
|
+ fn swap_bytes(self) -> Self {
|
|
+ unsafe {
|
|
+ let bytes: u8x8 = crate::mem::transmute(self);
|
|
+ let result: u8x8 = shuffle!(
|
|
+ bytes, [7, 6, 5, 4, 3, 2, 1, 0]
|
|
+ );
|
|
+ crate::mem::transmute(result)
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ )+
|
|
+ };
|
|
+ (v128: $($id:ident,)+) => {
|
|
+ $(
|
|
+ impl SwapBytes for $id {
|
|
+ #[inline]
|
|
+ #[allow(clippy::useless_transmute)]
|
|
+ fn swap_bytes(self) -> Self {
|
|
+ unsafe {
|
|
+ let bytes: u8x16 = crate::mem::transmute(self);
|
|
+ let result: u8x16 = shuffle!(bytes, [
|
|
+ 15, 14, 13, 12, 11, 10, 9, 8,
|
|
+ 7, 6, 5, 4, 3, 2, 1, 0
|
|
+ ]);
|
|
+ crate::mem::transmute(result)
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ )+
|
|
+ };
|
|
+ (v256: $($id:ident,)+) => {
|
|
+ $(
|
|
+ impl SwapBytes for $id {
|
|
+ #[inline]
|
|
+ #[allow(clippy::useless_transmute)]
|
|
+ fn swap_bytes(self) -> Self {
|
|
+ unsafe {
|
|
+ let bytes: u8x32 = crate::mem::transmute(self);
|
|
+ let result: u8x32 = shuffle!(bytes, [
|
|
+ 31, 30, 29, 28, 27, 26, 25, 24,
|
|
+ 23, 22, 21, 20, 19, 18, 17, 16,
|
|
+ 15, 14, 13, 12, 11, 10, 9, 8,
|
|
+ 7, 6, 5, 4, 3, 2, 1, 0
|
|
+ ]);
|
|
+ crate::mem::transmute(result)
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ )+
|
|
+ };
|
|
+ (v512: $($id:ident,)+) => {
|
|
+ $(
|
|
+ impl SwapBytes for $id {
|
|
+ #[inline]
|
|
+ #[allow(clippy::useless_transmute)]
|
|
+ fn swap_bytes(self) -> Self {
|
|
+ unsafe {
|
|
+ let bytes: u8x64 = crate::mem::transmute(self);
|
|
+ let result: u8x64 = shuffle!(bytes, [
|
|
+ 63, 62, 61, 60, 59, 58, 57, 56,
|
|
+ 55, 54, 53, 52, 51, 50, 49, 48,
|
|
+ 47, 46, 45, 44, 43, 42, 41, 40,
|
|
+ 39, 38, 37, 36, 35, 34, 33, 32,
|
|
+ 31, 30, 29, 28, 27, 26, 25, 24,
|
|
+ 23, 22, 21, 20, 19, 18, 17, 16,
|
|
+ 15, 14, 13, 12, 11, 10, 9, 8,
|
|
+ 7, 6, 5, 4, 3, 2, 1, 0
|
|
+ ]);
|
|
+ crate::mem::transmute(result)
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ )+
|
|
+ };
|
|
+}
|
|
+
|
|
+impl_swap_bytes!(v16: u8x2, i8x2,);
|
|
+impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,);
|
|
+// FIXME: 64-bit single element vector
|
|
+impl_swap_bytes!(
|
|
+ v64: u8x8,
|
|
+ i8x8,
|
|
+ u16x4,
|
|
+ i16x4,
|
|
+ u32x2,
|
|
+ i32x2, /* u64x1, i64x1, */
|
|
+);
|
|
+
|
|
+impl_swap_bytes!(
|
|
+ v128: u8x16,
|
|
+ i8x16,
|
|
+ u16x8,
|
|
+ i16x8,
|
|
+ u32x4,
|
|
+ i32x4,
|
|
+ u64x2,
|
|
+ i64x2,
|
|
+ u128x1,
|
|
+ i128x1,
|
|
+);
|
|
+impl_swap_bytes!(
|
|
+ v256: u8x32,
|
|
+ i8x32,
|
|
+ u16x16,
|
|
+ i16x16,
|
|
+ u32x8,
|
|
+ i32x8,
|
|
+ u64x4,
|
|
+ i64x4,
|
|
+ u128x2,
|
|
+ i128x2,
|
|
+);
|
|
+
|
|
+impl_swap_bytes!(
|
|
+ v512: u8x64,
|
|
+ i8x64,
|
|
+ u16x32,
|
|
+ i16x32,
|
|
+ u32x16,
|
|
+ i32x16,
|
|
+ u64x8,
|
|
+ i64x8,
|
|
+ u128x4,
|
|
+ i128x4,
|
|
+);
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(target_pointer_width = "8")] {
|
|
+ impl_swap_bytes!(v16: isizex2, usizex2,);
|
|
+ impl_swap_bytes!(v32: isizex4, usizex4,);
|
|
+ impl_swap_bytes!(v64: isizex8, usizex8,);
|
|
+ } else if #[cfg(target_pointer_width = "16")] {
|
|
+ impl_swap_bytes!(v32: isizex2, usizex2,);
|
|
+ impl_swap_bytes!(v64: isizex4, usizex4,);
|
|
+ impl_swap_bytes!(v128: isizex8, usizex8,);
|
|
+ } else if #[cfg(target_pointer_width = "32")] {
|
|
+ impl_swap_bytes!(v64: isizex2, usizex2,);
|
|
+ impl_swap_bytes!(v128: isizex4, usizex4,);
|
|
+ impl_swap_bytes!(v256: isizex8, usizex8,);
|
|
+ } else if #[cfg(target_pointer_width = "64")] {
|
|
+ impl_swap_bytes!(v128: isizex2, usizex2,);
|
|
+ impl_swap_bytes!(v256: isizex4, usizex4,);
|
|
+ impl_swap_bytes!(v512: isizex8, usizex8,);
|
|
+ } else {
|
|
+ compile_error!("unsupported target_pointer_width");
|
|
+ }
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/v128.rs b/third_party/rust/packed_simd/src/codegen/v128.rs
|
|
new file mode 100644
|
|
index 000000000000..9506424fadad
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/v128.rs
|
|
@@ -0,0 +1,46 @@
|
|
+//! Internal 128-bit wide vector types
|
|
+
|
|
+use crate::masks::*;
|
|
+
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [i8; 16]: i8x16 |
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [u8; 16]: u8x16 |
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [m8; 16]: m8x16 |
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8
|
|
+);
|
|
+
|
|
+impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16);
|
|
+impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16);
|
|
+impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16);
|
|
+
|
|
+impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32);
|
|
+impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32);
|
|
+impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32);
|
|
+impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32);
|
|
+
|
|
+impl_simd_array!([i64; 2]: i64x2 | i64, i64);
|
|
+impl_simd_array!([u64; 2]: u64x2 | u64, u64);
|
|
+impl_simd_array!([f64; 2]: f64x2 | f64, f64);
|
|
+impl_simd_array!([m64; 2]: m64x2 | i64, i64);
|
|
+
|
|
+impl_simd_array!([i128; 1]: i128x1 | i128);
|
|
+impl_simd_array!([u128; 1]: u128x1 | u128);
|
|
+impl_simd_array!([m128; 1]: m128x1 | i128);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/v16.rs b/third_party/rust/packed_simd/src/codegen/v16.rs
|
|
new file mode 100644
|
|
index 000000000000..4d55a6d8998e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/v16.rs
|
|
@@ -0,0 +1,7 @@
|
|
+//! Internal 16-bit wide vector types
|
|
+
|
|
+use crate::masks::*;
|
|
+
|
|
+impl_simd_array!([i8; 2]: i8x2 | i8, i8);
|
|
+impl_simd_array!([u8; 2]: u8x2 | u8, u8);
|
|
+impl_simd_array!([m8; 2]: m8x2 | i8, i8);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/v256.rs b/third_party/rust/packed_simd/src/codegen/v256.rs
|
|
new file mode 100644
|
|
index 000000000000..5ca4759f0c0a
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/v256.rs
|
|
@@ -0,0 +1,78 @@
|
|
+//! Internal 256-bit wide vector types
|
|
+
|
|
+use crate::masks::*;
|
|
+
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [i8; 32]: i8x32 |
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [u8; 32]: u8x32 |
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [m8; 32]: m8x32 |
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [i16; 16]: i16x16 |
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [u16; 16]: u16x16 |
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [m16; 16]: m16x16 |
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16
|
|
+);
|
|
+
|
|
+impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32);
|
|
+impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32);
|
|
+impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32);
|
|
+impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32);
|
|
+
|
|
+impl_simd_array!([i64; 4]: i64x4 | i64, i64, i64, i64);
|
|
+impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64);
|
|
+impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64);
|
|
+impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64);
|
|
+
|
|
+impl_simd_array!([i128; 2]: i128x2 | i128, i128);
|
|
+impl_simd_array!([u128; 2]: u128x2 | u128, u128);
|
|
+impl_simd_array!([m128; 2]: m128x2 | i128, i128);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/v32.rs b/third_party/rust/packed_simd/src/codegen/v32.rs
|
|
new file mode 100644
|
|
index 000000000000..ae1dabd00c22
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/v32.rs
|
|
@@ -0,0 +1,11 @@
|
|
+//! Internal 32-bit wide vector types
|
|
+
|
|
+use crate::masks::*;
|
|
+
|
|
+impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8);
|
|
+impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8);
|
|
+impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8);
|
|
+
|
|
+impl_simd_array!([i16; 2]: i16x2 | i16, i16);
|
|
+impl_simd_array!([u16; 2]: u16x2 | u16, u16);
|
|
+impl_simd_array!([m16; 2]: m16x2 | i16, i16);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/v512.rs b/third_party/rust/packed_simd/src/codegen/v512.rs
|
|
new file mode 100644
|
|
index 000000000000..bf95110340d6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/v512.rs
|
|
@@ -0,0 +1,145 @@
|
|
+//! Internal 512-bit wide vector types
|
|
+
|
|
+use crate::masks::*;
|
|
+
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [i8; 64]: i8x64 |
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [u8; 64]: u8x64 |
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8,
|
|
+ u8, u8, u8, u8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [m8; 64]: m8x64 |
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8,
|
|
+ i8, i8, i8, i8
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [i16; 32]: i16x32 |
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [u16; 32]: u16x32 |
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16,
|
|
+ u16, u16, u16, u16
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [m16; 32]: m16x32 |
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16,
|
|
+ i16, i16, i16, i16
|
|
+);
|
|
+
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [i32; 16]: i32x16 |
|
|
+ i32, i32, i32, i32,
|
|
+ i32, i32, i32, i32,
|
|
+ i32, i32, i32, i32,
|
|
+ i32, i32, i32, i32
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [u32; 16]: u32x16 |
|
|
+ u32, u32, u32, u32,
|
|
+ u32, u32, u32, u32,
|
|
+ u32, u32, u32, u32,
|
|
+ u32, u32, u32, u32
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [f32; 16]: f32x16 |
|
|
+ f32, f32, f32, f32,
|
|
+ f32, f32, f32, f32,
|
|
+ f32, f32, f32, f32,
|
|
+ f32, f32, f32, f32
|
|
+);
|
|
+#[rustfmt::skip]
|
|
+impl_simd_array!(
|
|
+ [m32; 16]: m32x16 |
|
|
+ i32, i32, i32, i32,
|
|
+ i32, i32, i32, i32,
|
|
+ i32, i32, i32, i32,
|
|
+ i32, i32, i32, i32
|
|
+);
|
|
+
|
|
+impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64);
|
|
+impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64);
|
|
+impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64);
|
|
+impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64);
|
|
+
|
|
+impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128);
|
|
+impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128);
|
|
+impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/v64.rs b/third_party/rust/packed_simd/src/codegen/v64.rs
|
|
new file mode 100644
|
|
index 000000000000..3cfb67c1a013
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/v64.rs
|
|
@@ -0,0 +1,21 @@
|
|
+//! Internal 64-bit wide vector types
|
|
+
|
|
+use crate::masks::*;
|
|
+
|
|
+impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8);
|
|
+impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8);
|
|
+impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8);
|
|
+
|
|
+impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16);
|
|
+impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16);
|
|
+impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16);
|
|
+
|
|
+impl_simd_array!([i32; 2]: i32x2 | i32, i32);
|
|
+impl_simd_array!([u32; 2]: u32x2 | u32, u32);
|
|
+impl_simd_array!([f32; 2]: f32x2 | f32, f32);
|
|
+impl_simd_array!([m32; 2]: m32x2 | i32, i32);
|
|
+
|
|
+impl_simd_array!([i64; 1]: i64x1 | i64);
|
|
+impl_simd_array!([u64; 1]: u64x1 | u64);
|
|
+impl_simd_array!([f64; 1]: f64x1 | f64);
|
|
+impl_simd_array!([m64; 1]: m64x1 | i64);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/vPtr.rs b/third_party/rust/packed_simd/src/codegen/vPtr.rs
|
|
new file mode 100644
|
|
index 000000000000..1f2bc7714dd9
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/vPtr.rs
|
|
@@ -0,0 +1,33 @@
|
|
+//! Pointer vector types
|
|
+
|
|
+macro_rules! impl_simd_ptr {
|
|
+ ([$ptr_ty:ty; $elem_count:expr]: $tuple_id:ident | $ty:ident
|
|
+ | $($tys:ty),*) => {
|
|
+ #[derive(Copy, Clone)]
|
|
+ #[repr(simd)]
|
|
+ pub struct $tuple_id<$ty>($(crate $tys),*);
|
|
+ //^^^^^^^ leaked through SimdArray
|
|
+
|
|
+ impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] {
|
|
+ type Tuple = $tuple_id<$ptr_ty>;
|
|
+ type T = $ptr_ty;
|
|
+ const N: usize = $elem_count;
|
|
+ type NT = [u32; $elem_count];
|
|
+ }
|
|
+
|
|
+ impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> {
|
|
+ type Element = $ptr_ty;
|
|
+ const LANES: usize = $elem_count;
|
|
+ type LanesType = [u32; $elem_count];
|
|
+ }
|
|
+
|
|
+ }
|
|
+}
|
|
+
|
|
+impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T);
|
|
+impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T);
|
|
+impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T);
|
|
+
|
|
+impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T);
|
|
+impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T);
|
|
+impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T);
|
|
diff --git a/third_party/rust/packed_simd/src/codegen/vSize.rs b/third_party/rust/packed_simd/src/codegen/vSize.rs
|
|
new file mode 100644
|
|
index 000000000000..3911b21340c8
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/codegen/vSize.rs
|
|
@@ -0,0 +1,43 @@
|
|
+//! Vector types with pointer-sized elements
|
|
+
|
|
+use crate::codegen::pointer_sized_int::{isize_, usize_};
|
|
+use crate::masks::*;
|
|
+
|
|
+impl_simd_array!([isize; 2]: isizex2 | isize_, isize_);
|
|
+impl_simd_array!([usize; 2]: usizex2 | usize_, usize_);
|
|
+impl_simd_array!([msize; 2]: msizex2 | isize_, isize_);
|
|
+
|
|
+impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_);
|
|
+impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_);
|
|
+impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_);
|
|
+
|
|
+impl_simd_array!(
|
|
+ [isize; 8]: isizex8 | isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_
|
|
+);
|
|
+impl_simd_array!(
|
|
+ [usize; 8]: usizex8 | usize_,
|
|
+ usize_,
|
|
+ usize_,
|
|
+ usize_,
|
|
+ usize_,
|
|
+ usize_,
|
|
+ usize_,
|
|
+ usize_
|
|
+);
|
|
+impl_simd_array!(
|
|
+ [msize; 8]: msizex8 | isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_,
|
|
+ isize_
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/lib.rs b/third_party/rust/packed_simd/src/lib.rs
|
|
new file mode 100644
|
|
index 000000000000..d73645e72fbe
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/lib.rs
|
|
@@ -0,0 +1,327 @@
|
|
+//! # Portable packed SIMD vectors
|
|
+//!
|
|
+//! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366:
|
|
+//! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) .
|
|
+//!
|
|
+//! The examples available in the
|
|
+//! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples)
|
|
+//! sub-directory of the crate showcase how to use the library in practice.
|
|
+//!
|
|
+//! ## Table of contents
|
|
+//!
|
|
+//! - [Introduction](#introduction)
|
|
+//! - [Vector types](#vector-types)
|
|
+//! - [Conditional operations](#conditional-operations)
|
|
+//! - [Conversions](#conversions)
|
|
+//! - [Performance
|
|
+//! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/)
|
|
+//!
|
|
+//! ## Introduction
|
|
+//!
|
|
+//! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N`
|
|
+//! elements of type `T` as well as many type aliases for this type: for
|
|
+//! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`.
|
|
+//!
|
|
+//! The operations on packed vectors are, by default, "vertical", that is, they
|
|
+//! are applied to each vector lane in isolation of the others:
|
|
+//!
|
|
+//! ```
|
|
+//! # use packed_simd::*;
|
|
+//! let a = i32x4::new(1, 2, 3, 4);
|
|
+//! let b = i32x4::new(5, 6, 7, 8);
|
|
+//! assert_eq!(a + b, i32x4::new(6, 8, 10, 12));
|
|
+//! ```
|
|
+//!
|
|
+//! Many "horizontal" operations are also provided:
|
|
+//!
|
|
+//! ```
|
|
+//! # use packed_simd::*;
|
|
+//! # let a = i32x4::new(1, 2, 3, 4);
|
|
+//! assert_eq!(a.wrapping_sum(), 10);
|
|
+//! ```
|
|
+//!
|
|
+//! In virtually all architectures vertical operations are fast, while
|
|
+//! horizontal operations are, by comparison, much slower. That is, the
|
|
+//! most portably-efficient way of performing a reduction over a slice
|
|
+//! is to collect the results into a vector using vertical operations,
|
|
+//! and performing a single horizontal operation at the end:
|
|
+//!
|
|
+//! ```
|
|
+//! # use packed_simd::*;
|
|
+//! fn reduce(x: &[i32]) -> i32 {
|
|
+//! assert!(x.len() % 4 == 0);
|
|
+//! let mut sum = i32x4::splat(0); // [0, 0, 0, 0]
|
|
+//! for i in (0..x.len()).step_by(4) {
|
|
+//! sum += i32x4::from_slice_unaligned(&x[i..]);
|
|
+//! }
|
|
+//! sum.wrapping_sum()
|
|
+//! }
|
|
+//!
|
|
+//! let x = [0, 1, 2, 3, 4, 5, 6, 7];
|
|
+//! assert_eq!(reduce(&x), 28);
|
|
+//! ```
|
|
+//!
|
|
+//! ## Vector types
|
|
+//!
|
|
+//! The vector type aliases are named according to the following scheme:
|
|
+//!
|
|
+//! > `{element_type}x{number_of_lanes} == Simd<[element_type;
|
|
+//! number_of_lanes]>`
|
|
+//!
|
|
+//! where the following element types are supported:
|
|
+//!
|
|
+//! * `i{element_width}`: signed integer
|
|
+//! * `u{element_width}`: unsigned integer
|
|
+//! * `f{element_width}`: float
|
|
+//! * `m{element_width}`: mask (see below)
|
|
+//! * `*{const,mut} T`: `const` and `mut` pointers
|
|
+//!
|
|
+//! ## Basic operations
|
|
+//!
|
|
+//! ```
|
|
+//! # use packed_simd::*;
|
|
+//! // Sets all elements to `0`:
|
|
+//! let a = i32x4::splat(0);
|
|
+//!
|
|
+//! // Reads a vector from a slice:
|
|
+//! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5];
|
|
+//! let b = i32x4::from_slice_unaligned(&arr);
|
|
+//!
|
|
+//! // Reads the 4-th element of a vector:
|
|
+//! assert_eq!(b.extract(3), 1);
|
|
+//!
|
|
+//! // Returns a new vector where the 4-th element is replaced with `1`:
|
|
+//! let a = a.replace(3, 1);
|
|
+//! assert_eq!(a, b);
|
|
+//!
|
|
+//! // Writes a vector to a slice:
|
|
+//! let a = a.replace(2, 1);
|
|
+//! a.write_to_slice_unaligned(&mut arr[4..]);
|
|
+//! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]);
|
|
+//! ```
|
|
+//!
|
|
+//! ## Conditional operations
|
|
+//!
|
|
+//! One often needs to perform an operation on some lanes of the vector. Vector
|
|
+//! masks, like `m32x4`, allow selecting on which vector lanes an operation is
|
|
+//! to be performed:
|
|
+//!
|
|
+//! ```
|
|
+//! # use packed_simd::*;
|
|
+//! let a = i32x4::new(1, 1, 2, 2);
|
|
+//!
|
|
+//! // Add `1` to the first two lanes of the vector.
|
|
+//! let m = m16x4::new(true, true, false, false);
|
|
+//! let a = m.select(a + 1, a);
|
|
+//! assert_eq!(a, i32x4::splat(2));
|
|
+//! ```
|
|
+//!
|
|
+//! The elements of a vector mask are either `true` or `false`. Here `true`
|
|
+//! means that a lane is "selected", while `false` means that a lane is not
|
|
+//! selected.
|
|
+//!
|
|
+//! All vector masks implement a `mask.select(a: T, b: T) -> T` method that
|
|
+//! works on all vectors that have the same number of lanes as the mask. The
|
|
+//! resulting vector contains the elements of `a` for those lanes for which the
|
|
+//! mask is `true`, and the elements of `b` otherwise.
|
|
+//!
|
|
+//! The example constructs a mask with the first two lanes set to `true` and
|
|
+//! the last two lanes set to `false`. This selects the first two lanes of `a +
|
|
+//! 1` and the last two lanes of `a`, producing a vector where the first two
|
|
+//! lanes have been incremented by `1`.
|
|
+//!
|
|
+//! > note: mask `select` can be used on vector types that have the same number
|
|
+//! > of lanes as the mask. The example shows this by using [`m16x4`] instead
|
|
+//! > of [`m32x4`]. It is _typically_ more performant to use a mask element
|
|
+//! > width equal to the element width of the vectors being operated upon.
|
|
+//! > This is, however, not true for 512-bit wide vectors when targeting
|
|
+//! > AVX-512, where the most efficient masks use only 1-bit per element.
|
|
+//!
|
|
+//! All vertical comparison operations return masks:
|
|
+//!
|
|
+//! ```
|
|
+//! # use packed_simd::*;
|
|
+//! let a = i32x4::new(1, 1, 3, 3);
|
|
+//! let b = i32x4::new(2, 2, 0, 0);
|
|
+//!
|
|
+//! // ge: >= (Greater or Equal; see also lt, le, gt, eq, ne).
|
|
+//! let m = a.ge(i32x4::splat(2));
|
|
+//!
|
|
+//! if m.any() {
|
|
+//! // all / any / none allow coherent control flow
|
|
+//! let d = m.select(a, b);
|
|
+//! assert_eq!(d, i32x4::new(2, 2, 3, 3));
|
|
+//! }
|
|
+//! ```
|
|
+//!
|
|
+//! ## Conversions
|
|
+//!
|
|
+//! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for
|
|
+//! vectors with the same number of lanes when the conversion is value
|
|
+//! preserving (same as in `std`).
|
|
+//!
|
|
+//! * **safe bitwise conversions**: The cargo feature `into_bits` provides the
|
|
+//! `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise
|
|
+//! `transmute`s when all bit patterns of the source type are valid bit
|
|
+//! patterns of the target type and are also implemented for the
|
|
+//! architecture-specific vector types of `std::arch`. For example, `let x:
|
|
+//! u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit
|
|
+//! patterns are valid `u8x8` bit patterns. However, the opposite is not
|
|
+//! true, not all `u8x8` bit patterns are valid `m8x8` bit-patterns, so this
|
|
+//! operation cannot be performed safely using `x.into_bits()`; one needs to
|
|
+//! use `unsafe { crate::mem::transmute(x) }` for that, making sure that the
|
|
+//! value in the `u8x8` is a valid bit-pattern of `m8x8`.
|
|
+//!
|
|
+//! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`]
|
|
+//! (`x.cast()`), just like `as`:
|
|
+//!
|
|
+//! * casting integer vectors whose lane types have the same size (e.g.
|
|
+//! `i32xN` -> `u32xN`) is a **no-op**,
|
|
+//!
|
|
+//! * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
|
|
+//! `u8xN`) will **truncate**,
|
|
+//!
|
|
+//! * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
|
|
+//! `u32xN`) will:
|
|
+//! * **zero-extend** if the source is unsigned, or
|
|
+//! * **sign-extend** if the source is signed,
|
|
+//!
|
|
+//! * casting from a float to an integer will **round the float towards
|
|
+//! zero**,
|
|
+//!
|
|
+//! * casting from an integer to float will produce the floating point
|
|
+//! representation of the integer, **rounding to nearest, ties to even**,
|
|
+//!
|
|
+//! * casting from an `f32` to an `f64` is perfect and lossless,
|
|
+//!
|
|
+//! * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
|
|
+//!
|
|
+//! Numeric casts are not very "precise": sometimes lossy, sometimes value
|
|
+//! preserving, etc.
|
|
+
|
|
+#![feature(
|
|
+ repr_simd,
|
|
+ const_fn,
|
|
+ platform_intrinsics,
|
|
+ stdsimd,
|
|
+ aarch64_target_feature,
|
|
+ arm_target_feature,
|
|
+ link_llvm_intrinsics,
|
|
+ core_intrinsics,
|
|
+ stmt_expr_attributes,
|
|
+ align_offset,
|
|
+ mmx_target_feature,
|
|
+ crate_visibility_modifier,
|
|
+ custom_inner_attributes
|
|
+)]
|
|
+#![allow(non_camel_case_types, non_snake_case,
|
|
+ clippy::cast_possible_truncation,
|
|
+ clippy::cast_lossless,
|
|
+ clippy::cast_possible_wrap,
|
|
+ clippy::cast_precision_loss,
|
|
+ // This lint is currently broken for generic code
|
|
+ // See https://github.com/rust-lang/rust-clippy/issues/3410
|
|
+ clippy::use_self
|
|
+)]
|
|
+#![cfg_attr(test, feature(hashmap_internals))]
|
|
+#![deny(warnings, rust_2018_idioms, clippy::missing_inline_in_public_items)]
|
|
+#![no_std]
|
|
+
|
|
+use cfg_if::cfg_if;
|
|
+
|
|
+cfg_if! {
|
|
+ if #[cfg(feature = "core_arch")] {
|
|
+ #[allow(unused_imports)]
|
|
+ use core_arch as arch;
|
|
+ } else {
|
|
+ #[allow(unused_imports)]
|
|
+ use core::arch;
|
|
+ }
|
|
+}
|
|
+
|
|
+#[cfg(all(target_arch = "wasm32", test))]
|
|
+use wasm_bindgen_test::*;
|
|
+
|
|
+#[allow(unused_imports)]
|
|
+use core::{
|
|
+ /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128,
|
|
+ i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice,
|
|
+ u128, u16, u32, u64, u8, usize,
|
|
+};
|
|
+
|
|
+#[macro_use]
|
|
+mod testing;
|
|
+#[macro_use]
|
|
+mod api;
|
|
+mod codegen;
|
|
+mod sealed;
|
|
+
|
|
+/// Packed SIMD vector type.
|
|
+///
|
|
+/// # Examples
|
|
+///
|
|
+/// ```
|
|
+/// # use packed_simd::Simd;
|
|
+/// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3);
|
|
+/// assert_eq!(v.extract(2), 2);
|
|
+/// ```
|
|
+#[repr(transparent)]
|
|
+#[derive(Copy, Clone)]
|
|
+pub struct Simd<A: sealed::SimdArray>(
|
|
+ // FIXME: this type should be private,
|
|
+ // but it currently must be public for the
|
|
+ // `shuffle!` macro to work: it needs to
|
|
+ // access the internal `repr(simd)` type
|
|
+ // to call the shuffle intrinsics.
|
|
+ #[doc(hidden)] pub <A as sealed::SimdArray>::Tuple,
|
|
+);
|
|
+
|
|
+/// Wrapper over `T` implementing a lexicographical order via the `PartialOrd`
|
|
+/// and/or `Ord` traits.
|
|
+#[repr(transparent)]
|
|
+#[derive(Copy, Clone, Debug)]
|
|
+#[allow(clippy::missing_inline_in_public_items)]
|
|
+pub struct LexicographicallyOrdered<T>(T);
|
|
+
|
|
+mod masks;
|
|
+pub use self::masks::*;
|
|
+
|
|
+mod v16;
|
|
+pub use self::v16::*;
|
|
+
|
|
+mod v32;
|
|
+pub use self::v32::*;
|
|
+
|
|
+mod v64;
|
|
+pub use self::v64::*;
|
|
+
|
|
+mod v128;
|
|
+pub use self::v128::*;
|
|
+
|
|
+mod v256;
|
|
+pub use self::v256::*;
|
|
+
|
|
+mod v512;
|
|
+pub use self::v512::*;
|
|
+
|
|
+mod vSize;
|
|
+pub use self::vSize::*;
|
|
+
|
|
+mod vPtr;
|
|
+pub use self::vPtr::*;
|
|
+
|
|
+pub use self::api::cast::*;
|
|
+
|
|
+#[cfg(feature = "into_bits")]
|
|
+pub use self::api::into_bits::*;
|
|
+
|
|
+// Re-export the shuffle intrinsics required by the `shuffle!` macro.
|
|
+#[doc(hidden)]
|
|
+pub use self::codegen::llvm::{
|
|
+ __shuffle_vector16, __shuffle_vector2, __shuffle_vector32,
|
|
+ __shuffle_vector4, __shuffle_vector64, __shuffle_vector8,
|
|
+};
|
|
+
|
|
+crate mod llvm {
|
|
+ crate use crate::codegen::llvm::*;
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/masks.rs b/third_party/rust/packed_simd/src/masks.rs
|
|
new file mode 100644
|
|
index 000000000000..f83c4da95750
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/masks.rs
|
|
@@ -0,0 +1,128 @@
|
|
+//! Mask types
|
|
+
|
|
+macro_rules! impl_mask_ty {
|
|
+ ($id:ident : $elem_ty:ident | #[$doc:meta]) => {
|
|
+ #[$doc]
|
|
+ #[derive(Copy, Clone)]
|
|
+ pub struct $id($elem_ty);
|
|
+
|
|
+ impl crate::sealed::Mask for $id {
|
|
+ fn test(&self) -> bool {
|
|
+ $id::test(self)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl $id {
|
|
+ /// Instantiate a mask from the boolean `x`
|
|
+ #[inline]
|
|
+ pub fn new(x: bool) -> Self {
|
|
+ if x {
|
|
+ $id(!0)
|
|
+ } else {
|
|
+ $id(0)
|
|
+ }
|
|
+ }
|
|
+ /// Test if the mask is set
|
|
+ #[inline]
|
|
+ pub fn test(&self) -> bool {
|
|
+ self.0 != 0
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl Default for $id {
|
|
+ #[inline]
|
|
+ fn default() -> Self {
|
|
+ $id(0)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ #[allow(clippy::partialeq_ne_impl)]
|
|
+ impl PartialEq<$id> for $id {
|
|
+ #[inline]
|
|
+ fn eq(&self, other: &Self) -> bool {
|
|
+ self.0 == other.0
|
|
+ }
|
|
+ #[inline]
|
|
+ fn ne(&self, other: &Self) -> bool {
|
|
+ self.0 != other.0
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl Eq for $id {}
|
|
+
|
|
+ impl PartialOrd<$id> for $id {
|
|
+ #[inline]
|
|
+ fn partial_cmp(
|
|
+ &self, other: &Self,
|
|
+ ) -> Option<crate::cmp::Ordering> {
|
|
+ use crate::cmp::Ordering;
|
|
+ if self == other {
|
|
+ Some(Ordering::Equal)
|
|
+ } else if self.0 > other.0 {
|
|
+ // Note:
|
|
+ // * false = 0_i
|
|
+ // * true == !0_i == -1_i
|
|
+ Some(Ordering::Less)
|
|
+ } else {
|
|
+ Some(Ordering::Greater)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ #[inline]
|
|
+ fn lt(&self, other: &Self) -> bool {
|
|
+ self.0 > other.0
|
|
+ }
|
|
+ #[inline]
|
|
+ fn gt(&self, other: &Self) -> bool {
|
|
+ self.0 < other.0
|
|
+ }
|
|
+ #[inline]
|
|
+ fn le(&self, other: &Self) -> bool {
|
|
+ self.0 >= other.0
|
|
+ }
|
|
+ #[inline]
|
|
+ fn ge(&self, other: &Self) -> bool {
|
|
+ self.0 <= other.0
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl Ord for $id {
|
|
+ #[inline]
|
|
+ fn cmp(&self, other: &Self) -> crate::cmp::Ordering {
|
|
+ match self.partial_cmp(other) {
|
|
+ Some(x) => x,
|
|
+ None => unsafe { crate::hint::unreachable_unchecked() },
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::hash::Hash for $id {
|
|
+ #[inline]
|
|
+ fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
|
|
+ (self.0 != 0).hash(state);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ impl crate::fmt::Debug for $id {
|
|
+ #[inline]
|
|
+ fn fmt(
|
|
+ &self, fmtter: &mut crate::fmt::Formatter<'_>,
|
|
+ ) -> Result<(), crate::fmt::Error> {
|
|
+ write!(fmtter, "{}({})", stringify!($id), self.0 != 0)
|
|
+ }
|
|
+ }
|
|
+ };
|
|
+}
|
|
+
|
|
+impl_mask_ty!(m8: i8 | /// 8-bit wide mask.
|
|
+);
|
|
+impl_mask_ty!(m16: i16 | /// 16-bit wide mask.
|
|
+);
|
|
+impl_mask_ty!(m32: i32 | /// 32-bit wide mask.
|
|
+);
|
|
+impl_mask_ty!(m64: i64 | /// 64-bit wide mask.
|
|
+);
|
|
+impl_mask_ty!(m128: i128 | /// 128-bit wide mask.
|
|
+);
|
|
+impl_mask_ty!(msize: isize | /// isize-wide mask.
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/sealed.rs b/third_party/rust/packed_simd/src/sealed.rs
|
|
new file mode 100644
|
|
index 000000000000..832acd3f1d54
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/sealed.rs
|
|
@@ -0,0 +1,41 @@
|
|
+//! Sealed traits
|
|
+
|
|
+/// Trait implemented by arrays that can be SIMD types.
|
|
+#[doc(hidden)]
|
|
+pub trait SimdArray {
|
|
+ /// The type of the #[repr(simd)] type.
|
|
+ type Tuple: Copy + Clone;
|
|
+ /// The element type of the vector.
|
|
+ type T;
|
|
+ /// The number of elements in the array.
|
|
+ const N: usize;
|
|
+ /// The type: `[u32; Self::N]`.
|
|
+ type NT;
|
|
+}
|
|
+
|
|
+/// This trait is used to constrain the arguments
|
|
+/// and result type of the portable shuffles.
|
|
+#[doc(hidden)]
|
|
+pub trait Shuffle<Lanes> {
|
|
+ // Lanes is a `[u32; N]` where `N` is the number of vector lanes
|
|
+
|
|
+ /// The result type of the shuffle.
|
|
+ type Output;
|
|
+}
|
|
+
|
|
+/// This trait is implemented by all SIMD vector types.
|
|
+#[doc(hidden)]
|
|
+pub trait Simd {
|
|
+ /// Element type of the SIMD vector
|
|
+ type Element;
|
|
+ /// The number of elements in the SIMD vector.
|
|
+ const LANES: usize;
|
|
+ /// The type: `[u32; Self::LANES]`.
|
|
+ type LanesType;
|
|
+}
|
|
+
|
|
+/// This trait is implemented by all mask types
|
|
+#[doc(hidden)]
|
|
+pub trait Mask {
|
|
+ fn test(&self) -> bool;
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/testing.rs b/third_party/rust/packed_simd/src/testing.rs
|
|
new file mode 100644
|
|
index 000000000000..fcbcf9e2ac8e
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/testing.rs
|
|
@@ -0,0 +1,8 @@
|
|
+//! Testing macros and other utilities.
|
|
+
|
|
+#[macro_use]
|
|
+mod macros;
|
|
+
|
|
+#[cfg(test)]
|
|
+#[macro_use]
|
|
+crate mod utils;
|
|
diff --git a/third_party/rust/packed_simd/src/testing/macros.rs b/third_party/rust/packed_simd/src/testing/macros.rs
|
|
new file mode 100644
|
|
index 000000000000..6008634c76ce
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/testing/macros.rs
|
|
@@ -0,0 +1,44 @@
|
|
+//! Testing macros
|
|
+
|
|
+macro_rules! test_if {
|
|
+ ($cfg_tt:tt: $it:item) => {
|
|
+ #[cfg(any(
|
|
+ // Test everything if:
|
|
+ //
|
|
+ // * tests are enabled,
|
|
+ // * no features about exclusively testing
|
|
+ // specific vector classes are enabled
|
|
+ all(test, not(any(
|
|
+ test_v16,
|
|
+ test_v32,
|
|
+ test_v64,
|
|
+ test_v128,
|
|
+ test_v256,
|
|
+ test_v512,
|
|
+ test_none, // disables all tests
|
|
+ ))),
|
|
+ // Test if:
|
|
+ //
|
|
+ // * tests are enabled
|
|
+ // * a particular cfg token tree returns true
|
|
+ all(test, $cfg_tt),
|
|
+ ))]
|
|
+ $it
|
|
+ };
|
|
+}
|
|
+
|
|
+#[cfg(test)]
|
|
+#[allow(unused)]
|
|
+macro_rules! ref_ {
|
|
+ ($anything:tt) => {
|
|
+ &$anything
|
|
+ };
|
|
+}
|
|
+
|
|
+#[cfg(test)]
|
|
+#[allow(unused)]
|
|
+macro_rules! ref_mut_ {
|
|
+ ($anything:tt) => {
|
|
+ &mut $anything
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/testing/utils.rs b/third_party/rust/packed_simd/src/testing/utils.rs
|
|
new file mode 100644
|
|
index 000000000000..7b8f21ac1c55
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/testing/utils.rs
|
|
@@ -0,0 +1,135 @@
|
|
+//! Testing utilities
|
|
+
|
|
+#![allow(dead_code)]
|
|
+
|
|
+use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered};
|
|
+
|
|
+/// Tests PartialOrd for `a` and `b` where `a < b` is true.
|
|
+pub fn test_lt<T>(
|
|
+ a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>,
|
|
+) where
|
|
+ LexicographicallyOrdered<T>: Debug + PartialOrd,
|
|
+{
|
|
+ assert!(a < b, "{:?}, {:?}", a, b);
|
|
+ assert!(b > a, "{:?}, {:?}", a, b);
|
|
+
|
|
+ assert!(!(a == b), "{:?}, {:?}", a, b);
|
|
+ assert!(a != b, "{:?}, {:?}", a, b);
|
|
+
|
|
+ assert!(a <= b, "{:?}, {:?}", a, b);
|
|
+ assert!(b >= a, "{:?}, {:?}", a, b);
|
|
+
|
|
+ // Irreflexivity
|
|
+ assert!(!(a < a), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b < b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a > a), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b > b), "{:?}, {:?}", a, b);
|
|
+
|
|
+ assert!(a <= a, "{:?}, {:?}", a, b);
|
|
+ assert!(b <= b, "{:?}, {:?}", a, b);
|
|
+}
|
|
+
|
|
+/// Tests PartialOrd for `a` and `b` where `a <= b` is true.
|
|
+pub fn test_le<T>(
|
|
+ a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>,
|
|
+) where
|
|
+ LexicographicallyOrdered<T>: Debug + PartialOrd,
|
|
+{
|
|
+ assert!(a <= b, "{:?}, {:?}", a, b);
|
|
+ assert!(b >= a, "{:?}, {:?}", a, b);
|
|
+
|
|
+ assert!(a == b || a < b, "{:?}, {:?}", a, b);
|
|
+ assert!(a == b || b > a, "{:?}, {:?}", a, b);
|
|
+
|
|
+ if a == b {
|
|
+ assert!(!(a < b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b > a), "{:?}, {:?}", a, b);
|
|
+
|
|
+ assert!(!(a != b), "{:?}, {:?}", a, b);
|
|
+ } else {
|
|
+ assert!(a != b, "{:?}, {:?}", a, b);
|
|
+ test_lt(a, b);
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Test PartialOrd::partial_cmp for `a` and `b` returning `Ordering`
|
|
+pub fn test_cmp<T>(
|
|
+ a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>,
|
|
+ o: Option<crate::cmp::Ordering>,
|
|
+) where
|
|
+ LexicographicallyOrdered<T>: PartialOrd + Debug,
|
|
+ T: Debug + crate::sealed::Simd + Copy + Clone,
|
|
+ <T as crate::sealed::Simd>::Element: Default + Copy + Clone + PartialOrd,
|
|
+{
|
|
+ assert!(T::LANES <= 64, "array length in these two arrays needs updating");
|
|
+ let mut arr_a: [T::Element; 64] = [Default::default(); 64];
|
|
+ let mut arr_b: [T::Element; 64] = [Default::default(); 64];
|
|
+
|
|
+ unsafe {
|
|
+ crate::ptr::write_unaligned(
|
|
+ arr_a.as_mut_ptr() as *mut LexicographicallyOrdered<T>,
|
|
+ a,
|
|
+ )
|
|
+ }
|
|
+ unsafe {
|
|
+ crate::ptr::write_unaligned(
|
|
+ arr_b.as_mut_ptr() as *mut LexicographicallyOrdered<T>,
|
|
+ b,
|
|
+ )
|
|
+ }
|
|
+ let expected = arr_a[0..T::LANES].partial_cmp(&arr_b[0..T::LANES]);
|
|
+ let result = a.partial_cmp(&b);
|
|
+ assert_eq!(expected, result, "{:?}, {:?}", a, b);
|
|
+ assert_eq!(o, result, "{:?}, {:?}", a, b);
|
|
+ match o {
|
|
+ Some(crate::cmp::Ordering::Less) => {
|
|
+ test_lt(a, b);
|
|
+ test_le(a, b);
|
|
+ }
|
|
+ Some(crate::cmp::Ordering::Greater) => {
|
|
+ test_lt(b, a);
|
|
+ test_le(b, a);
|
|
+ }
|
|
+ Some(crate::cmp::Ordering::Equal) => {
|
|
+ assert!(a == b, "{:?}, {:?}", a, b);
|
|
+ assert!(!(a != b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a < b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b < a), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a > b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b > a), "{:?}, {:?}", a, b);
|
|
+
|
|
+ test_le(a, b);
|
|
+ test_le(b, a);
|
|
+ }
|
|
+ None => {
|
|
+ assert!(!(a == b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a != b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a < b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a > b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b < a), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b > a), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a <= b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b <= a), "{:?}, {:?}", a, b);
|
|
+ assert!(!(a >= b), "{:?}, {:?}", a, b);
|
|
+ assert!(!(b >= a), "{:?}, {:?}", a, b);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// Returns a tuple containing two distinct pointer values of the same type as
|
|
+// the element type of the Simd vector `$id`.
|
|
+#[allow(unused)]
|
|
+macro_rules! ptr_vals {
|
|
+ ($id:ty) => {
|
|
+ // expands to an expression
|
|
+ #[allow(unused_unsafe)]
|
|
+ unsafe {
|
|
+ // all bits cleared
|
|
+ let clear: <$id as sealed::Simd>::Element = crate::mem::zeroed();
|
|
+ // all bits set
|
|
+ let set: <$id as sealed::Simd>::Element =
|
|
+ crate::mem::transmute(-1_isize);
|
|
+ (clear, set)
|
|
+ }
|
|
+ };
|
|
+}
|
|
diff --git a/third_party/rust/packed_simd/src/v128.rs b/third_party/rust/packed_simd/src/v128.rs
|
|
new file mode 100644
|
|
index 000000000000..1d0282dc4278
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/v128.rs
|
|
@@ -0,0 +1,80 @@
|
|
+//! 128-bit wide vector types
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_i!([i8; 16]: i8x16, m8x16 | i8 | test_v128 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: |
|
|
+ /// A 128-bit vector with 16 `i8` lanes.
|
|
+);
|
|
+impl_u!([u8; 16]: u8x16, m8x16 | u8 | test_v128 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: |
|
|
+ /// A 128-bit vector with 16 `u8` lanes.
|
|
+);
|
|
+impl_m!([m8; 16]: m8x16 | i8 | test_v128 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: m16x16 |
|
|
+ /// A 128-bit vector mask with 16 `m8` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i16; 8]: i16x8, m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: i8x8, u8x8 |
|
|
+ /// A 128-bit vector with 8 `i16` lanes.
|
|
+);
|
|
+impl_u!([u16; 8]: u16x8, m16x8 | u16| test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: u8x8 |
|
|
+ /// A 128-bit vector with 8 `u16` lanes.
|
|
+);
|
|
+impl_m!([m16; 8]: m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: m8x8, m32x8 |
|
|
+ /// A 128-bit vector mask with 8 `m16` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i32; 4]: i32x4, m32x4 | i32 | test_v128 | x0, x1, x2, x3 |
|
|
+ From: i8x4, u8x4, i16x4, u16x4 |
|
|
+ /// A 128-bit vector with 4 `i32` lanes.
|
|
+);
|
|
+impl_u!([u32; 4]: u32x4, m32x4 | u32| test_v128 | x0, x1, x2, x3 |
|
|
+ From: u8x4, u16x4 |
|
|
+ /// A 128-bit vector with 4 `u32` lanes.
|
|
+);
|
|
+impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 |
|
|
+ From: i8x4, u8x4, i16x4, u16x4 |
|
|
+ /// A 128-bit vector with 4 `f32` lanes.
|
|
+);
|
|
+impl_m!([m32; 4]: m32x4 | i32 | test_v128 | x0, x1, x2, x3 |
|
|
+ From: m8x4, m16x4, m64x4 |
|
|
+ /// A 128-bit vector mask with 4 `m32` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i64; 2]: i64x2, m64x2 | i64 | test_v128 | x0, x1 |
|
|
+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 |
|
|
+ /// A 128-bit vector with 2 `i64` lanes.
|
|
+);
|
|
+impl_u!([u64; 2]: u64x2, m64x2 | u64 | test_v128 | x0, x1 |
|
|
+ From: u8x2, u16x2, u32x2 |
|
|
+ /// A 128-bit vector with 2 `u64` lanes.
|
|
+);
|
|
+impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 |
|
|
+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 |
|
|
+ /// A 128-bit vector with 2 `f64` lanes.
|
|
+);
|
|
+impl_m!([m64; 2]: m64x2 | i64 | test_v128 | x0, x1 |
|
|
+ From: m8x2, m16x2, m32x2, m128x2 |
|
|
+ /// A 128-bit vector mask with 2 `m64` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i128; 1]: i128x1, m128x1 | i128 | test_v128 | x0 |
|
|
+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types
|
|
+ /// A 128-bit vector with 1 `i128` lane.
|
|
+);
|
|
+impl_u!([u128; 1]: u128x1, m128x1 | u128 | test_v128 | x0 |
|
|
+ From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types
|
|
+ /// A 128-bit vector with 1 `u128` lane.
|
|
+);
|
|
+impl_m!([m128; 1]: m128x1 | i128 | test_v128 | x0 |
|
|
+ From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types
|
|
+ /// A 128-bit vector mask with 1 `m128` lane.
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/v16.rs b/third_party/rust/packed_simd/src/v16.rs
|
|
new file mode 100644
|
|
index 000000000000..67a3832d2530
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/v16.rs
|
|
@@ -0,0 +1,16 @@
|
|
+//! 16-bit wide vector types
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_i!([i8; 2]: i8x2, m8x2 | i8 | test_v16 | x0, x1 |
|
|
+ From: |
|
|
+ /// A 16-bit vector with 2 `i8` lanes.
|
|
+);
|
|
+impl_u!([u8; 2]: u8x2, m8x2 | u8 | test_v16 | x0, x1 |
|
|
+ From: |
|
|
+ /// A 16-bit vector with 2 `u8` lanes.
|
|
+);
|
|
+impl_m!([m8; 2]: m8x2 | i8 | test_v16 | x0, x1 |
|
|
+ From: m16x2, m32x2, m64x2, m128x2 |
|
|
+ /// A 16-bit vector mask with 2 `m8` lanes.
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/v256.rs b/third_party/rust/packed_simd/src/v256.rs
|
|
new file mode 100644
|
|
index 000000000000..6b59336f68b6
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/v256.rs
|
|
@@ -0,0 +1,86 @@
|
|
+//! 256-bit wide vector types
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_i!([i8; 32]: i8x32, m8x32 | i8 | test_v256 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
|
|
+ From: |
|
|
+ /// A 256-bit vector with 32 `i8` lanes.
|
|
+);
|
|
+impl_u!([u8; 32]: u8x32, m8x32 | u8 | test_v256 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
|
|
+ From: |
|
|
+ /// A 256-bit vector with 32 `u8` lanes.
|
|
+);
|
|
+impl_m!([m8; 32]: m8x32 | i8 | test_v256 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
|
|
+ From: |
|
|
+ /// A 256-bit vector mask with 32 `m8` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i16; 16]: i16x16, m16x16 | i16 | test_v256 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: i8x16, u8x16 |
|
|
+ /// A 256-bit vector with 16 `i16` lanes.
|
|
+);
|
|
+impl_u!([u16; 16]: u16x16, m16x16 | u16 | test_v256 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: u8x16 |
|
|
+ /// A 256-bit vector with 16 `u16` lanes.
|
|
+);
|
|
+impl_m!([m16; 16]: m16x16 | i16 | test_v256 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: m8x16 |
|
|
+ /// A 256-bit vector mask with 16 `m16` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i32; 8]: i32x8, m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: i8x8, u8x8, i16x8, u16x8 |
|
|
+ /// A 256-bit vector with 8 `i32` lanes.
|
|
+);
|
|
+impl_u!([u32; 8]: u32x8, m32x8 | u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: u8x8, u16x8 |
|
|
+ /// A 256-bit vector with 8 `u32` lanes.
|
|
+);
|
|
+impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: i8x8, u8x8, i16x8, u16x8 |
|
|
+ /// A 256-bit vector with 8 `f32` lanes.
|
|
+);
|
|
+impl_m!([m32; 8]: m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: m8x8, m16x8 |
|
|
+ /// A 256-bit vector mask with 8 `m32` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i64; 4]: i64x4, m64x4 | i64 | test_v256 | x0, x1, x2, x3 |
|
|
+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 |
|
|
+ /// A 256-bit vector with 4 `i64` lanes.
|
|
+);
|
|
+impl_u!([u64; 4]: u64x4, m64x4 | u64 | test_v256 | x0, x1, x2, x3 |
|
|
+ From: u8x4, u16x4, u32x4 |
|
|
+ /// A 256-bit vector with 4 `u64` lanes.
|
|
+);
|
|
+impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 |
|
|
+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 |
|
|
+ /// A 256-bit vector with 4 `f64` lanes.
|
|
+);
|
|
+impl_m!([m64; 4]: m64x4 | i64 | test_v256 | x0, x1, x2, x3 |
|
|
+ From: m8x4, m16x4, m32x4 |
|
|
+ /// A 256-bit vector mask with 4 `m64` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i128; 2]: i128x2, m128x2 | i128 | test_v256 | x0, x1 |
|
|
+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 |
|
|
+ /// A 256-bit vector with 2 `i128` lanes.
|
|
+);
|
|
+impl_u!([u128; 2]: u128x2, m128x2 | u128 | test_v256 | x0, x1 |
|
|
+ From: u8x2, u16x2, u32x2, u64x2 |
|
|
+ /// A 256-bit vector with 2 `u128` lanes.
|
|
+);
|
|
+impl_m!([m128; 2]: m128x2 | i128 | test_v256 | x0, x1 |
|
|
+ From: m8x2, m16x2, m32x2, m64x2 |
|
|
+ /// A 256-bit vector mask with 2 `m128` lanes.
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/v32.rs b/third_party/rust/packed_simd/src/v32.rs
|
|
new file mode 100644
|
|
index 000000000000..09cef9bdd472
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/v32.rs
|
|
@@ -0,0 +1,29 @@
|
|
+//! 32-bit wide vector types
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_i!([i8; 4]: i8x4, m8x4 | i8 | test_v32 | x0, x1, x2, x3 |
|
|
+ From: |
|
|
+ /// A 32-bit vector with 4 `i8` lanes.
|
|
+);
|
|
+impl_u!([u8; 4]: u8x4, m8x4 | u8 | test_v32 | x0, x1, x2, x3 |
|
|
+ From: |
|
|
+ /// A 32-bit vector with 4 `u8` lanes.
|
|
+);
|
|
+impl_m!([m8; 4]: m8x4 | i8 | test_v32 | x0, x1, x2, x3 |
|
|
+ From: m16x4, m32x4, m64x4 |
|
|
+ /// A 32-bit vector mask with 4 `m8` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i16; 2]: i16x2, m16x2 | i16 | test_v32 | x0, x1 |
|
|
+ From: i8x2, u8x2 |
|
|
+ /// A 32-bit vector with 2 `i16` lanes.
|
|
+);
|
|
+impl_u!([u16; 2]: u16x2, m16x2 | u16 | test_v32 | x0, x1 |
|
|
+ From: u8x2 |
|
|
+ /// A 32-bit vector with 2 `u16` lanes.
|
|
+);
|
|
+impl_m!([m16; 2]: m16x2 | i16 | test_v32 | x0, x1 |
|
|
+ From: m8x2, m32x2, m64x2, m128x2 |
|
|
+ /// A 32-bit vector mask with 2 `m16` lanes.
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/v512.rs b/third_party/rust/packed_simd/src/v512.rs
|
|
new file mode 100644
|
|
index 000000000000..b1714aded369
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/v512.rs
|
|
@@ -0,0 +1,99 @@
|
|
+//! 512-bit wide vector types
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_i!([i8; 64]: i8x64, m8x64 | i8 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
|
|
+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
|
|
+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
|
|
+ From: |
|
|
+ /// A 512-bit vector with 64 `i8` lanes.
|
|
+);
|
|
+impl_u!([u8; 64]: u8x64, m8x64 | u8 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
|
|
+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
|
|
+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
|
|
+ From: |
|
|
+ /// A 512-bit vector with 64 `u8` lanes.
|
|
+);
|
|
+impl_m!([m8; 64]: m8x64 | i8 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
|
|
+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
|
|
+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
|
|
+ From: |
|
|
+ /// A 512-bit vector mask with 64 `m8` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i16; 32]: i16x32, m16x32 | i16 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
|
|
+ From: i8x32, u8x32 |
|
|
+ /// A 512-bit vector with 32 `i16` lanes.
|
|
+);
|
|
+impl_u!([u16; 32]: u16x32, m16x32 | u16 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
|
|
+ From: u8x32 |
|
|
+ /// A 512-bit vector with 32 `u16` lanes.
|
|
+);
|
|
+impl_m!([m16; 32]: m16x32 | i16 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
|
|
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
|
|
+ From: m8x32 |
|
|
+ /// A 512-bit vector mask with 32 `m16` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i32; 16]: i32x16, m32x16 | i32 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: i8x16, u8x16, i16x16, u16x16 |
|
|
+ /// A 512-bit vector with 16 `i32` lanes.
|
|
+);
|
|
+impl_u!([u32; 16]: u32x16, m32x16 | u32 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: u8x16, u16x16 |
|
|
+ /// A 512-bit vector with 16 `u32` lanes.
|
|
+);
|
|
+impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: i8x16, u8x16, i16x16, u16x16 |
|
|
+ /// A 512-bit vector with 16 `f32` lanes.
|
|
+);
|
|
+impl_m!([m32; 16]: m32x16 | i32 | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
+ From: m8x16, m16x16 |
|
|
+ /// A 512-bit vector mask with 16 `m32` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i64; 8]: i64x8, m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 |
|
|
+ /// A 512-bit vector with 8 `i64` lanes.
|
|
+);
|
|
+impl_u!([u64; 8]: u64x8, m64x8 | u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: u8x8, u16x8, u32x8 |
|
|
+ /// A 512-bit vector with 8 `u64` lanes.
|
|
+);
|
|
+impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 |
|
|
+ /// A 512-bit vector with 8 `f64` lanes.
|
|
+);
|
|
+impl_m!([m64; 8]: m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: m8x8, m16x8, m32x8 |
|
|
+ /// A 512-bit vector mask with 8 `m64` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i128; 4]: i128x4, m128x4 | i128 | test_v512 | x0, x1, x2, x3 |
|
|
+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 |
|
|
+ /// A 512-bit vector with 4 `i128` lanes.
|
|
+);
|
|
+impl_u!([u128; 4]: u128x4, m128x4 | u128 | test_v512 | x0, x1, x2, x3 |
|
|
+ From: u8x4, u16x4, u32x4, u64x4 |
|
|
+ /// A 512-bit vector with 4 `u128` lanes.
|
|
+);
|
|
+impl_m!([m128; 4]: m128x4 | i128 | test_v512 | x0, x1, x2, x3 |
|
|
+ From: m8x4, m16x4, m32x4, m64x4 |
|
|
+ /// A 512-bit vector mask with 4 `m128` lanes.
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/v64.rs b/third_party/rust/packed_simd/src/v64.rs
|
|
new file mode 100644
|
|
index 000000000000..1ee6219c040b
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/v64.rs
|
|
@@ -0,0 +1,66 @@
|
|
+//! 64-bit wide vector types
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use super::*;
|
|
+
|
|
+impl_i!([i8; 8]: i8x8, m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: |
|
|
+ /// A 64-bit vector with 8 `i8` lanes.
|
|
+);
|
|
+impl_u!([u8; 8]: u8x8, m8x8 | u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: |
|
|
+ /// A 64-bit vector with 8 `u8` lanes.
|
|
+);
|
|
+impl_m!([m8; 8]: m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: m16x8, m32x8 |
|
|
+ /// A 64-bit vector mask with 8 `m8` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i16; 4]: i16x4, m16x4 | i16 | test_v64 | x0, x1, x2, x3 |
|
|
+ From: i8x4, u8x4 |
|
|
+ /// A 64-bit vector with 4 `i16` lanes.
|
|
+);
|
|
+impl_u!([u16; 4]: u16x4, m16x4 | u16 | test_v64 | x0, x1, x2, x3 |
|
|
+ From: u8x4 |
|
|
+ /// A 64-bit vector with 4 `u16` lanes.
|
|
+);
|
|
+impl_m!([m16; 4]: m16x4 | i16 | test_v64 | x0, x1, x2, x3 |
|
|
+ From: m8x4, m32x4, m64x4 |
|
|
+ /// A 64-bit vector mask with 4 `m16` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([i32; 2]: i32x2, m32x2 | i32 | test_v64 | x0, x1 |
|
|
+ From: i8x2, u8x2, i16x2, u16x2 |
|
|
+ /// A 64-bit vector with 2 `i32` lanes.
|
|
+);
|
|
+impl_u!([u32; 2]: u32x2, m32x2 | u32 | test_v64 | x0, x1 |
|
|
+ From: u8x2, u16x2 |
|
|
+ /// A 64-bit vector with 2 `u32` lanes.
|
|
+);
|
|
+impl_m!([m32; 2]: m32x2 | i32 | test_v64 | x0, x1 |
|
|
+ From: m8x2, m16x2, m64x2, m128x2 |
|
|
+ /// A 64-bit vector mask with 2 `m32` lanes.
|
|
+);
|
|
+impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 |
|
|
+ From: i8x2, u8x2, i16x2, u16x2 |
|
|
+ /// A 64-bit vector with 2 `f32` lanes.
|
|
+);
|
|
+
|
|
+/*
|
|
+impl_i!([i64; 1]: i64x1, m64x1 | i64 | test_v64 | x0 |
|
|
+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ | // FIXME: primitive to vector conversion
|
|
+ /// A 64-bit vector with 1 `i64` lanes.
|
|
+);
|
|
+impl_u!([u64; 1]: u64x1, m64x1 | u64 | test_v64 | x0 |
|
|
+ From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion
|
|
+ /// A 64-bit vector with 1 `u64` lanes.
|
|
+);
|
|
+impl_m!([m64; 1]: m64x1 | i64 | test_v64 | x0 |
|
|
+ From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types
|
|
+ /// A 64-bit vector mask with 1 `m64` lanes.
|
|
+);
|
|
+impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 |
|
|
+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types
|
|
+ /// A 64-bit vector with 1 `f64` lanes.
|
|
+);
|
|
+*/
|
|
diff --git a/third_party/rust/packed_simd/src/vPtr.rs b/third_party/rust/packed_simd/src/vPtr.rs
|
|
new file mode 100644
|
|
index 000000000000..fe9fb28ffa89
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/vPtr.rs
|
|
@@ -0,0 +1,34 @@
|
|
+//! Vectors of pointers
|
|
+#![rustfmt::skip]
|
|
+
|
|
+use crate::*;
|
|
+
|
|
+impl_const_p!(
|
|
+ [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: |
|
|
+ /// A vector with 2 `*const T` lanes
|
|
+);
|
|
+
|
|
+impl_mut_p!(
|
|
+ [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: |
|
|
+ /// A vector with 2 `*mut T` lanes
|
|
+);
|
|
+
|
|
+impl_const_p!(
|
|
+ [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: |
|
|
+ /// A vector with 4 `*const T` lanes
|
|
+);
|
|
+
|
|
+impl_mut_p!(
|
|
+ [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: |
|
|
+ /// A vector with 4 `*mut T` lanes
|
|
+);
|
|
+
|
|
+impl_const_p!(
|
|
+ [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: |
|
|
+ /// A vector with 8 `*const T` lanes
|
|
+);
|
|
+
|
|
+impl_mut_p!(
|
|
+ [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: |
|
|
+ /// A vector with 8 `*mut T` lanes
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/src/vSize.rs b/third_party/rust/packed_simd/src/vSize.rs
|
|
new file mode 100644
|
|
index 000000000000..5594323372b4
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/src/vSize.rs
|
|
@@ -0,0 +1,53 @@
|
|
+//! Vectors with pointer-sized elements
|
|
+
|
|
+use crate::codegen::pointer_sized_int::{isize_, usize_};
|
|
+use crate::*;
|
|
+
|
|
+impl_i!([isize; 2]: isizex2, msizex2 | isize_ | test_v128 |
|
|
+ x0, x1|
|
|
+ From: |
|
|
+ /// A vector with 2 `isize` lanes.
|
|
+);
|
|
+
|
|
+impl_u!([usize; 2]: usizex2, msizex2 | usize_ | test_v128 |
|
|
+ x0, x1|
|
|
+ From: |
|
|
+ /// A vector with 2 `usize` lanes.
|
|
+);
|
|
+impl_m!([msize; 2]: msizex2 | isize_ | test_v128 |
|
|
+ x0, x1 |
|
|
+ From: |
|
|
+ /// A vector mask with 2 `msize` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([isize; 4]: isizex4, msizex4 | isize_ | test_v256 |
|
|
+ x0, x1, x2, x3 |
|
|
+ From: |
|
|
+ /// A vector with 4 `isize` lanes.
|
|
+);
|
|
+impl_u!([usize; 4]: usizex4, msizex4 | usize_ | test_v256 |
|
|
+ x0, x1, x2, x3|
|
|
+ From: |
|
|
+ /// A vector with 4 `usize` lanes.
|
|
+);
|
|
+impl_m!([msize; 4]: msizex4 | isize_ | test_v256 |
|
|
+ x0, x1, x2, x3 |
|
|
+ From: |
|
|
+ /// A vector mask with 4 `msize` lanes.
|
|
+);
|
|
+
|
|
+impl_i!([isize; 8]: isizex8, msizex8 | isize_ | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: |
|
|
+ /// A vector with 8 `isize` lanes.
|
|
+);
|
|
+impl_u!([usize; 8]: usizex8, msizex8 | usize_ | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: |
|
|
+ /// A vector with 8 `usize` lanes.
|
|
+);
|
|
+impl_m!([msize; 8]: msizex8 | isize_ | test_v512 |
|
|
+ x0, x1, x2, x3, x4, x5, x6, x7 |
|
|
+ From: |
|
|
+ /// A vector mask with 8 `msize` lanes.
|
|
+);
|
|
diff --git a/third_party/rust/packed_simd/tests/endianness.rs b/third_party/rust/packed_simd/tests/endianness.rs
|
|
new file mode 100644
|
|
index 000000000000..1e6b4f354301
|
|
--- /dev/null
|
|
+++ b/third_party/rust/packed_simd/tests/endianness.rs
|
|
@@ -0,0 +1,262 @@
|
|
+#[cfg(target_arch = "wasm32")]
|
|
+use wasm_bindgen_test::*;
|
|
+
|
|
+use packed_simd::*;
|
|
+use std::{mem, slice};
|
|
+
|
|
+#[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+fn endian_indexing() {
|
|
+ let v = i32x4::new(0, 1, 2, 3);
|
|
+ assert_eq!(v.extract(0), 0);
|
|
+ assert_eq!(v.extract(1), 1);
|
|
+ assert_eq!(v.extract(2), 2);
|
|
+ assert_eq!(v.extract(3), 3);
|
|
+}
|
|
+
|
|
+#[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+fn endian_bitcasts() {
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let t: i16x8 = unsafe { mem::transmute(x) };
|
|
+ let e: i16x8 = if cfg!(target_endian = "little") {
|
|
+ i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854)
|
|
+ } else {
|
|
+ i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599)
|
|
+ };
|
|
+ assert_eq!(t, e);
|
|
+}
|
|
+
|
|
+#[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+fn endian_casts() {
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let t: i16x16 = x.into(); // simd_cast
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let e = i16x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ assert_eq!(t, e);
|
|
+}
|
|
+
|
|
+#[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+fn endian_load_and_stores() {
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let mut y: [i16; 8] = [0; 8];
|
|
+ x.write_to_slice_unaligned(unsafe {
|
|
+ slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16)
|
|
+ });
|
|
+
|
|
+ let e: [i16; 8] = if cfg!(target_endian = "little") {
|
|
+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
|
|
+ } else {
|
|
+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
|
|
+ };
|
|
+ assert_eq!(y, e);
|
|
+
|
|
+ let z = i8x16::from_slice_unaligned(unsafe {
|
|
+ slice::from_raw_parts(&y as *const _ as *const i8, 16)
|
|
+ });
|
|
+ assert_eq!(z, x);
|
|
+}
|
|
+
|
|
+#[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+fn endian_array_union() {
|
|
+ union A {
|
|
+ data: [f32; 4],
|
|
+ vec: f32x4,
|
|
+ }
|
|
+ let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data };
|
|
+ assert_eq!(x[0], 0_f32);
|
|
+ assert_eq!(x[1], 1_f32);
|
|
+ assert_eq!(x[2], 2_f32);
|
|
+ assert_eq!(x[3], 3_f32);
|
|
+ let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.] }.vec };
|
|
+ assert_eq!(y, f32x4::new(3., 2., 1., 0.));
|
|
+
|
|
+ union B {
|
|
+ data: [i8; 16],
|
|
+ vec: i8x16,
|
|
+ }
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let x: [i8; 16] = unsafe { B { vec: x }.data };
|
|
+
|
|
+ for i in 0..16 {
|
|
+ assert_eq!(x[i], i as i8);
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let y = [
|
|
+ 15, 14, 13, 12, 11, 19, 9, 8,
|
|
+ 7, 6, 5, 4, 3, 2, 1, 0
|
|
+ ];
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let e = i8x16::new(
|
|
+ 15, 14, 13, 12, 11, 19, 9, 8,
|
|
+ 7, 6, 5, 4, 3, 2, 1, 0
|
|
+ );
|
|
+ let z = unsafe { B { data: y }.vec };
|
|
+ assert_eq!(z, e);
|
|
+
|
|
+ union C {
|
|
+ data: [i16; 8],
|
|
+ vec: i8x16,
|
|
+ }
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let x: [i16; 8] = unsafe { C { vec: x }.data };
|
|
+
|
|
+ let e: [i16; 8] = if cfg!(target_endian = "little") {
|
|
+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
|
|
+ } else {
|
|
+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
|
|
+ };
|
|
+ assert_eq!(x, e);
|
|
+}
|
|
+
|
|
+#[cfg_attr(not(target_arch = "wasm32"), test)]
|
|
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
|
+fn endian_tuple_access() {
|
|
+ type F32x4T = (f32, f32, f32, f32);
|
|
+ union A {
|
|
+ data: F32x4T,
|
|
+ vec: f32x4,
|
|
+ }
|
|
+ let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data };
|
|
+ assert_eq!(x.0, 0_f32);
|
|
+ assert_eq!(x.1, 1_f32);
|
|
+ assert_eq!(x.2, 2_f32);
|
|
+ assert_eq!(x.3, 3_f32);
|
|
+ let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) }.vec };
|
|
+ assert_eq!(y, f32x4::new(3., 2., 1., 0.));
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8);
|
|
+ union B {
|
|
+ data: I8x16T,
|
|
+ vec: i8x16,
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let x: I8x16T = unsafe { B { vec: x }.data };
|
|
+
|
|
+ assert_eq!(x.0, 0);
|
|
+ assert_eq!(x.1, 1);
|
|
+ assert_eq!(x.2, 2);
|
|
+ assert_eq!(x.3, 3);
|
|
+ assert_eq!(x.4, 4);
|
|
+ assert_eq!(x.5, 5);
|
|
+ assert_eq!(x.6, 6);
|
|
+ assert_eq!(x.7, 7);
|
|
+ assert_eq!(x.8, 8);
|
|
+ assert_eq!(x.9, 9);
|
|
+ assert_eq!(x.10, 10);
|
|
+ assert_eq!(x.11, 11);
|
|
+ assert_eq!(x.12, 12);
|
|
+ assert_eq!(x.13, 13);
|
|
+ assert_eq!(x.14, 14);
|
|
+ assert_eq!(x.15, 15);
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let y = (
|
|
+ 15, 14, 13, 12, 11, 10, 9, 8,
|
|
+ 7, 6, 5, 4, 3, 2, 1, 0
|
|
+ );
|
|
+ let z: i8x16 = unsafe { B { data: y }.vec };
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let e = i8x16::new(
|
|
+ 15, 14, 13, 12, 11, 10, 9, 8,
|
|
+ 7, 6, 5, 4, 3, 2, 1, 0
|
|
+ );
|
|
+ assert_eq!(e, z);
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16);
|
|
+ union C {
|
|
+ data: I16x8T,
|
|
+ vec: i8x16,
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let x: I16x8T = unsafe { C { vec: x }.data };
|
|
+
|
|
+ let e: [i16; 8] = if cfg!(target_endian = "little") {
|
|
+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
|
|
+ } else {
|
|
+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
|
|
+ };
|
|
+ assert_eq!(x.0, e[0]);
|
|
+ assert_eq!(x.1, e[1]);
|
|
+ assert_eq!(x.2, e[2]);
|
|
+ assert_eq!(x.3, e[3]);
|
|
+ assert_eq!(x.4, e[4]);
|
|
+ assert_eq!(x.5, e[5]);
|
|
+ assert_eq!(x.6, e[6]);
|
|
+ assert_eq!(x.7, e[7]);
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ #[repr(C)]
|
|
+ #[derive(Copy ,Clone)]
|
|
+ pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16,
|
|
+ pub i8, pub i8, pub i16, pub i8, pub i8, pub i16);
|
|
+
|
|
+ union D {
|
|
+ data: Tup,
|
|
+ vec: i8x16,
|
|
+ }
|
|
+
|
|
+ #[cfg_attr(rustfmt, rustfmt_skip)]
|
|
+ let x = i8x16::new(
|
|
+ 0, 1, 2, 3, 4, 5, 6, 7,
|
|
+ 8, 9, 10, 11, 12, 13, 14, 15,
|
|
+ );
|
|
+ let x: Tup = unsafe { D { vec: x }.data };
|
|
+
|
|
+ let e: [i16; 12] = if cfg!(target_endian = "little") {
|
|
+ [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854]
|
|
+ } else {
|
|
+ [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599]
|
|
+ };
|
|
+ assert_eq!(x.0 as i16, e[0]);
|
|
+ assert_eq!(x.1 as i16, e[1]);
|
|
+ assert_eq!(x.2 as i16, e[2]);
|
|
+ assert_eq!(x.3 as i16, e[3]);
|
|
+ assert_eq!(x.4 as i16, e[4]);
|
|
+ assert_eq!(x.5 as i16, e[5]);
|
|
+ assert_eq!(x.6 as i16, e[6]);
|
|
+ assert_eq!(x.7 as i16, e[7]);
|
|
+ assert_eq!(x.8 as i16, e[8]);
|
|
+ assert_eq!(x.9 as i16, e[9]);
|
|
+ assert_eq!(x.10 as i16, e[10]);
|
|
+ assert_eq!(x.11 as i16, e[11]);
|
|
+}
|
|
diff --git a/third_party/rust/simd/.cargo-checksum.json b/third_party/rust/simd/.cargo-checksum.json
|
|
deleted file mode 100644
|
|
index 5e8c154cda72..000000000000
|
|
--- a/third_party/rust/simd/.cargo-checksum.json
|
|
+++ /dev/null
|
|
@@ -1 +0,0 @@
|
|
-{"files":{"Cargo.toml":"0c7a480c62d7b42604098fa1dd6453be79629112569c494efa75d7fd0998fd69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6d3a9431e65e69c73a8923e6517b889d17549b23db406b9ec027710d16af701f","README.md":"f426ca32bb44fee39d83d51e481fe6b2640d4b78cb097c952cd75800b886f2fd","benches/mandelbrot.rs":"051b5199e66bca6cf7774e9024915fd4e1349ab39726a10a14e06b60d65d87a4","benches/matrix.rs":"048a21dacdb62365e0105d00d2c8cd6bd2396ac81134f2bff7eb4f7d095fb735","build.rs":"69c9c9029ca969a4bb3f11429bc1424fa75af46143eb0d853b4db3a512859b32","examples/axpy.rs":"4307626045d64ec08361c97c9c72c5dc8d361bdc88f64453b97ac0212041a1b2","examples/convert.rs":"8e658fde050f8a0d8b84ad7570446b10fcf544afbd551b940ca340474f324840","examples/dot-product.rs":"6fe2e007c147af5353804173a593c5b9d57dbccec156e1da37e9e32537363f91","examples/fannkuch-redux-nosimd.rs":"7b2fbde35e8666929d14d67328471cb0483d038a5325232f8db148b30865312b","examples/fannkuch-redux.rs":"ea21fdbd2274488a62cc984acad6e0b65d52f24fb4ff63b7057a3a667e9c8aae","examples/mandelbrot.rs":"71be242543c1e487145d7f16341c05d05d86109de4d9e94c5d6bc9a9c6ed9766","examples/matrix-inverse.rs":"93dbc55c66a72e5f7bc730072f35682523fa20dd362755d8443ad6982143cb5d","examples/nbody-nosimd.rs":"9cf46ea02e266c20f811318f1c5856d5afb9575b2d48d552fbd978f5c1856bdb","examples/nbody.rs":"a864311affab262024479d6348ff51af43d809e9ad332ec30ea4aacceaa2eae1","examples/ops.rs":"b08ea83583df71d0052895d677320a9888da5b6729c9b70636d31ede5128bb7f","examples/spectral-norm-nosimd.rs":"ffc8512ecde779078ea467f38f423a0ea623c63da7078193f9dd370200773f79","examples/spectral-norm.rs":"edb09c9d477f83939098cfb77a27cc298bc7a0c8a8e29cece0cccae0d70d890e","src/aarch64/mod.rs":"83f52775364c98de0cecb7e1509530c18972e932469f5f1522aa24a735d0fa37","src/aarch64/neon.rs":"3c05ea43b7261b9af9c0d904b37de01c2ba99caedcb464700f16617b672965a1","src/arm/mod.rs":"dcdd90bc0b39abaf86a0c8946d442b16313563fbae1ff03248628275c74d8617","src/arm/neon.rs":"71d0bb6dac5
f58599bb825449701a05cf32f6eca1918e80d060b746e69751c37","src/common.rs":"c5a7b937c5cd8c3bccf0fb20d5d77770c0d9b0dd9fa06a661c6f2ddf118e65c0","src/lib.rs":"a24a207e65468de2189297380747e2f2f33ec2317f4b83f0665d34b1c09feb08","src/sixty_four.rs":"d168776d02acf943bda8044b24e644b7a9584197a223eba1a7c3024b205dc87d","src/v256.rs":"34bfde3676e23f6925db5d0408ae838e3aab7706128fd7c33e855b8579c69318","src/x86/avx.rs":"efcf2120a904a89b0adf2d3d3bdd0ca17df2ec058410af23fb7e81915873f808","src/x86/avx2.rs":"3bcb3f391ad5f16f0a6da0bc1301329beb478ad6265bd3b2c9c124fc2e6198e5","src/x86/mod.rs":"0acc5a5e2672e2a0fddc11065663be8b8fa2da87320ea291fa86ff8c2f33edf5","src/x86/sse2.rs":"8807fb04bbfb404e17fcacf1e21d22616f8b377540a227b1fd03c121879122dd","src/x86/sse3.rs":"9bd01a4f08069ca4f445952e744d651efe887e3835b18872e757375f0d053bd2","src/x86/sse4_1.rs":"9ceb80dd70a7e7dfeef508cb935e1a2637175bc87a3b090f5dea691ff6aa0516","src/x86/sse4_2.rs":"c59321aed8decdce4d0d8570cff46aed02e1a8265647ef7702e9b180fc581254","src/x86/ssse3.rs":"2290f0269bae316b8e0491495645ee38a9bd73525c8572759c1328341c3bdb4c"},"package":"0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84"}
|
|
\ No newline at end of file
|
|
diff --git a/third_party/rust/simd/Cargo.toml b/third_party/rust/simd/Cargo.toml
|
|
deleted file mode 100644
|
|
index 30279b93556c..000000000000
|
|
--- a/third_party/rust/simd/Cargo.toml
|
|
+++ /dev/null
|
|
@@ -1,37 +0,0 @@
|
|
-# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
|
-#
|
|
-# When uploading crates to the registry Cargo will automatically
|
|
-# "normalize" Cargo.toml files for maximal compatibility
|
|
-# with all versions of Cargo and also rewrite `path` dependencies
|
|
-# to registry (e.g. crates.io) dependencies
|
|
-#
|
|
-# If you believe there's an error in this file please file an
|
|
-# issue against the rust-lang/cargo repository. If you're
|
|
-# editing this file be aware that the upstream Cargo.toml
|
|
-# will likely look very different (and much more reasonable)
|
|
-
|
|
-[package]
|
|
-name = "simd"
|
|
-version = "0.2.3"
|
|
-authors = ["Huon Wilson <dbau.pp+github@gmail.com>"]
|
|
-description = "`simd` offers limited cross-platform access to SIMD instructions on\nCPUs, as well as raw interfaces to platform-specific instructions.\n(To be obsoleted by the `std::simd` implementation RFC 2366.)\n"
|
|
-documentation = "https://docs.rs/simd/"
|
|
-readme = "README.md"
|
|
-keywords = ["simd", "data-parallel"]
|
|
-license = "MIT/Apache-2.0"
|
|
-repository = "https://github.com/hsivonen/simd"
|
|
-[package.metadata.docs.rs]
|
|
-features = ["doc"]
|
|
-[dependencies.serde]
|
|
-version = "1.0"
|
|
-optional = true
|
|
-
|
|
-[dependencies.serde_derive]
|
|
-version = "1.0"
|
|
-optional = true
|
|
-[dev-dependencies.cfg-if]
|
|
-version = "0.1"
|
|
-
|
|
-[features]
|
|
-doc = []
|
|
-with-serde = ["serde", "serde_derive"]
|
|
diff --git a/third_party/rust/simd/README.md b/third_party/rust/simd/README.md
|
|
deleted file mode 100644
|
|
index 1c34f49bcd91..000000000000
|
|
--- a/third_party/rust/simd/README.md
|
|
+++ /dev/null
|
|
@@ -1,11 +0,0 @@
|
|
-# `simd`
|
|
-
|
|
-[![Build Status](https://travis-ci.org/hsivonen/simd.svg?branch=master)](https://travis-ci.org/hsivonen/simd)
|
|
-[![crates.io](https://meritbadge.herokuapp.com/simd)](https://crates.io/crates/simd)
|
|
-[![docs.rs](https://docs.rs/simd/badge.svg)](https://docs.rs/simd/)
|
|
-
|
|
-`simd` offers a basic interface to the SIMD functionality of CPUs. (Note: This crate fails to build unless the target is aarch64, x86_64, i686 (i.e. SSE2 enabled; not i586) or an ARMv7 target (thumb or not) with NEON enabled.)
|
|
-
|
|
-This crate is expected to become _obsolete_ once the implementation of [RFC 2366](https://github.com/rust-lang/rfcs/pull/2366) lands in the standard library.
|
|
-
|
|
-[Documentation](https://docs.rs/simd)
|
|
diff --git a/third_party/rust/simd/benches/mandelbrot.rs b/third_party/rust/simd/benches/mandelbrot.rs
|
|
deleted file mode 100755
|
|
index 61061a4a301f..000000000000
|
|
--- a/third_party/rust/simd/benches/mandelbrot.rs
|
|
+++ /dev/null
|
|
@@ -1,117 +0,0 @@
|
|
-#![feature(test)]
|
|
-#![feature(cfg_target_feature)]
|
|
-
|
|
-extern crate simd;
|
|
-extern crate test;
|
|
-
|
|
-use test::black_box as bb;
|
|
-use test::Bencher as B;
|
|
-use simd::{f32x4, u32x4};
|
|
-#[cfg(any(target_feature = "avx", target_feature = "avx2"))]
|
|
-use simd::x86::avx::{f32x8, u32x8};
|
|
-
|
|
-fn naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 {
|
|
- let mut x = c_x;
|
|
- let mut y = c_y;
|
|
- let mut count = 0;
|
|
- while count < max_iter {
|
|
- let xy = x * y;
|
|
- let xx = x * x;
|
|
- let yy = y * y;
|
|
- let sum = xx + yy;
|
|
- if sum > 4.0 {
|
|
- break
|
|
- }
|
|
- count += 1;
|
|
- x = xx - yy + c_x;
|
|
- y = xy * 2.0 + c_y;
|
|
- }
|
|
- count
|
|
-}
|
|
-
|
|
-fn simd4(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 {
|
|
- let mut x = c_x;
|
|
- let mut y = c_y;
|
|
-
|
|
- let mut count = u32x4::splat(0);
|
|
- for _ in 0..max_iter as usize {
|
|
- let xy = x * y;
|
|
- let xx = x * x;
|
|
- let yy = y * y;
|
|
- let sum = xx + yy;
|
|
- let mask = sum.lt(f32x4::splat(4.0));
|
|
-
|
|
- if !mask.any() { break }
|
|
- count = count + mask.to_i().select(u32x4::splat(1), u32x4::splat(0));
|
|
-
|
|
- x = xx - yy + c_x;
|
|
- y = xy + xy + c_y;
|
|
- }
|
|
- count
|
|
-}
|
|
-
|
|
-#[cfg(target_feature = "avx")]
|
|
-fn simd8(c_x: f32x8, c_y: f32x8, max_iter: u32) -> u32x8 {
|
|
- let mut x = c_x;
|
|
- let mut y = c_y;
|
|
-
|
|
- let mut count = u32x8::splat(0);
|
|
- for _ in 0..max_iter as usize {
|
|
- let xy = x * y;
|
|
- let xx = x * x;
|
|
- let yy = y * y;
|
|
- let sum = xx + yy;
|
|
- let mask = sum.lt(f32x8::splat(4.0));
|
|
-
|
|
- if !mask.any() { break }
|
|
- count = count + mask.to_i().select(u32x8::splat(1), u32x8::splat(0));
|
|
-
|
|
- x = xx - yy + c_x;
|
|
- y = xy + xy + c_y;
|
|
- }
|
|
- count
|
|
-}
|
|
-
|
|
-const SCALE: f32 = 3.0 / 100.0;
|
|
-const N: u32 = 100;
|
|
-#[bench]
|
|
-fn mandel_naive(b: &mut B) {
|
|
- b.iter(|| {
|
|
- for j in 0..100 {
|
|
- let y = -1.5 + (j as f32) * SCALE;
|
|
- for i in 0..100 {
|
|
- let x = -2.2 + (i as f32) * SCALE;
|
|
- bb(naive(x, y, N));
|
|
- }
|
|
- }
|
|
- })
|
|
-}
|
|
-#[bench]
|
|
-fn mandel_simd4(b: &mut B) {
|
|
- let tweak = u32x4::new(0, 1, 2, 3);
|
|
- b.iter(|| {
|
|
- for j in 0..100 {
|
|
- let y = f32x4::splat(-1.5) + f32x4::splat(SCALE) * u32x4::splat(j).to_f32();
|
|
- for i in 0..25 {
|
|
- let i = u32x4::splat(i * 4) + tweak;
|
|
- let x = f32x4::splat(-2.2) + f32x4::splat(SCALE) * i.to_f32();
|
|
- bb(simd4(x, y, N));
|
|
- }
|
|
- }
|
|
- })
|
|
-}
|
|
-#[cfg(any(target_feature = "avx", target_feature = "avx2"))]
|
|
-#[bench]
|
|
-fn mandel_simd8(b: &mut B) {
|
|
- let tweak = u32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
|
- b.iter(|| {
|
|
- for j in 0..100 {
|
|
- let y = f32x8::splat(-1.5) + f32x8::splat(SCALE) * u32x8::splat(j).to_f32();
|
|
- for i in 0..13 { // 100 not divisible by 8 :(
|
|
- let i = u32x8::splat(i * 8) + tweak;
|
|
- let x = f32x8::splat(-2.2) + f32x8::splat(SCALE) * i.to_f32();
|
|
- bb(simd8(x, y, N));
|
|
- }
|
|
- }
|
|
- })
|
|
-}
|
|
diff --git a/third_party/rust/simd/benches/matrix.rs b/third_party/rust/simd/benches/matrix.rs
|
|
deleted file mode 100755
|
|
index 36aa88237492..000000000000
|
|
--- a/third_party/rust/simd/benches/matrix.rs
|
|
+++ /dev/null
|
|
@@ -1,485 +0,0 @@
|
|
-#![feature(test)]
|
|
-#![feature(cfg_target_feature)]
|
|
-extern crate test;
|
|
-extern crate simd;
|
|
-
|
|
-use test::black_box as bb;
|
|
-use test::Bencher as B;
|
|
-use simd::f32x4;
|
|
-#[cfg(target_feature = "avx")]
|
|
-use simd::x86::avx::{f32x8, f64x4};
|
|
-// #[cfg(target_feature = "avx2")]
|
|
-// use simd::x86::avx2::Avx2F32x8;
|
|
-
|
|
-
|
|
-#[bench]
|
|
-fn multiply_naive(b: &mut B) {
|
|
- let x = [[1.0_f32; 4]; 4];
|
|
- let y = [[2.0; 4]; 4];
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let (x, y) = bb((&x, &y));
|
|
-
|
|
- bb(&[[x[0][0] * y[0][0] + x[1][0] * y[0][1] + x[2][0] * y[0][2] + x[3][0] * y[0][3],
|
|
- x[0][1] * y[0][0] + x[1][1] * y[0][1] + x[2][1] * y[0][2] + x[3][1] * y[0][3],
|
|
- x[0][2] * y[0][0] + x[1][2] * y[0][1] + x[2][2] * y[0][2] + x[3][2] * y[0][3],
|
|
- x[0][3] * y[0][0] + x[1][3] * y[0][1] + x[2][3] * y[0][2] + x[3][3] * y[0][3]],
|
|
- [x[0][0] * y[1][0] + x[1][0] * y[1][1] + x[2][0] * y[1][2] + x[3][0] * y[1][3],
|
|
- x[0][1] * y[1][0] + x[1][1] * y[1][1] + x[2][1] * y[1][2] + x[3][1] * y[1][3],
|
|
- x[0][2] * y[1][0] + x[1][2] * y[1][1] + x[2][2] * y[1][2] + x[3][2] * y[1][3],
|
|
- x[0][3] * y[1][0] + x[1][3] * y[1][1] + x[2][3] * y[1][2] + x[3][3] * y[1][3]],
|
|
- [x[0][0] * y[2][0] + x[1][0] * y[2][1] + x[2][0] * y[2][2] + x[3][0] * y[2][3],
|
|
- x[0][1] * y[2][0] + x[1][1] * y[2][1] + x[2][1] * y[2][2] + x[3][1] * y[2][3],
|
|
- x[0][2] * y[2][0] + x[1][2] * y[2][1] + x[2][2] * y[2][2] + x[3][2] * y[2][3],
|
|
- x[0][3] * y[2][0] + x[1][3] * y[2][1] + x[2][3] * y[2][2] + x[3][3] * y[2][3]],
|
|
- [x[0][0] * y[3][0] + x[1][0] * y[3][1] + x[2][0] * y[3][2] + x[3][0] * y[3][3],
|
|
- x[0][1] * y[3][0] + x[1][1] * y[3][1] + x[2][1] * y[3][2] + x[3][1] * y[3][3],
|
|
- x[0][2] * y[3][0] + x[1][2] * y[3][1] + x[2][2] * y[3][2] + x[3][2] * y[3][3],
|
|
- x[0][3] * y[3][0] + x[1][3] * y[3][1] + x[2][3] * y[3][2] + x[3][3] * y[3][3]],
|
|
- ]);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[bench]
|
|
-fn multiply_simd4_32(b: &mut B) {
|
|
- let x = [f32x4::splat(1.0_f32); 4];
|
|
- let y = [f32x4::splat(2.0); 4];
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let (x, y) = bb((&x, &y));
|
|
-
|
|
- let y0 = y[0];
|
|
- let y1 = y[1];
|
|
- let y2 = y[2];
|
|
- let y3 = y[3];
|
|
- bb(&[f32x4::splat(y0.extract(0)) * x[0] +
|
|
- f32x4::splat(y0.extract(1)) * x[1] +
|
|
- f32x4::splat(y0.extract(2)) * x[2] +
|
|
- f32x4::splat(y0.extract(3)) * x[3],
|
|
- f32x4::splat(y1.extract(0)) * x[0] +
|
|
- f32x4::splat(y1.extract(1)) * x[1] +
|
|
- f32x4::splat(y1.extract(2)) * x[2] +
|
|
- f32x4::splat(y1.extract(3)) * x[3],
|
|
- f32x4::splat(y2.extract(0)) * x[0] +
|
|
- f32x4::splat(y2.extract(1)) * x[1] +
|
|
- f32x4::splat(y2.extract(2)) * x[2] +
|
|
- f32x4::splat(y2.extract(3)) * x[3],
|
|
- f32x4::splat(y3.extract(0)) * x[0] +
|
|
- f32x4::splat(y3.extract(1)) * x[1] +
|
|
- f32x4::splat(y3.extract(2)) * x[2] +
|
|
- f32x4::splat(y3.extract(3)) * x[3],
|
|
- ]);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[cfg(target_feature = "avx")]
|
|
-#[bench]
|
|
-fn multiply_simd4_64(b: &mut B) {
|
|
- let x = [f64x4::splat(1.0_f64); 4];
|
|
- let y = [f64x4::splat(2.0); 4];
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let (x, y) = bb((&x, &y));
|
|
-
|
|
- let y0 = y[0];
|
|
- let y1 = y[1];
|
|
- let y2 = y[2];
|
|
- let y3 = y[3];
|
|
- bb(&[f64x4::splat(y0.extract(0)) * x[0] +
|
|
- f64x4::splat(y0.extract(1)) * x[1] +
|
|
- f64x4::splat(y0.extract(2)) * x[2] +
|
|
- f64x4::splat(y0.extract(3)) * x[3],
|
|
- f64x4::splat(y1.extract(0)) * x[0] +
|
|
- f64x4::splat(y1.extract(1)) * x[1] +
|
|
- f64x4::splat(y1.extract(2)) * x[2] +
|
|
- f64x4::splat(y1.extract(3)) * x[3],
|
|
- f64x4::splat(y2.extract(0)) * x[0] +
|
|
- f64x4::splat(y2.extract(1)) * x[1] +
|
|
- f64x4::splat(y2.extract(2)) * x[2] +
|
|
- f64x4::splat(y2.extract(3)) * x[3],
|
|
- f64x4::splat(y3.extract(0)) * x[0] +
|
|
- f64x4::splat(y3.extract(1)) * x[1] +
|
|
- f64x4::splat(y3.extract(2)) * x[2] +
|
|
- f64x4::splat(y3.extract(3)) * x[3],
|
|
- ]);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[bench]
|
|
-fn inverse_naive(b: &mut B) {
|
|
- let mut x = [[0_f32; 4]; 4];
|
|
- for i in 0..4 { x[i][i] = 1.0 }
|
|
-
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let x = bb(&x);
|
|
-
|
|
- let mut t = [[0_f32; 4]; 4];
|
|
- for i in 0..4 {
|
|
- t[0][i] = x[i][0];
|
|
- t[1][i] = x[i][1];
|
|
- t[2][i] = x[i][2];
|
|
- t[3][i] = x[i][3];
|
|
- }
|
|
-
|
|
- let _0 = t[2][2] * t[3][3];
|
|
- let _1 = t[2][3] * t[3][2];
|
|
- let _2 = t[2][1] * t[3][3];
|
|
- let _3 = t[2][3] * t[3][1];
|
|
- let _4 = t[2][1] * t[3][2];
|
|
- let _5 = t[2][2] * t[3][1];
|
|
- let _6 = t[2][0] * t[3][3];
|
|
- let _7 = t[2][3] * t[3][0];
|
|
- let _8 = t[2][0] * t[3][2];
|
|
- let _9 = t[2][2] * t[3][0];
|
|
- let _10 = t[2][0] * t[3][1];
|
|
- let _11 = t[2][1] * t[3][0];
|
|
-
|
|
- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] -
|
|
- (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]);
|
|
- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] -
|
|
- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]);
|
|
- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] -
|
|
- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]);
|
|
- let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] -
|
|
- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]);
|
|
- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] -
|
|
- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]);
|
|
- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] -
|
|
- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]);
|
|
- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] -
|
|
- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]);
|
|
- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] -
|
|
- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]);
|
|
-
|
|
- let _0 = t[0][2] * t[1][3];
|
|
- let _1 = t[0][3] * t[1][2];
|
|
- let _2 = t[0][1] * t[1][3];
|
|
- let _3 = t[0][3] * t[1][1];
|
|
- let _4 = t[0][1] * t[1][2];
|
|
- let _5 = t[0][2] * t[1][1];
|
|
- let _6 = t[0][0] * t[1][3];
|
|
- let _7 = t[0][3] * t[1][0];
|
|
- let _8 = t[0][0] * t[1][2];
|
|
- let _9 = t[0][2] * t[1][0];
|
|
- let _10 = t[0][0] * t[1][1];
|
|
- let _11 = t[0][1] * t[1][0];
|
|
-
|
|
- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]-
|
|
- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]);
|
|
- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]-
|
|
- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]);
|
|
- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]-
|
|
- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]);
|
|
- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]-
|
|
- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]);
|
|
- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]-
|
|
- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]);
|
|
- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]-
|
|
- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]);
|
|
- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]-
|
|
- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]);
|
|
- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]-
|
|
- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]);
|
|
-
|
|
- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03;
|
|
-
|
|
- let det = 1.0 / det;
|
|
- let mut ret = [[d00, d01, d02, d03],
|
|
- [d10, d11, d12, d13],
|
|
- [d20, d21, d22, d23],
|
|
- [d30, d31, d32, d33]];
|
|
- for i in 0..4 {
|
|
- for j in 0..4 {
|
|
- ret[i][j] *= det;
|
|
- }
|
|
- }
|
|
- bb(&ret);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[bench]
|
|
-fn inverse_simd4(b: &mut B) {
|
|
- let mut x = [f32x4::splat(0_f32); 4];
|
|
- for i in 0..4 { x[i] = x[i].replace(i as u32, 1.0); }
|
|
-
|
|
- fn shuf0145(v: f32x4, w: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(0), v.extract(1),
|
|
- w.extract(4 - 4), w.extract(5 - 4))
|
|
- }
|
|
- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(0), v.extract(2),
|
|
- w.extract(4 - 4), w.extract(6 - 4))
|
|
- }
|
|
- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(1), v.extract(3),
|
|
- w.extract(5 - 4), w.extract(7 - 4))
|
|
- }
|
|
- fn shuf2367(v: f32x4, w: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(2), v.extract(3),
|
|
- w.extract(6 - 4), w.extract(7 - 4))
|
|
- }
|
|
-
|
|
- fn swiz1032(v: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(1), v.extract(0),
|
|
- v.extract(3), v.extract(2))
|
|
- }
|
|
- fn swiz2301(v: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(2), v.extract(3),
|
|
- v.extract(0), v.extract(1))
|
|
- }
|
|
-
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let src0;
|
|
- let src1;
|
|
- let src2;
|
|
- let src3;
|
|
- let mut tmp1;
|
|
- let row0;
|
|
- let mut row1;
|
|
- let mut row2;
|
|
- let mut row3;
|
|
- let mut minor0;
|
|
- let mut minor1;
|
|
- let mut minor2;
|
|
- let mut minor3;
|
|
- let mut det;
|
|
-
|
|
- let x = bb(&x);
|
|
- src0 = x[0];
|
|
- src1 = x[1];
|
|
- src2 = x[2];
|
|
- src3 = x[3];
|
|
-
|
|
- tmp1 = shuf0145(src0, src1);
|
|
- row1 = shuf0145(src2, src3);
|
|
- row0 = shuf0246(tmp1, row1);
|
|
- row1 = shuf1357(row1, tmp1);
|
|
-
|
|
- tmp1 = shuf2367(src0, src1);
|
|
- row3 = shuf2367(src2, src3);
|
|
- row2 = shuf0246(tmp1, row3);
|
|
- row3 = shuf0246(row3, tmp1);
|
|
-
|
|
-
|
|
- tmp1 = row2 * row3;
|
|
- tmp1 = swiz1032(tmp1);
|
|
- minor0 = row1 * tmp1;
|
|
- minor1 = row0 * tmp1;
|
|
- tmp1 = swiz2301(tmp1);
|
|
- minor0 = (row1 * tmp1) - minor0;
|
|
- minor1 = (row0 * tmp1) - minor1;
|
|
- minor1 = swiz2301(minor1);
|
|
-
|
|
-
|
|
- tmp1 = row1 * row2;
|
|
- tmp1 = swiz1032(tmp1);
|
|
- minor0 = (row3 * tmp1) + minor0;
|
|
- minor3 = row0 * tmp1;
|
|
- tmp1 = swiz2301(tmp1);
|
|
-
|
|
- minor0 = minor0 - row3 * tmp1;
|
|
- minor3 = row0 * tmp1 - minor3;
|
|
- minor3 = swiz2301(minor3);
|
|
-
|
|
-
|
|
- tmp1 = row3 * swiz2301(row1);
|
|
- tmp1 = swiz1032(tmp1);
|
|
- row2 = swiz2301(row2);
|
|
- minor0 = row2 * tmp1 + minor0;
|
|
- minor2 = row0 * tmp1;
|
|
- tmp1 = swiz2301(tmp1);
|
|
- minor0 = minor0 - row2 * tmp1;
|
|
- minor2 = row0 * tmp1 - minor2;
|
|
- minor2 = swiz2301(minor2);
|
|
-
|
|
-
|
|
- tmp1 = row0 * row1;
|
|
- tmp1 = swiz1032(tmp1);
|
|
- minor2 = minor2 + row3 * tmp1;
|
|
- minor3 = row2 * tmp1 - minor3;
|
|
- tmp1 = swiz2301(tmp1);
|
|
- minor2 = row3 * tmp1 - minor2;
|
|
- minor3 = minor3 - row2 * tmp1;
|
|
-
|
|
-
|
|
-
|
|
- tmp1 = row0 * row3;
|
|
- tmp1 = swiz1032(tmp1);
|
|
- minor1 = minor1 - row2 * tmp1;
|
|
- minor2 = row1 * tmp1 + minor2;
|
|
- tmp1 = swiz2301(tmp1);
|
|
- minor1 = row2 * tmp1 + minor1;
|
|
- minor2 = minor2 - row1 * tmp1;
|
|
-
|
|
- tmp1 = row0 * row2;
|
|
- tmp1 = swiz1032(tmp1);
|
|
- minor1 = row3 * tmp1 + minor1;
|
|
- minor3 = minor3 - row1 * tmp1;
|
|
- tmp1 = swiz2301(tmp1);
|
|
- minor1 = minor1 - row3 * tmp1;
|
|
- minor3 = row1 * tmp1 + minor3;
|
|
-
|
|
- det = row0 * minor0;
|
|
- det = swiz2301(det) + det;
|
|
- det = swiz1032(det) + det;
|
|
- //tmp1 = det.approx_reciprocal(); det = tmp1 * (f32x4::splat(2.0) - det * tmp1);
|
|
- det = f32x4::splat(1.0) / det;
|
|
-
|
|
- bb(&[minor0 * det, minor1 * det, minor2 * det, minor3 * det]);
|
|
- }
|
|
- })
|
|
-
|
|
-}
|
|
-
|
|
-#[bench]
|
|
-fn transpose_naive(b: &mut B) {
|
|
- let x = [[0_f32; 4]; 4];
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let x = bb(&x);
|
|
- bb(&[[x[0][0], x[1][0], x[2][0], x[3][0]],
|
|
- [x[0][1], x[1][1], x[2][1], x[3][1]],
|
|
- [x[0][2], x[1][2], x[2][2], x[3][2]],
|
|
- [x[0][3], x[1][3], x[2][3], x[3][3]]]);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[bench]
|
|
-fn transpose_simd4(b: &mut B) {
|
|
- let x = [f32x4::splat(0_f32); 4];
|
|
-
|
|
- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(0), v.extract(2),
|
|
- w.extract(4 - 4), w.extract(6 - 4))
|
|
- }
|
|
- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 {
|
|
- f32x4::new(v.extract(1), v.extract(3),
|
|
- w.extract(5 - 4), w.extract(7 - 4))
|
|
- }
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let x = bb(&x);
|
|
- let x0 = x[0];
|
|
- let x1 = x[1];
|
|
- let x2 = x[2];
|
|
- let x3 = x[3];
|
|
-
|
|
- let a0 = shuf0246(x0, x1);
|
|
- let a1 = shuf0246(x2, x3);
|
|
- let a2 = shuf1357(x0, x1);
|
|
- let a3 = shuf1357(x2, x3);
|
|
-
|
|
- let b0 = shuf0246(a0, a1);
|
|
- let b1 = shuf0246(a2, a3);
|
|
- let b2 = shuf1357(a0, a1);
|
|
- let b3 = shuf1357(a2, a3);
|
|
- bb(&[b0, b1, b2, b3]);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[cfg(target_feature = "avx")]
|
|
-#[bench]
|
|
-fn transpose_simd8_naive(b: &mut B) {
|
|
- let x = [f32x8::splat(0_f32); 2];
|
|
-
|
|
- fn shuf0246(v: f32x8, w: f32x8) -> f32x8 {
|
|
- f32x8::new(v.extract(0), v.extract(2), v.extract(4), v.extract(6),
|
|
- w.extract(0), w.extract(2), w.extract(4), w.extract(6))
|
|
- }
|
|
- fn shuf1357(v: f32x8, w: f32x8) -> f32x8 {
|
|
- f32x8::new(v.extract(1), v.extract(3), v.extract(5), v.extract(7),
|
|
- w.extract(1), w.extract(3), w.extract(5), w.extract(7),)
|
|
- }
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let x = bb(&x);
|
|
- let x01 = x[0];
|
|
- let x23 = x[1];
|
|
-
|
|
- let a01 = shuf0246(x01, x23);
|
|
- let a23 = shuf1357(x01, x23);
|
|
-
|
|
- let b01 = shuf0246(a01, a23);
|
|
- let b23 = shuf1357(a01, a23);
|
|
- bb(&[b01, b23]);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[cfg(target_feature = "avx")]
|
|
-#[bench]
|
|
-fn transpose_simd8_avx2_vpermps(b: &mut B) {
|
|
- let x = [f32x8::splat(0_f32); 2];
|
|
-
|
|
- // efficient on AVX2 using vpermps
|
|
- fn perm04152637(v: f32x8) -> f32x8 {
|
|
- // broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12)
|
|
- // v.permutevar(i32x8::new(0, 4, 1, 5, 2, 6, 3, 7))
|
|
- f32x8::new(v.extract(0), v.extract(4), v.extract(1), v.extract(5),
|
|
- v.extract(2), v.extract(6), v.extract(3), v.extract(7))
|
|
- }
|
|
- fn shuf_lo(v: f32x8, w: f32x8) -> f32x8 {
|
|
- f32x8::new(v.extract(0), v.extract(1), w.extract(0), w.extract(1),
|
|
- v.extract(4), v.extract(5), w.extract(4), w.extract(5),)
|
|
- }
|
|
- fn shuf_hi(v: f32x8, w: f32x8) -> f32x8 {
|
|
- f32x8::new(v.extract(2), v.extract(3), w.extract(2), w.extract(3),
|
|
- v.extract(6), v.extract(7), w.extract(6), w.extract(7),)
|
|
- }
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let x = bb(&x);
|
|
- let x01 = x[0];
|
|
- let x23 = x[1];
|
|
-
|
|
- let a01 = perm04152637(x01);
|
|
- let a23 = perm04152637(x23);
|
|
-
|
|
- let b01 = shuf_lo(a01, a23);
|
|
- let b23 = shuf_hi(a01, a23);
|
|
- bb(&[b01, b23]);
|
|
- }
|
|
- })
|
|
-}
|
|
-
|
|
-#[cfg(target_feature = "avx")]
|
|
-#[bench]
|
|
-fn transpose_simd8_avx2_vpermpd(b: &mut B) {
|
|
- let x = [f32x8::splat(0_f32); 2];
|
|
-
|
|
- // efficient on AVX2 using vpermpd
|
|
- fn perm01452367(v: f32x8) -> f32x8 {
|
|
- f32x8::new(v.extract(0), v.extract(1), v.extract(4), v.extract(5),
|
|
- v.extract(2), v.extract(3), v.extract(6), v.extract(7))
|
|
- }
|
|
- fn shuf_lo_ps(v: f32x8, w: f32x8) -> f32x8 {
|
|
- f32x8::new(v.extract(0), w.extract(0), v.extract(1), w.extract(1),
|
|
- v.extract(4), w.extract(4), v.extract(5), w.extract(5),)
|
|
- }
|
|
- fn shuf_hi_ps(v: f32x8, w: f32x8) -> f32x8 {
|
|
- f32x8::new(v.extract(2), w.extract(2), v.extract(3), w.extract(3),
|
|
- v.extract(6), w.extract(6), v.extract(7), w.extract(7),)
|
|
- }
|
|
- b.iter(|| {
|
|
- for _ in 0..100 {
|
|
- let x = bb(&x);
|
|
- let x01 = x[0];
|
|
- let x23 = x[1];
|
|
-
|
|
- let a01 = perm01452367(x01);
|
|
- let a23 = perm01452367(x23);
|
|
-
|
|
- let b01 = shuf_lo_ps(a01, a23);
|
|
- let b23 = shuf_hi_ps(a01, a23);
|
|
- bb(&[b01, b23]);
|
|
- }
|
|
- })
|
|
-}
|
|
diff --git a/third_party/rust/simd/build.rs b/third_party/rust/simd/build.rs
|
|
deleted file mode 100644
|
|
index 61b5330a1846..000000000000
|
|
--- a/third_party/rust/simd/build.rs
|
|
+++ /dev/null
|
|
@@ -1,3 +0,0 @@
|
|
-fn main() {
|
|
- println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1");
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/axpy.rs b/third_party/rust/simd/examples/axpy.rs
|
|
deleted file mode 100755
|
|
index 7862721b254d..000000000000
|
|
--- a/third_party/rust/simd/examples/axpy.rs
|
|
+++ /dev/null
|
|
@@ -1,65 +0,0 @@
|
|
-#![feature(cfg_target_feature)]
|
|
-extern crate simd;
|
|
-use simd::f32x4;
|
|
-#[cfg(target_feature = "avx")]
|
|
-use simd::x86::avx::f32x8;
|
|
-
|
|
-#[inline(never)]
|
|
-pub fn axpy(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) {
|
|
- assert_eq!(x.len(), y.len());
|
|
- assert_eq!(x.len(), z.len());
|
|
-
|
|
- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len());
|
|
-
|
|
- let mut i = 0;
|
|
- while i < len & !3 {
|
|
- let x = f32x4::load(x, i);
|
|
- let y = f32x4::load(y, i);
|
|
- (f32x4::splat(a) * x + y).store(z, i);
|
|
- i += 4
|
|
- }
|
|
-}
|
|
-
|
|
-#[cfg(target_feature = "avx")]
|
|
-#[inline(never)]
|
|
-pub fn axpy8(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) {
|
|
- assert_eq!(x.len(), y.len());
|
|
- assert_eq!(x.len(), z.len());
|
|
-
|
|
- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len());
|
|
-
|
|
- let mut i = 0;
|
|
- while i < len & !7 {
|
|
- let x = f32x8::load(x, i);
|
|
- let y = f32x8::load(y, i);
|
|
- (f32x8::splat(a) * x + y).store(z, i);
|
|
- i += 8
|
|
- }
|
|
-}
|
|
-
|
|
-
|
|
-#[cfg(not(target_feature = "avx"))]
|
|
-pub fn axpy8(_: &mut [f32], _: f32, _: &[f32], _: &[f32]) {
|
|
- unimplemented!()
|
|
-}
|
|
-
|
|
-
|
|
-fn main() {
|
|
- let mut z = vec![0.; 4];
|
|
- axpy(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]);
|
|
- println!("{:?}", z);
|
|
- let mut z = vec![0.; 8];
|
|
- axpy(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0],
|
|
- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]);
|
|
- println!("{:?}", z);
|
|
-
|
|
- if cfg!(target_feature = "avx") {
|
|
- let mut z = vec![0.; 4];
|
|
- axpy8(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]);
|
|
- println!("{:?}", z);
|
|
- let mut z = vec![0.; 8];
|
|
- axpy8(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0],
|
|
- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]);
|
|
- println!("{:?}", z);
|
|
- }
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/convert.rs b/third_party/rust/simd/examples/convert.rs
|
|
deleted file mode 100644
|
|
index 11823a4b50d2..000000000000
|
|
--- a/third_party/rust/simd/examples/convert.rs
|
|
+++ /dev/null
|
|
@@ -1,38 +0,0 @@
|
|
-extern crate simd;
|
|
-use simd::f32x4;
|
|
-
|
|
-#[inline(never)]
|
|
-pub fn convert_scalar(x: &mut [i32], y: &[f32]) {
|
|
- assert_eq!(x.len(), y.len());
|
|
-
|
|
- let mut i = 0;
|
|
- while i < x.len() & !3 {
|
|
- x[i] = y[i] as i32;
|
|
- i += 1;
|
|
- }
|
|
-}
|
|
-
|
|
-#[inline(never)]
|
|
-pub fn convert(x: &mut [i32], y: &[f32]) {
|
|
- assert_eq!(x.len(), y.len());
|
|
-
|
|
- let mut i = 0;
|
|
- while i < x.len() & !3 {
|
|
- let v = f32x4::load(y, i);
|
|
- v.to_i32().store(x, i);
|
|
- i += 4
|
|
- }
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let x = &mut [0; 12];
|
|
- let y = [1.0; 12];
|
|
- convert(x, &y);
|
|
- convert_scalar(x, &y);
|
|
- println!("{:?}", x);
|
|
- let x = &mut [0; 16];
|
|
- let y = [1.0; 16];
|
|
- convert(x, &y);
|
|
- convert_scalar(x, &y);
|
|
- println!("{:?}", x);
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/dot-product.rs b/third_party/rust/simd/examples/dot-product.rs
|
|
deleted file mode 100755
|
|
index 9f0e1d35c799..000000000000
|
|
--- a/third_party/rust/simd/examples/dot-product.rs
|
|
+++ /dev/null
|
|
@@ -1,60 +0,0 @@
|
|
-#![feature(cfg_target_feature)]
|
|
-extern crate simd;
|
|
-use simd::f32x4;
|
|
-#[cfg(target_feature = "avx")]
|
|
-use simd::x86::avx::{f32x8, LowHigh128};
|
|
-
|
|
-#[inline(never)]
|
|
-pub fn dot(x: &[f32], y: &[f32]) -> f32 {
|
|
- assert_eq!(x.len(), y.len());
|
|
-
|
|
- let len = std::cmp::min(x.len(), y.len());
|
|
-
|
|
- let mut sum = f32x4::splat(0.0);
|
|
- let mut i = 0;
|
|
- while i < len & !3 {
|
|
- let x = f32x4::load(x, i);
|
|
- let y = f32x4::load(y, i);
|
|
- sum = sum + x * y;
|
|
- i += 4
|
|
- }
|
|
- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3)
|
|
-}
|
|
-
|
|
-#[cfg(target_feature = "avx")]
|
|
-#[inline(never)]
|
|
-pub fn dot8(x: &[f32], y: &[f32]) -> f32 {
|
|
- assert_eq!(x.len(), y.len());
|
|
-
|
|
- let len = std::cmp::min(x.len(), y.len());
|
|
-
|
|
- let mut sum = f32x8::splat(0.0);
|
|
- let mut i = 0;
|
|
- while i < len & !7 {
|
|
- let x = f32x8::load(x, i);
|
|
- let y = f32x8::load(y, i);
|
|
- sum = sum + x * y;
|
|
- i += 8
|
|
- }
|
|
- let sum = sum.low() + sum.high();
|
|
- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3)
|
|
-}
|
|
-
|
|
-
|
|
-#[cfg(not(target_feature = "avx"))]
|
|
-pub fn dot8(_: &[f32], _: &[f32]) -> f32 {
|
|
- unimplemented!()
|
|
-}
|
|
-
|
|
-
|
|
-fn main() {
|
|
- println!("{}", dot(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]));
|
|
- println!("{}", dot(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0],
|
|
- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]));
|
|
-
|
|
- if cfg!(target_feature = "avx") {
|
|
- println!("{}", dot8(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]));
|
|
- println!("{}", dot8(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0],
|
|
- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]));
|
|
- }
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs b/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs
|
|
deleted file mode 100644
|
|
index fa30b2283f93..000000000000
|
|
--- a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs
|
|
+++ /dev/null
|
|
@@ -1,156 +0,0 @@
|
|
-// The Computer Language Benchmarks Game
|
|
-// http://benchmarksgame.alioth.debian.org/
|
|
-//
|
|
-// contributed by the Rust Project Developers
|
|
-// contributed by TeXitoi
|
|
-
|
|
-use std::{cmp, mem};
|
|
-use std::thread;
|
|
-
|
|
-fn rotate(x: &mut [i32]) {
|
|
- let mut prev = x[0];
|
|
- for place in x.iter_mut().rev() {
|
|
- prev = mem::replace(place, prev)
|
|
- }
|
|
-}
|
|
-
|
|
-fn next_permutation(perm: &mut [i32], count: &mut [i32]) {
|
|
- for i in 1..perm.len() {
|
|
- rotate(&mut perm[.. i + 1]);
|
|
- let count_i = &mut count[i];
|
|
- if *count_i >= i as i32 {
|
|
- *count_i = 0;
|
|
- } else {
|
|
- *count_i += 1;
|
|
- break
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-#[derive(Clone, Copy)]
|
|
-struct P {
|
|
- p: [i32; 16],
|
|
-}
|
|
-
|
|
-#[derive(Clone, Copy)]
|
|
-struct Perm {
|
|
- cnt: [i32; 16],
|
|
- fact: [u32; 16],
|
|
- n: u32,
|
|
- permcount: u32,
|
|
- perm: P,
|
|
-}
|
|
-
|
|
-impl Perm {
|
|
- fn new(n: u32) -> Perm {
|
|
- let mut fact = [1; 16];
|
|
- for i in 1 .. n as usize + 1 {
|
|
- fact[i] = fact[i - 1] * i as u32;
|
|
- }
|
|
- Perm {
|
|
- cnt: [0; 16],
|
|
- fact: fact,
|
|
- n: n,
|
|
- permcount: 0,
|
|
- perm: P { p: [0; 16 ] }
|
|
- }
|
|
- }
|
|
-
|
|
- fn get(&mut self, mut idx: i32) -> P {
|
|
- let mut pp = [0u8; 16];
|
|
- self.permcount = idx as u32;
|
|
- for (i, place) in self.perm.p.iter_mut().enumerate() {
|
|
- *place = i as i32 + 1;
|
|
- }
|
|
-
|
|
- for i in (1 .. self.n as usize).rev() {
|
|
- let d = idx / self.fact[i] as i32;
|
|
- self.cnt[i] = d;
|
|
- idx %= self.fact[i] as i32;
|
|
- for (place, val) in pp.iter_mut().zip(self.perm.p[..(i+1)].iter()) {
|
|
- *place = (*val) as u8
|
|
- }
|
|
-
|
|
- let d = d as usize;
|
|
- for j in 0 .. i + 1 {
|
|
- self.perm.p[j] = if j + d <= i {pp[j + d]} else {pp[j+d-i-1]} as i32;
|
|
- }
|
|
- }
|
|
-
|
|
- self.perm
|
|
- }
|
|
-
|
|
- fn count(&self) -> u32 { self.permcount }
|
|
- fn max(&self) -> u32 { self.fact[self.n as usize] }
|
|
-
|
|
- fn next(&mut self) -> P {
|
|
- next_permutation(&mut self.perm.p, &mut self.cnt);
|
|
- self.permcount += 1;
|
|
-
|
|
- self.perm
|
|
- }
|
|
-}
|
|
-
|
|
-
|
|
-fn reverse(tperm: &mut [i32], k: usize) {
|
|
- tperm[..k].reverse()
|
|
-}
|
|
-
|
|
-fn work(mut perm: Perm, n: usize, max: usize) -> (i32, i32) {
|
|
- let mut checksum = 0;
|
|
- let mut maxflips = 0;
|
|
-
|
|
- let mut p = perm.get(n as i32);
|
|
-
|
|
- while perm.count() < max as u32 {
|
|
- let mut flips = 0;
|
|
-
|
|
- while p.p[0] != 1 {
|
|
- let k = p.p[0] as usize;
|
|
- reverse(&mut p.p, k);
|
|
- flips += 1;
|
|
- }
|
|
-
|
|
- checksum += if perm.count() % 2 == 0 {flips} else {-flips};
|
|
- maxflips = cmp::max(maxflips, flips);
|
|
-
|
|
- p = perm.next();
|
|
- }
|
|
-
|
|
- (checksum, maxflips)
|
|
-}
|
|
-
|
|
-fn fannkuch(n: i32) -> (i32, i32) {
|
|
- let perm = Perm::new(n as u32);
|
|
-
|
|
- let n = 1;
|
|
- let mut futures = vec![];
|
|
- let k = perm.max() / n;
|
|
-
|
|
- for j in (0..).map(|x| x * k).take_while(|&j| j < k * n) {
|
|
- let max = cmp::min(j+k, perm.max());
|
|
-
|
|
- futures.push(thread::spawn(move|| {
|
|
- work(perm, j as usize, max as usize)
|
|
- }))
|
|
- }
|
|
-
|
|
- let mut checksum = 0;
|
|
- let mut maxflips = 0;
|
|
- for fut in futures.into_iter() {
|
|
- let (cs, mf) = fut.join().unwrap();
|
|
- checksum += cs;
|
|
- maxflips = cmp::max(maxflips, mf);
|
|
- }
|
|
- (checksum, maxflips)
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let n = std::env::args_os().nth(1)
|
|
- .and_then(|s| s.into_string().ok())
|
|
- .and_then(|n| n.parse().ok())
|
|
- .unwrap_or(7);
|
|
-
|
|
- let (checksum, maxflips) = fannkuch(n);
|
|
- println!("{}\nPfannkuchen({}) = {}", checksum, n, maxflips);
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/fannkuch-redux.rs b/third_party/rust/simd/examples/fannkuch-redux.rs
|
|
deleted file mode 100755
|
|
index 2e52ae721135..000000000000
|
|
--- a/third_party/rust/simd/examples/fannkuch-redux.rs
|
|
+++ /dev/null
|
|
@@ -1,233 +0,0 @@
|
|
-#![feature(cfg_target_feature)]
|
|
-extern crate simd;
|
|
-#[macro_use] extern crate cfg_if;
|
|
-use simd::u8x16;
|
|
-
|
|
-use std::{env, process};
|
|
-
|
|
-cfg_if! {
|
|
- if #[cfg(target_arch = "aarch64")] {
|
|
- #[inline(always)]
|
|
- fn shuffle(x: u8x16, y: u8x16) -> u8x16 {
|
|
- use simd::aarch64::neon::*;
|
|
- y.table_lookup_1(x)
|
|
- }
|
|
- } else if #[cfg(all(target_arch = "arm",
|
|
- target_feature = "neon"))] {
|
|
- #[inline(always)]
|
|
- fn shuffle(x: u8x16, y: u8x16) -> u8x16 {
|
|
- use simd::arm::neon::*;
|
|
- #[inline(always)]
|
|
- fn split(x: u8x16) -> (u8x8, u8x8) {
|
|
- unsafe {std::mem::transmute(x)}
|
|
- }
|
|
- fn join(x: u8x8, y: u8x8) -> u8x16 {
|
|
- unsafe {std::mem::transmute((x, y))}
|
|
- }
|
|
-
|
|
- let (t0, t1) = split(x);
|
|
- let (i0, i1) = split(y);
|
|
- join(i0.table_lookup_2(t0, t1),
|
|
- i1.table_lookup_2(t0, t1))
|
|
- }
|
|
- } else if #[cfg(target_feature = "ssse3")] {
|
|
- #[inline(always)]
|
|
- fn shuffle(x: u8x16, y: u8x16) -> u8x16 {
|
|
- use simd::x86::ssse3::*;
|
|
- x.shuffle_bytes(y)
|
|
- }
|
|
- } else {
|
|
- // slow fallback, so tests work
|
|
- #[inline(always)]
|
|
- fn shuffle(x: u8x16, y: u8x16) -> u8x16 {
|
|
- u8x16::new(x.extract(y.extract(0) as u32),
|
|
- x.extract(y.extract(1) as u32),
|
|
- x.extract(y.extract(2) as u32),
|
|
- x.extract(y.extract(3) as u32),
|
|
- x.extract(y.extract(4) as u32),
|
|
- x.extract(y.extract(5) as u32),
|
|
- x.extract(y.extract(6) as u32),
|
|
- x.extract(y.extract(7) as u32),
|
|
- x.extract(y.extract(8) as u32),
|
|
- x.extract(y.extract(9) as u32),
|
|
- x.extract(y.extract(10) as u32),
|
|
- x.extract(y.extract(11) as u32),
|
|
- x.extract(y.extract(12) as u32),
|
|
- x.extract(y.extract(13) as u32),
|
|
- x.extract(y.extract(14) as u32),
|
|
- x.extract(y.extract(15) as u32))
|
|
- }
|
|
- }
|
|
-}
|
|
-struct State {
|
|
- s: [u8; 16],
|
|
- flip_masks: [u8x16; 16],
|
|
- rotate_masks: [u8x16; 16],
|
|
-
|
|
- maxflips: i32,
|
|
- odd: u16,
|
|
- checksum: i32,
|
|
-}
|
|
-impl State {
|
|
- fn new() -> State {
|
|
- State {
|
|
- s: [0; 16],
|
|
- flip_masks: [u8x16::splat(0); 16],
|
|
- rotate_masks: [u8x16::splat(0); 16],
|
|
-
|
|
- maxflips: 0,
|
|
- odd: 0,
|
|
- checksum: 0,
|
|
- }
|
|
- }
|
|
- #[inline(never)]
|
|
- fn rotate_sisd(&mut self, n: usize) {
|
|
- let c = self.s[0];
|
|
- for i in 1..(n + 1) {
|
|
- self.s[i - 1] = self.s[i];
|
|
- }
|
|
- self.s[n] = c;
|
|
- }
|
|
- #[inline(never)]
|
|
- fn popmasks(&mut self) {
|
|
- let mut mask = [0_u8; 16];
|
|
- for i in 0..16 {
|
|
- for j in 0..16 { mask[j] = j as u8; }
|
|
-
|
|
- for x in 0..(i+1)/2 {
|
|
- mask.swap(x, i - x);
|
|
- }
|
|
-
|
|
- self.flip_masks[i] = u8x16::load(&mask, 0);
|
|
-
|
|
- for j in 0..16 { self.s[j] = j as u8; }
|
|
- self.rotate_sisd(i);
|
|
- self.rotate_masks[i] = self.load_s();
|
|
- }
|
|
- }
|
|
- fn rotate(&mut self, n: usize) {
|
|
- shuffle(self.load_s(), self.rotate_masks[n]).store(&mut self.s, 0)
|
|
- }
|
|
-
|
|
- fn load_s(&self) -> u8x16 {
|
|
- u8x16::load(&self.s, 0)
|
|
- }
|
|
-
|
|
-
|
|
- #[inline(never)]
|
|
- fn tk(&mut self, n: usize) {
|
|
- #[derive(Copy, Clone, Debug)]
|
|
- struct Perm {
|
|
- perm: u8x16,
|
|
- start: u8,
|
|
- odd: u16
|
|
- }
|
|
-
|
|
- let mut perms = [Perm { perm: u8x16::splat(0), start: 0 , odd: 0 }; 60];
|
|
-
|
|
- let mut i = 0;
|
|
- let mut c = [0_u8; 16];
|
|
- let mut perm_max = 0;
|
|
-
|
|
- while i < n {
|
|
- while i < n && perm_max < 60 {
|
|
- self.rotate(i);
|
|
- if c[i] as usize >= i {
|
|
- c[i] = 0;
|
|
- i += 1;
|
|
- continue
|
|
- }
|
|
-
|
|
- c[i] += 1;
|
|
- i = 1;
|
|
- self.odd = !self.odd;
|
|
- if self.s[0] != 0 {
|
|
- if self.s[self.s[0] as usize] != 0 {
|
|
- perms[perm_max].perm = self.load_s();
|
|
- perms[perm_max].start = self.s[0];
|
|
- perms[perm_max].odd = self.odd;
|
|
- perm_max += 1;
|
|
- } else {
|
|
- if self.maxflips == 0 { self.maxflips = 1 }
|
|
- self.checksum += if self.odd != 0 { -1 } else { 1 };
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- let mut k = 0;
|
|
- while k < std::cmp::max(1, perm_max) - 1 {
|
|
- let pk = &perms[k];
|
|
- let pk1 = &perms[k + 1];
|
|
- //println!("perm1 {:?}\nperm2 {:?}", pk.perm, pk1.perm);
|
|
- let mut perm1 = pk.perm;
|
|
- let mut perm2 = pk1.perm;
|
|
-
|
|
- let mut f1 = 0;
|
|
- let mut f2 = 0;
|
|
- let mut toterm1 = pk.start;
|
|
- let mut toterm2 = pk1.start;
|
|
-
|
|
- while toterm1 != 0 && toterm2 != 0 {
|
|
- perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]);
|
|
- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]);
|
|
- toterm1 = perm1.extract(0);
|
|
- toterm2 = perm2.extract(0);
|
|
-
|
|
- f1 += 1; f2 += 1;
|
|
- }
|
|
- while toterm1 != 0 {
|
|
- perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]);
|
|
- toterm1 = perm1.extract(0);
|
|
- f1 += 1;
|
|
- }
|
|
- while toterm2 != 0 {
|
|
- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]);
|
|
- toterm2 = perm2.extract(0);
|
|
- f2 += 1;
|
|
- }
|
|
-
|
|
- if f1 > self.maxflips { self.maxflips = f1 }
|
|
- if f2 > self.maxflips { self.maxflips = f2 }
|
|
- self.checksum += if pk.odd != 0 { -f1 } else { f1 };
|
|
- self.checksum += if pk1.odd != 0 { -f2 } else { f2 };
|
|
-
|
|
- k += 2;
|
|
- }
|
|
- while k < perm_max {
|
|
- let pk = &perms[k];
|
|
- let mut perm = pk.perm;
|
|
- let mut f = 0;
|
|
- let mut toterm = pk.start;
|
|
- while toterm != 0 {
|
|
- perm = shuffle(perm, self.flip_masks[toterm as usize]);
|
|
- toterm = perm.extract(0);
|
|
- f += 1;
|
|
- }
|
|
- if f > self.maxflips { self.maxflips = f }
|
|
- self.checksum += if pk.odd != 0 { -f } else { f };
|
|
- k += 1
|
|
- }
|
|
- perm_max = 0;
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let mut state = State::new();
|
|
- state.popmasks();
|
|
-
|
|
- let args = env::args().collect::<Vec<_>>();
|
|
- if args.len() < 2 {
|
|
- println!("usage: {} number", args[0]);
|
|
- process::exit(1)
|
|
- }
|
|
- let max_n = args[1].parse().unwrap();
|
|
- if max_n < 3 || max_n > 15 {
|
|
- println!("range: must be 3 <= n <= 14");
|
|
- process::exit(1);
|
|
- }
|
|
- for i in 0..max_n { state.s[i] = i as u8 }
|
|
- state.tk(max_n);
|
|
-
|
|
- println!("{}\nPfannkuchen({}) = {}", state.checksum, max_n, state.maxflips);
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/mandelbrot.rs b/third_party/rust/simd/examples/mandelbrot.rs
|
|
deleted file mode 100755
|
|
index c6f1320a0784..000000000000
|
|
--- a/third_party/rust/simd/examples/mandelbrot.rs
|
|
+++ /dev/null
|
|
@@ -1,125 +0,0 @@
|
|
-#![feature(iterator_step_by, test)]
|
|
-
|
|
-extern crate test;
|
|
-extern crate simd;
|
|
-use simd::{f32x4, u32x4};
|
|
-use std::io::prelude::*;
|
|
-
|
|
-#[inline(never)]
|
|
-fn mandelbrot_naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 {
|
|
- let mut x = c_x;
|
|
- let mut y = c_y;
|
|
- let mut count = 0;
|
|
- while count < max_iter {
|
|
- let xy = x * y;
|
|
- let xx = x * x;
|
|
- let yy = y * y;
|
|
- let sum = xx + yy;
|
|
- if sum > 4.0 {
|
|
- break
|
|
- }
|
|
- count += 1;
|
|
- x = xx - yy + c_x;
|
|
- y = xy * 2.0 + c_y;
|
|
- }
|
|
- count
|
|
-}
|
|
-
|
|
-#[inline(never)]
|
|
-fn mandelbrot_vector(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 {
|
|
- let mut x = c_x;
|
|
- let mut y = c_y;
|
|
-
|
|
- let mut count = u32x4::splat(0);
|
|
- for _ in 0..max_iter as usize {
|
|
- let xy = x * y;
|
|
- let xx = x * x;
|
|
- let yy = y * y;
|
|
- let sum = xx + yy;
|
|
- let mask = sum.lt(f32x4::splat(4.0));
|
|
-
|
|
- if !mask.any() { break }
|
|
- count = count + mask.to_i().select(u32x4::splat(1),
|
|
- u32x4::splat(0));
|
|
-
|
|
- x = xx - yy + c_x;
|
|
- y = xy + xy + c_y;
|
|
- }
|
|
- count
|
|
-}
|
|
-
|
|
-const COLOURS: &'static [(f32, f32, f32)] = &[(0.0, 7.0, 100.0),
|
|
- (32.0, 107.0, 203.0),
|
|
- (237.0, 255.0, 255.0),
|
|
- (255.0, 170.0, 0.0),
|
|
- (0.0, 2.0, 0.0)];
|
|
-const SCALE: f32 = 12.0;
|
|
-const LIMIT: u32 = 100;
|
|
-
|
|
-#[inline(never)]
|
|
-fn output_one(buf: &mut [u8], val: u32) {
|
|
- let (r, g, b);
|
|
- if val == LIMIT {
|
|
- r = 0;
|
|
- g = 0;
|
|
- b = 0;
|
|
- } else {
|
|
- let val = (val as f32 % SCALE) * (COLOURS.len() as f32) / SCALE;
|
|
- let left = val as usize % COLOURS.len();
|
|
- let right = (left + 1) % COLOURS.len();
|
|
-
|
|
- let p = val - left as f32;
|
|
- let (r1, g1, b1) = COLOURS[left];
|
|
- let (r2, g2, b2) = COLOURS[right];
|
|
- r = (r1 + (r2 - r1) * p) as u8;
|
|
- g = (g1 + (g2 - g1) * p) as u8;
|
|
- b = (b1 + (b2 - b1) * p) as u8;
|
|
- }
|
|
- buf[0] = r;
|
|
- buf[1] = g;
|
|
- buf[2] = b;
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let mut args = std::env::args();
|
|
- args.next();
|
|
- let width = args.next().unwrap().parse().unwrap();
|
|
- let height = args.next().unwrap().parse().unwrap();
|
|
-
|
|
- let left = -2.2;
|
|
- let right = left + 3.0;
|
|
- let top = 1.0;
|
|
- let bottom = top - 2.0;
|
|
-
|
|
- let width_step: f32 = (right - left) / width as f32;
|
|
- let height_step: f32 = (bottom - top) / height as f32;
|
|
-
|
|
- let adjust = f32x4::splat(width_step) * f32x4::new(0., 1., 2., 3.);
|
|
-
|
|
- println!("P6 {} {} 255", width, height);
|
|
- let mut line = vec![0; width * 3];
|
|
-
|
|
- if args.next().is_none() {
|
|
- for i in 0..height {
|
|
- let y = f32x4::splat(top + height_step * i as f32);
|
|
- for j in (0..width).step_by(4) {
|
|
- let x = f32x4::splat(left + width_step * j as f32) + adjust;
|
|
- let ret = mandelbrot_vector(x, y, LIMIT);
|
|
- test::black_box(ret);
|
|
- for k in 0..4 { let val = ret.extract(k as u32); output_one(&mut line[3*(j + k)..3*(j + k + 1)], val); }
|
|
- }
|
|
- ::std::io::stdout().write(&line).unwrap();
|
|
- }
|
|
- } else {
|
|
- for i in 0..height {
|
|
- let y = top + height_step * i as f32;
|
|
- for j in 0..width {
|
|
- let x = left + width_step * j as f32;
|
|
- let val = mandelbrot_naive(x, y, LIMIT);
|
|
- test::black_box(val);
|
|
- output_one(&mut line[3*j..3*(j + 1)], val);
|
|
- }
|
|
- ::std::io::stdout().write(&line).unwrap();
|
|
- }
|
|
- }
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/matrix-inverse.rs b/third_party/rust/simd/examples/matrix-inverse.rs
|
|
deleted file mode 100644
|
|
index e6eb7ffc4655..000000000000
|
|
--- a/third_party/rust/simd/examples/matrix-inverse.rs
|
|
+++ /dev/null
|
|
@@ -1,281 +0,0 @@
|
|
-extern crate simd;
|
|
-use simd::f32x4;
|
|
-
|
|
-fn mul(x: &[f32x4; 4], y: &[f32x4; 4]) -> [f32x4; 4] {
|
|
- let y0 = y[0];
|
|
- let y1 = y[1];
|
|
- let y2 = y[2];
|
|
- let y3 = y[3];
|
|
- [f32x4::splat(y0.extract(0)) * x[0] +
|
|
- f32x4::splat(y0.extract(1)) * x[1] +
|
|
- f32x4::splat(y0.extract(2)) * x[2] +
|
|
- f32x4::splat(y0.extract(3)) * x[3],
|
|
- f32x4::splat(y1.extract(0)) * x[0] +
|
|
- f32x4::splat(y1.extract(1)) * x[1] +
|
|
- f32x4::splat(y1.extract(2)) * x[2] +
|
|
- f32x4::splat(y1.extract(3)) * x[3],
|
|
- f32x4::splat(y2.extract(0)) * x[0] +
|
|
- f32x4::splat(y2.extract(1)) * x[1] +
|
|
- f32x4::splat(y2.extract(2)) * x[2] +
|
|
- f32x4::splat(y2.extract(3)) * x[3],
|
|
- f32x4::splat(y3.extract(0)) * x[0] +
|
|
- f32x4::splat(y3.extract(1)) * x[1] +
|
|
- f32x4::splat(y3.extract(2)) * x[2] +
|
|
- f32x4::splat(y3.extract(3)) * x[3],
|
|
- ]
|
|
-}
|
|
-
|
|
-#[allow(dead_code)]
|
|
-fn inverse_naive(x: &[[f32; 4]; 4]) -> [[f32; 4]; 4] {
|
|
- let mut t = [[0_f32; 4]; 4];
|
|
- for i in 0..4 {
|
|
- t[0][i] = x[i][0];
|
|
- t[1][i] = x[i][1];
|
|
- t[2][i] = x[i][2];
|
|
- t[3][i] = x[i][3];
|
|
- }
|
|
- println!("{:?}", t);
|
|
-
|
|
- let _0 = t[2][2] * t[3][3];
|
|
- let _1 = t[2][3] * t[3][2];
|
|
- let _2 = t[2][1] * t[3][3];
|
|
- let _3 = t[2][3] * t[3][1];
|
|
- let _4 = t[2][1] * t[3][2];
|
|
- let _5 = t[2][2] * t[3][1];
|
|
- let _6 = t[2][0] * t[3][3];
|
|
- let _7 = t[2][3] * t[3][0];
|
|
- let _8 = t[2][0] * t[3][2];
|
|
- let _9 = t[2][2] * t[3][0];
|
|
- let _10 = t[2][0] * t[3][1];
|
|
- let _11 = t[2][1] * t[3][0];
|
|
- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11];
|
|
- println!("{:?}", v);
|
|
-
|
|
- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] -
|
|
- (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]);
|
|
- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] -
|
|
- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]);
|
|
- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] -
|
|
- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]);
|
|
- let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] -
|
|
- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]);
|
|
- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] -
|
|
- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]);
|
|
- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] -
|
|
- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]);
|
|
- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] -
|
|
- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]);
|
|
- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] -
|
|
- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]);
|
|
-
|
|
- println!("{:?}", [d00, d01, d02, d03, d10, d11, d12, d13]);
|
|
-
|
|
- let _0 = t[0][2] * t[1][3];
|
|
- let _1 = t[0][3] * t[1][2];
|
|
- let _2 = t[0][1] * t[1][3];
|
|
- let _3 = t[0][3] * t[1][1];
|
|
- let _4 = t[0][1] * t[1][2];
|
|
- let _5 = t[0][2] * t[1][1];
|
|
- let _6 = t[0][0] * t[1][3];
|
|
- let _7 = t[0][3] * t[1][0];
|
|
- let _8 = t[0][0] * t[1][2];
|
|
- let _9 = t[0][2] * t[1][0];
|
|
- let _10 = t[0][0] * t[1][1];
|
|
- let _11 = t[0][1] * t[1][0];
|
|
- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11];
|
|
- println!("{:?}", v);
|
|
-
|
|
- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]-
|
|
- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]);
|
|
- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]-
|
|
- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]);
|
|
- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]-
|
|
- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]);
|
|
- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]-
|
|
- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]);
|
|
- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]-
|
|
- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]);
|
|
- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]-
|
|
- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]);
|
|
- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]-
|
|
- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]);
|
|
- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]-
|
|
- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]);
|
|
-
|
|
- println!("{:?}", [d20, d21, d22, d23, d30, d31, d32, d33]);
|
|
-
|
|
- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03;
|
|
-
|
|
- let det = 1.0 / det;
|
|
- let mut ret = [[d00, d01, d02, d03],
|
|
- [d10, d11, d12, d13],
|
|
- [d20, d21, d22, d23],
|
|
- [d30, d31, d32, d33]];
|
|
- for i in 0..4 {
|
|
- for j in 0..4 {
|
|
- ret[i][j] *= det;
|
|
- }
|
|
- }
|
|
- ret
|
|
-}
|
|
-
|
|
-fn inverse_simd4(x: &[f32x4; 4]) -> [f32x4; 4] {
|
|
- let src0 = x[0];
|
|
- let src1 = x[1];
|
|
- let src2 = x[2];
|
|
- let src3 = x[3];
|
|
-
|
|
- let tmp1 = f32x4::new(src0.extract(0), src0.extract(1),
|
|
- src1.extract(4 - 4), src1.extract(5 - 4));
|
|
- let row1 = f32x4::new(src2.extract(0), src2.extract(1),
|
|
- src3.extract(4 - 4), src3.extract(5 - 4));
|
|
- let row0 = f32x4::new(tmp1.extract(0), tmp1.extract(2),
|
|
- row1.extract(4 - 4), row1.extract(6 - 4));
|
|
- let row1 = f32x4::new(row1.extract(1), row1.extract(3),
|
|
- tmp1.extract(5 - 4), tmp1.extract(7 - 4));
|
|
-
|
|
- let tmp1 = f32x4::new(src0.extract(2), src0.extract(3),
|
|
- src1.extract(6 - 4), src1.extract(7 - 4));
|
|
- let row3 = f32x4::new(src2.extract(2), src2.extract(3),
|
|
- src3.extract(6 - 4), src3.extract(7 - 4));
|
|
- let row2 = f32x4::new(tmp1.extract(0), tmp1.extract(2),
|
|
- row3.extract(4 - 4), row3.extract(6 - 4));
|
|
- let row3 = f32x4::new(row3.extract(1), row3.extract(3),
|
|
- tmp1.extract(5 - 4), tmp1.extract(7 - 4));
|
|
-
|
|
-
|
|
- let tmp1 = row2 * row3;
|
|
- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0),
|
|
- tmp1.extract(3), tmp1.extract(2));
|
|
- let minor0 = row1 * tmp1;
|
|
- let minor1 = row0 * tmp1;
|
|
- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3),
|
|
- tmp1.extract(0), tmp1.extract(1));
|
|
- let minor0 = (row1 * tmp1) - minor0;
|
|
- let minor1 = (row0 * tmp1) - minor1;
|
|
- let minor1 = f32x4::new(minor1.extract(2), minor1.extract(3),
|
|
- minor1.extract(0), minor1.extract(1));
|
|
- //println!("{:?}", minor1);
|
|
-
|
|
-
|
|
- let tmp1 = row1 * row2;
|
|
- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0),
|
|
- tmp1.extract(3), tmp1.extract(2));
|
|
- let minor0 = (row3 * tmp1) + minor0;
|
|
- let minor3 = row0 * tmp1;
|
|
- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3),
|
|
- tmp1.extract(0), tmp1.extract(1));
|
|
-
|
|
- let minor0 = minor0 - row3 * tmp1;
|
|
- let minor3 = row0 * tmp1 - minor3;
|
|
- let minor3 = f32x4::new(minor3.extract(2), minor3.extract(3),
|
|
- minor3.extract(0), minor3.extract(1));
|
|
- //println!("{:?}", minor1);
|
|
-
|
|
-
|
|
- let tmp1 = row3 * f32x4::new(row1.extract(2), row1.extract(3),
|
|
- row1.extract(0), row1.extract(1));
|
|
- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0),
|
|
- tmp1.extract(3), tmp1.extract(2));
|
|
- let row2 = f32x4::new(row2.extract(2), row2.extract(3),
|
|
- row2.extract(0), row2.extract(1));
|
|
- let minor0 = row2 * tmp1 + minor0;
|
|
- let minor2 = row0 * tmp1;
|
|
- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3),
|
|
- tmp1.extract(0), tmp1.extract(1));
|
|
- let minor0 = minor0 - row2 * tmp1;
|
|
- let minor2 = row0 * tmp1 - minor2;
|
|
- let minor2 = f32x4::new(minor2.extract(2), minor2.extract(3),
|
|
- minor2.extract(0), minor2.extract(1));
|
|
- //println!("{:?}", minor1);
|
|
-
|
|
-
|
|
- let tmp1 = row0 * row1;
|
|
- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0),
|
|
- tmp1.extract(3), tmp1.extract(2));
|
|
- let minor2 = minor2 + row3 * tmp1;
|
|
- let minor3 = row2 * tmp1 - minor3;
|
|
- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3),
|
|
- tmp1.extract(0), tmp1.extract(1));
|
|
- let minor2 = row3 * tmp1 - minor2;
|
|
- let minor3 = minor3 - row2 * tmp1;
|
|
- //println!("{:?}", minor1);
|
|
-
|
|
-
|
|
-
|
|
- let tmp1 = row0 * row3;
|
|
- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0),
|
|
- tmp1.extract(3), tmp1.extract(2));
|
|
- let minor1 = minor1 - row2 * tmp1;
|
|
- let minor2 = row1 * tmp1 + minor2;
|
|
- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3),
|
|
- tmp1.extract(0), tmp1.extract(1));
|
|
- let minor1 = row2 * tmp1 + minor1;
|
|
- let minor2 = minor2 - row1 * tmp1;
|
|
- //println!("{:?}", minor1);
|
|
-
|
|
- let tmp1 = row0 * row2;
|
|
- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0),
|
|
- tmp1.extract(3), tmp1.extract(2));
|
|
- let minor1 = row3 * tmp1 + minor1;
|
|
- let minor3 = minor3 - row1 * tmp1;
|
|
- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3),
|
|
- tmp1.extract(0), tmp1.extract(1));
|
|
- let minor1 = minor1 - row3 * tmp1;
|
|
- let minor3 = row1 * tmp1 + minor3;
|
|
- //println!("{:?}", minor1);
|
|
-
|
|
- let det = row0 * minor0;
|
|
- let det = f32x4::new(det.extract(2), det.extract(3),
|
|
- det.extract(0), det.extract(1)) + det;
|
|
- let det = f32x4::new(det.extract(1), det.extract(0),
|
|
- det.extract(3), det.extract(2)) + det;
|
|
- let tmp1 = det.approx_reciprocal();
|
|
- let det = tmp1 + tmp1 - det * tmp1 * tmp1;
|
|
-
|
|
-// let det = f32x4::splat(det.extract(0));
|
|
-
|
|
- [minor0 * det, minor1 * det, minor2 * det, minor3 * det]
|
|
-}
|
|
-
|
|
-fn p(x: &[f32x4; 4]) {
|
|
- for xx in x {
|
|
- for i in 0..4 {
|
|
- let v = xx.extract(i);
|
|
- if v == 0.0 {
|
|
- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, "");
|
|
- } else {
|
|
- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, xx.extract(i));
|
|
- }
|
|
- }
|
|
- println!(" |");
|
|
- }
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let x = [f32x4::new(-100.0, 6.0, 100.0, 1.0),
|
|
- f32x4::new(3.0, 1.0, 0.0, 1.0),
|
|
- f32x4::new(2.0, 1.0, 1.0, 1.0),
|
|
- f32x4::new(-10.0, 1.0, 1.0, 1.0)];
|
|
-
|
|
- /* let mut x_ = [[0.0; 4]; 4];
|
|
- for i in 0..4 {
|
|
- for j in 0..4 {
|
|
- x_[i][j] = x[i].extract(j as u32)
|
|
- }
|
|
- }
|
|
-
|
|
- let ret = inverse_naive(&x_);
|
|
- let mut y = [f32x4::splat(0.0); 4];
|
|
- for i in 0..4 {
|
|
- for j in 0..4 {
|
|
- y[i] = y[i].replace(j as u32, ret[i][j])
|
|
- }
|
|
-}*/
|
|
- let y = inverse_simd4(&x);
|
|
- p(&x);
|
|
- println!("");
|
|
- p(&y);
|
|
- println!("");
|
|
- p(&mul(&x, &y))
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/nbody-nosimd.rs b/third_party/rust/simd/examples/nbody-nosimd.rs
|
|
deleted file mode 100644
|
|
index d5f1bb422ff2..000000000000
|
|
--- a/third_party/rust/simd/examples/nbody-nosimd.rs
|
|
+++ /dev/null
|
|
@@ -1,156 +0,0 @@
|
|
-// The Computer Language Benchmarks Game
|
|
-// http://benchmarksgame.alioth.debian.org/
|
|
-//
|
|
-// contributed by the Rust Project Developers
|
|
-// contributed by TeXitoi
|
|
-
|
|
-const PI: f64 = 3.141592653589793;
|
|
-const SOLAR_MASS: f64 = 4.0 * PI * PI;
|
|
-const YEAR: f64 = 365.24;
|
|
-const N_BODIES: usize = 5;
|
|
-
|
|
-static BODIES: [Planet;N_BODIES] = [
|
|
- // Sun
|
|
- Planet {
|
|
- x: 0.0, y: 0.0, z: 0.0,
|
|
- vx: 0.0, vy: 0.0, vz: 0.0,
|
|
- mass: SOLAR_MASS,
|
|
- },
|
|
- // Jupiter
|
|
- Planet {
|
|
- x: 4.84143144246472090e+00,
|
|
- y: -1.16032004402742839e+00,
|
|
- z: -1.03622044471123109e-01,
|
|
- vx: 1.66007664274403694e-03 * YEAR,
|
|
- vy: 7.69901118419740425e-03 * YEAR,
|
|
- vz: -6.90460016972063023e-05 * YEAR,
|
|
- mass: 9.54791938424326609e-04 * SOLAR_MASS,
|
|
- },
|
|
- // Saturn
|
|
- Planet {
|
|
- x: 8.34336671824457987e+00,
|
|
- y: 4.12479856412430479e+00,
|
|
- z: -4.03523417114321381e-01,
|
|
- vx: -2.76742510726862411e-03 * YEAR,
|
|
- vy: 4.99852801234917238e-03 * YEAR,
|
|
- vz: 2.30417297573763929e-05 * YEAR,
|
|
- mass: 2.85885980666130812e-04 * SOLAR_MASS,
|
|
- },
|
|
- // Uranus
|
|
- Planet {
|
|
- x: 1.28943695621391310e+01,
|
|
- y: -1.51111514016986312e+01,
|
|
- z: -2.23307578892655734e-01,
|
|
- vx: 2.96460137564761618e-03 * YEAR,
|
|
- vy: 2.37847173959480950e-03 * YEAR,
|
|
- vz: -2.96589568540237556e-05 * YEAR,
|
|
- mass: 4.36624404335156298e-05 * SOLAR_MASS,
|
|
- },
|
|
- // Neptune
|
|
- Planet {
|
|
- x: 1.53796971148509165e+01,
|
|
- y: -2.59193146099879641e+01,
|
|
- z: 1.79258772950371181e-01,
|
|
- vx: 2.68067772490389322e-03 * YEAR,
|
|
- vy: 1.62824170038242295e-03 * YEAR,
|
|
- vz: -9.51592254519715870e-05 * YEAR,
|
|
- mass: 5.15138902046611451e-05 * SOLAR_MASS,
|
|
- },
|
|
-];
|
|
-
|
|
-#[derive(Clone, Copy)]
|
|
-struct Planet {
|
|
- x: f64, y: f64, z: f64,
|
|
- vx: f64, vy: f64, vz: f64,
|
|
- mass: f64,
|
|
-}
|
|
-
|
|
-fn advance(bodies: &mut [Planet;N_BODIES], dt: f64, steps: i32) {
|
|
- for _ in 0..steps {
|
|
- let mut b_slice: &mut [_] = bodies;
|
|
- loop {
|
|
- let bi = match shift_mut_ref(&mut b_slice) {
|
|
- Some(bi) => bi,
|
|
- None => break
|
|
- };
|
|
- for bj in b_slice.iter_mut() {
|
|
- let dx = bi.x - bj.x;
|
|
- let dy = bi.y - bj.y;
|
|
- let dz = bi.z - bj.z;
|
|
-
|
|
- let d2 = dx * dx + dy * dy + dz * dz;
|
|
- let mag = dt / (d2 * d2.sqrt());
|
|
-
|
|
- let massj_mag = bj.mass * mag;
|
|
- bi.vx -= dx * massj_mag;
|
|
- bi.vy -= dy * massj_mag;
|
|
- bi.vz -= dz * massj_mag;
|
|
-
|
|
- let massi_mag = bi.mass * mag;
|
|
- bj.vx += dx * massi_mag;
|
|
- bj.vy += dy * massi_mag;
|
|
- bj.vz += dz * massi_mag;
|
|
- }
|
|
- bi.x += dt * bi.vx;
|
|
- bi.y += dt * bi.vy;
|
|
- bi.z += dt * bi.vz;
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-fn energy(bodies: &[Planet;N_BODIES]) -> f64 {
|
|
- let mut e = 0.0;
|
|
- let mut bodies = bodies.iter();
|
|
- loop {
|
|
- let bi = match bodies.next() {
|
|
- Some(bi) => bi,
|
|
- None => break
|
|
- };
|
|
- e += (bi.vx * bi.vx + bi.vy * bi.vy + bi.vz * bi.vz) * bi.mass / 2.0;
|
|
- for bj in bodies.clone() {
|
|
- let dx = bi.x - bj.x;
|
|
- let dy = bi.y - bj.y;
|
|
- let dz = bi.z - bj.z;
|
|
- let dist = (dx * dx + dy * dy + dz * dz).sqrt();
|
|
- e -= bi.mass * bj.mass / dist;
|
|
- }
|
|
- }
|
|
- e
|
|
-}
|
|
-
|
|
-fn offset_momentum(bodies: &mut [Planet;N_BODIES]) {
|
|
- let mut px = 0.0;
|
|
- let mut py = 0.0;
|
|
- let mut pz = 0.0;
|
|
- for bi in bodies.iter() {
|
|
- px += bi.vx * bi.mass;
|
|
- py += bi.vy * bi.mass;
|
|
- pz += bi.vz * bi.mass;
|
|
- }
|
|
- let sun = &mut bodies[0];
|
|
- sun.vx = - px / SOLAR_MASS;
|
|
- sun.vy = - py / SOLAR_MASS;
|
|
- sun.vz = - pz / SOLAR_MASS;
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let n = std::env::args().nth(1).expect("need one arg").parse().unwrap();
|
|
- let mut bodies = BODIES;
|
|
-
|
|
- offset_momentum(&mut bodies);
|
|
- println!("{:.9}", energy(&bodies));
|
|
-
|
|
- advance(&mut bodies, 0.01, n);
|
|
-
|
|
- println!("{:.9}", energy(&bodies));
|
|
-}
|
|
-
|
|
-/// Pop a mutable reference off the head of a slice, mutating the slice to no
|
|
-/// longer contain the mutable reference.
|
|
-fn shift_mut_ref<'a, T>(r: &mut &'a mut [T]) -> Option<&'a mut T> {
|
|
- if r.len() == 0 { return None }
|
|
- let tmp = std::mem::replace(r, &mut []);
|
|
- let (h, t) = tmp.split_at_mut(1);
|
|
- *r = t;
|
|
- Some(&mut h[0])
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/nbody.rs b/third_party/rust/simd/examples/nbody.rs
|
|
deleted file mode 100755
|
|
index d6d4e88e3741..000000000000
|
|
--- a/third_party/rust/simd/examples/nbody.rs
|
|
+++ /dev/null
|
|
@@ -1,170 +0,0 @@
|
|
-#![feature(cfg_target_feature)]
|
|
-
|
|
-extern crate simd;
|
|
-
|
|
-#[cfg(target_feature = "sse2")]
|
|
-use simd::x86::sse2::*;
|
|
-#[cfg(target_arch = "aarch64")]
|
|
-use simd::aarch64::neon::*;
|
|
-
|
|
-const PI: f64 = 3.141592653589793;
|
|
-const SOLAR_MASS: f64 = 4.0 * PI * PI;
|
|
-const DAYS_PER_YEAR: f64 = 365.24;
|
|
-
|
|
-struct Body {
|
|
- x: [f64; 3],
|
|
- _fill: f64,
|
|
- v: [f64; 3],
|
|
- mass: f64,
|
|
-}
|
|
-
|
|
-impl Body {
|
|
- fn new(x0: f64, x1: f64, x2: f64,
|
|
- v0: f64, v1: f64, v2: f64,
|
|
- mass: f64) -> Body {
|
|
- Body {
|
|
- x: [x0, x1, x2],
|
|
- _fill: 0.0,
|
|
- v: [v0, v1, v2],
|
|
- mass: mass,
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-const N_BODIES: usize = 5;
|
|
-const N: usize = N_BODIES * (N_BODIES - 1) / 2;
|
|
-fn offset_momentum(bodies: &mut [Body; N_BODIES]) {
|
|
- let (sun, rest) = bodies.split_at_mut(1);
|
|
- let sun = &mut sun[0];
|
|
- for body in rest {
|
|
- for k in 0..3 {
|
|
- sun.v[k] -= body.v[k] * body.mass / SOLAR_MASS;
|
|
- }
|
|
- }
|
|
-}
|
|
-fn advance(bodies: &mut [Body; N_BODIES], dt: f64) {
|
|
- let mut r = [[0.0; 4]; N];
|
|
- let mut mag = [0.0; N];
|
|
-
|
|
- let mut dx = [f64x2::splat(0.0); 3];
|
|
- let mut dsquared;
|
|
- let mut distance;
|
|
- let mut dmag;
|
|
-
|
|
- let mut i = 0;
|
|
- for j in 0..N_BODIES {
|
|
- for k in j+1..N_BODIES {
|
|
- for m in 0..3 {
|
|
- r[i][m] = bodies[j].x[m] - bodies[k].x[m];
|
|
- }
|
|
- i += 1;
|
|
- }
|
|
- }
|
|
-
|
|
- i = 0;
|
|
- while i < N {
|
|
- for m in 0..3 {
|
|
- dx[m] = f64x2::new(r[i][m], r[i+1][m]);
|
|
- }
|
|
-
|
|
- dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
|
|
- distance = dsquared.to_f32().approx_rsqrt().to_f64();
|
|
- for _ in 0..2 {
|
|
- distance = distance * f64x2::splat(1.5) -
|
|
- ((f64x2::splat(0.5) * dsquared) * distance) * (distance * distance)
|
|
- }
|
|
- dmag = f64x2::splat(dt) / dsquared * distance;
|
|
- dmag.store(&mut mag, i);
|
|
-
|
|
- i += 2;
|
|
- }
|
|
-
|
|
- i = 0;
|
|
- for j in 0..N_BODIES {
|
|
- for k in j+1..N_BODIES {
|
|
- for m in 0..3 {
|
|
- bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i];
|
|
- bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i];
|
|
- }
|
|
- i += 1
|
|
- }
|
|
- }
|
|
- for body in bodies {
|
|
- for m in 0..3 {
|
|
- body.x[m] += dt * body.v[m]
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-fn energy(bodies: &[Body; N_BODIES]) -> f64 {
|
|
- let mut e = 0.0;
|
|
- for i in 0..N_BODIES {
|
|
- let bi = &bodies[i];
|
|
- e += bi.mass * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) / 2.0;
|
|
- for j in i+1..N_BODIES {
|
|
- let bj = &bodies[j];
|
|
- let mut dx = [0.0; 3];
|
|
- for k in 0..3 {
|
|
- dx[k] = bi.x[k] - bj.x[k];
|
|
- }
|
|
- let mut distance = 0.0;
|
|
- for &d in &dx { distance += d * d }
|
|
- e -= bi.mass * bj.mass / distance.sqrt()
|
|
- }
|
|
- }
|
|
- e
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let mut bodies: [Body; N_BODIES] = [
|
|
- /* sun */
|
|
- Body::new(0.0, 0.0, 0.0,
|
|
- 0.0, 0.0, 0.0,
|
|
- SOLAR_MASS),
|
|
- /* jupiter */
|
|
- Body::new(4.84143144246472090e+00,
|
|
- -1.16032004402742839e+00,
|
|
- -1.03622044471123109e-01 ,
|
|
- 1.66007664274403694e-03 * DAYS_PER_YEAR,
|
|
- 7.69901118419740425e-03 * DAYS_PER_YEAR,
|
|
- -6.90460016972063023e-05 * DAYS_PER_YEAR ,
|
|
- 9.54791938424326609e-04 * SOLAR_MASS
|
|
- ),
|
|
- /* saturn */
|
|
- Body::new(8.34336671824457987e+00,
|
|
- 4.12479856412430479e+00,
|
|
- -4.03523417114321381e-01 ,
|
|
- -2.76742510726862411e-03 * DAYS_PER_YEAR,
|
|
- 4.99852801234917238e-03 * DAYS_PER_YEAR,
|
|
- 2.30417297573763929e-05 * DAYS_PER_YEAR ,
|
|
- 2.85885980666130812e-04 * SOLAR_MASS
|
|
- ),
|
|
- /* uranus */
|
|
- Body::new(1.28943695621391310e+01,
|
|
- -1.51111514016986312e+01,
|
|
- -2.23307578892655734e-01 ,
|
|
- 2.96460137564761618e-03 * DAYS_PER_YEAR,
|
|
- 2.37847173959480950e-03 * DAYS_PER_YEAR,
|
|
- -2.96589568540237556e-05 * DAYS_PER_YEAR ,
|
|
- 4.36624404335156298e-05 * SOLAR_MASS
|
|
- ),
|
|
- /* neptune */
|
|
- Body::new(1.53796971148509165e+01,
|
|
- -2.59193146099879641e+01,
|
|
- 1.79258772950371181e-01 ,
|
|
- 2.68067772490389322e-03 * DAYS_PER_YEAR,
|
|
- 1.62824170038242295e-03 * DAYS_PER_YEAR,
|
|
- -9.51592254519715870e-05 * DAYS_PER_YEAR ,
|
|
- 5.15138902046611451e-05 * SOLAR_MASS
|
|
- )
|
|
- ];
|
|
-
|
|
- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap();
|
|
-
|
|
- offset_momentum(&mut bodies);
|
|
- println!("{:.9}", energy(&bodies));
|
|
- for _ in 0..n {
|
|
- advance(&mut bodies, 0.01);
|
|
- }
|
|
- println!("{:.9}", energy(&bodies));
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/ops.rs b/third_party/rust/simd/examples/ops.rs
|
|
deleted file mode 100644
|
|
index f8c919101e3c..000000000000
|
|
--- a/third_party/rust/simd/examples/ops.rs
|
|
+++ /dev/null
|
|
@@ -1,10 +0,0 @@
|
|
-extern crate simd;
|
|
-
|
|
-use simd::*;
|
|
-
|
|
-#[allow(unused_variables)]
|
|
-fn main() {
|
|
- let x = i32x4::splat(1_i32);
|
|
- let y = -x;
|
|
- let z = !x;
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/spectral-norm-nosimd.rs b/third_party/rust/simd/examples/spectral-norm-nosimd.rs
|
|
deleted file mode 100644
|
|
index 919f9c61990f..000000000000
|
|
--- a/third_party/rust/simd/examples/spectral-norm-nosimd.rs
|
|
+++ /dev/null
|
|
@@ -1,106 +0,0 @@
|
|
-// The Computer Language Benchmarks Game
|
|
-// http://benchmarksgame.alioth.debian.org/
|
|
-//
|
|
-// contributed by the Rust Project Developers
|
|
-// contributed by TeXitoi
|
|
-
|
|
-#![allow(non_snake_case)]
|
|
-
|
|
-use std::iter::repeat;
|
|
-//use std::thread;
|
|
-
|
|
-// As std::simd::f64x2 is unstable, we provide a similar interface,
|
|
-// expecting llvm to autovectorize its usage.
|
|
-#[allow(non_camel_case_types)]
|
|
-struct f64x2(f64, f64);
|
|
-impl std::ops::Add for f64x2 {
|
|
- type Output = Self;
|
|
- fn add(self, rhs: Self) -> Self {
|
|
- f64x2(self.0 + rhs.0, self.1 + rhs.1)
|
|
- }
|
|
-}
|
|
-impl std::ops::Div for f64x2 {
|
|
- type Output = Self;
|
|
- fn div(self, rhs: Self) -> Self {
|
|
- f64x2(self.0 / rhs.0, self.1 / rhs.1)
|
|
- }
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap();
|
|
- let answer = spectralnorm(n);
|
|
- println!("{:.9}", answer);
|
|
-}
|
|
-
|
|
-fn spectralnorm(n: usize) -> f64 {
|
|
- assert!(n % 2 == 0, "only even lengths are accepted");
|
|
- let mut u = repeat(1.0).take(n).collect::<Vec<_>>();
|
|
- let mut v = u.clone();
|
|
- let mut tmp = v.clone();
|
|
- for _ in 0..10 {
|
|
- mult_AtAv(&u, &mut v, &mut tmp);
|
|
- mult_AtAv(&v, &mut u, &mut tmp);
|
|
- }
|
|
- (dot(&u, &v) / dot(&v, &v)).sqrt()
|
|
-}
|
|
-
|
|
-fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
|
|
- mult_Av(v, tmp);
|
|
- mult_Atv(tmp, out);
|
|
-}
|
|
-
|
|
-fn mult_Av(v: &[f64], out: &mut [f64]) {
|
|
- parallel(out, |start, out| mult(v, out, start, |i, j| A(i, j)));
|
|
-}
|
|
-
|
|
-fn mult_Atv(v: &[f64], out: &mut [f64]) {
|
|
- parallel(out, |start, out| mult(v, out, start, |i, j| A(j, i)));
|
|
-}
|
|
-
|
|
-fn mult<F>(v: &[f64], out: &mut [f64], start: usize, a: F)
|
|
- where F: Fn(usize, usize) -> f64 {
|
|
- for (i, slot) in out.iter_mut().enumerate().map(|(i, s)| (i + start, s)) {
|
|
- let mut sum = f64x2(0.0, 0.0);
|
|
- for (j, chunk) in v.chunks(2).enumerate().map(|(j, s)| (2 * j, s)) {
|
|
- let top = f64x2(chunk[0], chunk[1]);
|
|
- let bot = f64x2(a(i, j), a(i, j + 1));
|
|
- sum = sum + top / bot;
|
|
- }
|
|
- let f64x2(a, b) = sum;
|
|
- *slot = a + b;
|
|
- }
|
|
-}
|
|
-
|
|
-fn A(i: usize, j: usize) -> f64 {
|
|
- ((i + j) * (i + j + 1) / 2 + i + 1) as f64
|
|
-}
|
|
-
|
|
-fn dot(v: &[f64], u: &[f64]) -> f64 {
|
|
- v.iter().zip(u.iter()).map(|(a, b)| *a * *b).fold(0., |acc, i| acc + i)
|
|
-}
|
|
-
|
|
-//struct Racy<T>(T);
|
|
-//unsafe impl<T: 'static> Send for Racy<T> {}
|
|
-
|
|
-// Executes a closure in parallel over the given mutable slice. The closure `f`
|
|
-// is run in parallel and yielded the starting index within `v` as well as a
|
|
-// sub-slice of `v`.
|
|
-fn parallel<'a, T, F>(v: &mut [T], ref f: F)
|
|
- where T: 'static + Send + Sync,
|
|
-F: Fn(usize, &mut [T]) + Sync
|
|
-{
|
|
- f(0, v);
|
|
- /*let size = v.len() / 4 + 1;
|
|
- let jhs = v.chunks_mut(size).enumerate().map(|(i, chunk)| {
|
|
- // Need to convert `f` and `chunk` to something that can cross the task
|
|
- // boundary.
|
|
- let f = Racy(f as *const F as *const usize);
|
|
- let raw = Racy((&mut chunk[0] as *mut T, chunk.len()));
|
|
- thread::spawn(move|| {
|
|
- let f = f.0 as *const F;
|
|
- let raw = raw.0;
|
|
- unsafe { (*f)(i * size, std::slice::from_raw_parts_mut(raw.0, raw.1)) }
|
|
- })
|
|
- }).collect::<Vec<_>>();
|
|
- for jh in jhs { jh.join().unwrap(); }*/
|
|
-}
|
|
diff --git a/third_party/rust/simd/examples/spectral-norm.rs b/third_party/rust/simd/examples/spectral-norm.rs
|
|
deleted file mode 100755
|
|
index 656f52e4fad0..000000000000
|
|
--- a/third_party/rust/simd/examples/spectral-norm.rs
|
|
+++ /dev/null
|
|
@@ -1,74 +0,0 @@
|
|
-#![feature(cfg_target_feature)]
|
|
-#![allow(non_snake_case)]
|
|
-
|
|
-extern crate simd;
|
|
-
|
|
-#[cfg(target_feature = "sse2")]
|
|
-use simd::x86::sse2::f64x2;
|
|
-#[cfg(target_arch = "aarch64")]
|
|
-use simd::aarch64::neon::f64x2;
|
|
-
|
|
-fn A(i: usize, j: usize) -> f64 {
|
|
- ((i + j) * (i + j + 1) / 2 + i + 1) as f64
|
|
-}
|
|
-
|
|
-fn dot(x: &[f64], y: &[f64]) -> f64 {
|
|
- x.iter().zip(y).map(|(&x, &y)| x * y).fold(0.0, |a, b| a + b)
|
|
-}
|
|
-
|
|
-fn mult_Av(v: &[f64], out: &mut [f64]) {
|
|
- assert!(v.len() == out.len());
|
|
- assert!(v.len() % 2 == 0);
|
|
-
|
|
- for i in 0..v.len() {
|
|
- let mut sum = f64x2::splat(0.0);
|
|
-
|
|
- let mut j = 0;
|
|
- while j < v.len() {
|
|
- let b = f64x2::load(v, j);
|
|
- let a = f64x2::new(A(i, j), A(i, j + 1));
|
|
- sum = sum + b / a;
|
|
- j += 2
|
|
- }
|
|
- out[i] = sum.extract(0) + sum.extract(1);
|
|
- }
|
|
-}
|
|
-
|
|
-fn mult_Atv(v: &[f64], out: &mut [f64]) {
|
|
- assert!(v.len() == out.len());
|
|
- assert!(v.len() % 2 == 0);
|
|
-
|
|
- for i in 0..v.len() {
|
|
- let mut sum = f64x2::splat(0.0);
|
|
-
|
|
- let mut j = 0;
|
|
- while j < v.len() {
|
|
- let b = f64x2::load(v, j);
|
|
- let a = f64x2::new(A(j, i), A(j + 1, i));
|
|
- sum = sum + b / a;
|
|
- j += 2
|
|
- }
|
|
- out[i] = sum.extract(0) + sum.extract(1);
|
|
- }
|
|
-}
|
|
-
|
|
-fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
|
|
- mult_Av(v, tmp);
|
|
- mult_Atv(tmp, out);
|
|
-}
|
|
-
|
|
-fn main() {
|
|
- let mut n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap();
|
|
- if n % 2 == 1 { n += 1 }
|
|
-
|
|
- let mut u = vec![1.0; n];
|
|
- let mut v = u.clone();
|
|
- let mut tmp = u.clone();
|
|
-
|
|
- for _ in 0..10 {
|
|
- mult_AtAv(&u, &mut v, &mut tmp);
|
|
- mult_AtAv(&v, &mut u, &mut tmp);
|
|
- }
|
|
-
|
|
- println!("{:.9}", (dot(&u, &v) / dot(&v, &v)).sqrt());
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/aarch64/mod.rs b/third_party/rust/simd/src/aarch64/mod.rs
|
|
deleted file mode 100644
|
|
index 5ba0a302b4d1..000000000000
|
|
--- a/third_party/rust/simd/src/aarch64/mod.rs
|
|
+++ /dev/null
|
|
@@ -1,3 +0,0 @@
|
|
-//! Features specific to AArch64 CPUs.
|
|
-
|
|
-pub mod neon;
|
|
diff --git a/third_party/rust/simd/src/aarch64/neon.rs b/third_party/rust/simd/src/aarch64/neon.rs
|
|
deleted file mode 100644
|
|
index 0cca05a52788..000000000000
|
|
--- a/third_party/rust/simd/src/aarch64/neon.rs
|
|
+++ /dev/null
|
|
@@ -1,681 +0,0 @@
|
|
-use super::super::*;
|
|
-use {simd_cast, f32x2};
|
|
-
|
|
-pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2};
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct u32x2(u32, u32);
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct i32x2(i32, i32);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct u16x4(u16, u16, u16, u16);
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct i16x4(i16, i16, i16, i16);
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct u8x8(u8, u8, u8, u8,
|
|
- u8, u8, u8, u8);
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct i8x8(i8, i8, i8, i8,
|
|
- i8, i8, i8, i8);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct i64x1(i64);
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct u64x1(u64);
|
|
-#[repr(simd)]
|
|
-#[derive(Copy, Clone)]
|
|
-pub struct f64x1(f64);
|
|
-
|
|
-#[allow(dead_code)]
|
|
-extern "platform-intrinsic" {
|
|
- fn aarch64_vhadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vhadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vhadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vhadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vhadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vhadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vqadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vqadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vqadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vqadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vqadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vqadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vqadd_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vqadd_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn aarch64_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vuqadd_s8(x: i8x16, y: u8x16) -> i8x16;
|
|
- fn aarch64_vuqadd_s16(x: i16x8, y: u16x8) -> i16x8;
|
|
- fn aarch64_vuqadd_s32(x: i32x4, y: u32x4) -> i32x4;
|
|
- fn aarch64_vuqadd_s64(x: i64x2, y: u64x2) -> i64x2;
|
|
- fn aarch64_vsqadd_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn aarch64_vsqadd_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn aarch64_vsqadd_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn aarch64_vsqadd_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn aarch64_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8;
|
|
- fn aarch64_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8;
|
|
- fn aarch64_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4;
|
|
- fn aarch64_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4;
|
|
- fn aarch64_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2;
|
|
- fn aarch64_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2;
|
|
- fn aarch64_vfmulx_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vfmulx_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vfmulxq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vfmulxq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vfma_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vfma_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vfmaq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vmull_s8(x: i8x8, y: i8x8) -> i16x8;
|
|
- fn aarch64_vmull_u8(x: u8x8, y: u8x8) -> u16x8;
|
|
- fn aarch64_vmull_s16(x: i16x4, y: i16x4) -> i32x4;
|
|
- fn aarch64_vmull_u16(x: u16x4, y: u16x4) -> u32x4;
|
|
- fn aarch64_vmull_s32(x: i32x2, y: i32x2) -> i64x2;
|
|
- fn aarch64_vmull_u32(x: u32x2, y: u32x2) -> u64x2;
|
|
- fn aarch64_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8;
|
|
- fn aarch64_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4;
|
|
- fn aarch64_vhsub_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vhsub_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vhsub_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vhsub_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vhsub_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vhsub_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vqsub_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vqsub_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vqsub_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vqsub_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vqsub_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vqsub_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vqsub_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vqsub_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn aarch64_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8;
|
|
- fn aarch64_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8;
|
|
- fn aarch64_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4;
|
|
- fn aarch64_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4;
|
|
- fn aarch64_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2;
|
|
- fn aarch64_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2;
|
|
- fn aarch64_vabd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vabd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vabd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vabd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vabd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vabd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vabd_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vabd_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vabdq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vabdq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vabdq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vabdq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vabdq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vabdq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vabdq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vabdq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vmax_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vmax_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vmax_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vmax_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vmax_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vmax_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vmax_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vmax_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vmaxq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vmin_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vmin_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vmin_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vmin_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vmin_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vmin_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vmin_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vmin_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vminq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vminq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vminq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vminq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vminq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vminq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vminq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vminq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vmaxnm_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vmaxnm_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vminnm_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vminnm_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vminnmq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vminnmq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn aarch64_vshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn aarch64_vshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn aarch64_vshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn aarch64_vshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn aarch64_vshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn aarch64_vshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn aarch64_vshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn aarch64_vqshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vqshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn aarch64_vqshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vqshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn aarch64_vqshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vqshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn aarch64_vqshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vqshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn aarch64_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn aarch64_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn aarch64_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn aarch64_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn aarch64_vrshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vrshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn aarch64_vrshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vrshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn aarch64_vrshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vrshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn aarch64_vrshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vrshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn aarch64_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn aarch64_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn aarch64_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn aarch64_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn aarch64_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn aarch64_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn aarch64_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn aarch64_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn aarch64_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn aarch64_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn aarch64_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn aarch64_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn aarch64_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn aarch64_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn aarch64_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn aarch64_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn aarch64_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn aarch64_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn aarch64_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn aarch64_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8;
|
|
- fn aarch64_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn aarch64_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4;
|
|
- fn aarch64_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn aarch64_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2;
|
|
- fn aarch64_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn aarch64_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8;
|
|
- fn aarch64_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn aarch64_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4;
|
|
- fn aarch64_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn aarch64_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2;
|
|
- fn aarch64_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn aarch64_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8;
|
|
- fn aarch64_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn aarch64_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4;
|
|
- fn aarch64_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn aarch64_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2;
|
|
- fn aarch64_vsri_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vsri_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vsri_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vsri_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vsri_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vsri_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vsri_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vsri_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn aarch64_vsriq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vsriq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vsriq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vsriq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vsriq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vsriq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vsriq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vsriq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vsli_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vsli_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vsli_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vsli_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vsli_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vsli_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vsli_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn aarch64_vsli_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn aarch64_vsliq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vsliq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vsliq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vsliq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vsliq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vsliq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vsliq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vsliq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vvqmovn_s16(x: i16x8) -> i8x8;
|
|
- fn aarch64_vvqmovn_u16(x: u16x8) -> u8x8;
|
|
- fn aarch64_vvqmovn_s32(x: i32x4) -> i16x4;
|
|
- fn aarch64_vvqmovn_u32(x: u32x4) -> u16x4;
|
|
- fn aarch64_vvqmovn_s64(x: i64x2) -> i32x2;
|
|
- fn aarch64_vvqmovn_u64(x: u64x2) -> u32x2;
|
|
- fn aarch64_vabs_s8(x: i8x8) -> i8x8;
|
|
- fn aarch64_vabs_s16(x: i16x4) -> i16x4;
|
|
- fn aarch64_vabs_s32(x: i32x2) -> i32x2;
|
|
- fn aarch64_vabs_s64(x: i64x1) -> i64x1;
|
|
- fn aarch64_vabsq_s8(x: i8x16) -> i8x16;
|
|
- fn aarch64_vabsq_s16(x: i16x8) -> i16x8;
|
|
- fn aarch64_vabsq_s32(x: i32x4) -> i32x4;
|
|
- fn aarch64_vabsq_s64(x: i64x2) -> i64x2;
|
|
- fn aarch64_vabs_f32(x: f32x2) -> f32x2;
|
|
- fn aarch64_vabs_f64(x: f64x1) -> f64x1;
|
|
- fn aarch64_vabsq_f32(x: f32x4) -> f32x4;
|
|
- fn aarch64_vabsq_f64(x: f64x2) -> f64x2;
|
|
- fn aarch64_vqabs_s8(x: i8x8) -> i8x8;
|
|
- fn aarch64_vqabs_s16(x: i16x4) -> i16x4;
|
|
- fn aarch64_vqabs_s32(x: i32x2) -> i32x2;
|
|
- fn aarch64_vqabs_s64(x: i64x1) -> i64x1;
|
|
- fn aarch64_vqabsq_s8(x: i8x16) -> i8x16;
|
|
- fn aarch64_vqabsq_s16(x: i16x8) -> i16x8;
|
|
- fn aarch64_vqabsq_s32(x: i32x4) -> i32x4;
|
|
- fn aarch64_vqabsq_s64(x: i64x2) -> i64x2;
|
|
- fn aarch64_vqneg_s8(x: i8x8) -> i8x8;
|
|
- fn aarch64_vqneg_s16(x: i16x4) -> i16x4;
|
|
- fn aarch64_vqneg_s32(x: i32x2) -> i32x2;
|
|
- fn aarch64_vqneg_s64(x: i64x1) -> i64x1;
|
|
- fn aarch64_vqnegq_s8(x: i8x16) -> i8x16;
|
|
- fn aarch64_vqnegq_s16(x: i16x8) -> i16x8;
|
|
- fn aarch64_vqnegq_s32(x: i32x4) -> i32x4;
|
|
- fn aarch64_vqnegq_s64(x: i64x2) -> i64x2;
|
|
- fn aarch64_vclz_s8(x: i8x8) -> i8x8;
|
|
- fn aarch64_vclz_u8(x: u8x8) -> u8x8;
|
|
- fn aarch64_vclz_s16(x: i16x4) -> i16x4;
|
|
- fn aarch64_vclz_u16(x: u16x4) -> u16x4;
|
|
- fn aarch64_vclz_s32(x: i32x2) -> i32x2;
|
|
- fn aarch64_vclz_u32(x: u32x2) -> u32x2;
|
|
- fn aarch64_vclzq_s8(x: i8x16) -> i8x16;
|
|
- fn aarch64_vclzq_u8(x: u8x16) -> u8x16;
|
|
- fn aarch64_vclzq_s16(x: i16x8) -> i16x8;
|
|
- fn aarch64_vclzq_u16(x: u16x8) -> u16x8;
|
|
- fn aarch64_vclzq_s32(x: i32x4) -> i32x4;
|
|
- fn aarch64_vclzq_u32(x: u32x4) -> u32x4;
|
|
- fn aarch64_vcls_s8(x: i8x8) -> i8x8;
|
|
- fn aarch64_vcls_u8(x: u8x8) -> u8x8;
|
|
- fn aarch64_vcls_s16(x: i16x4) -> i16x4;
|
|
- fn aarch64_vcls_u16(x: u16x4) -> u16x4;
|
|
- fn aarch64_vcls_s32(x: i32x2) -> i32x2;
|
|
- fn aarch64_vcls_u32(x: u32x2) -> u32x2;
|
|
- fn aarch64_vclsq_s8(x: i8x16) -> i8x16;
|
|
- fn aarch64_vclsq_u8(x: u8x16) -> u8x16;
|
|
- fn aarch64_vclsq_s16(x: i16x8) -> i16x8;
|
|
- fn aarch64_vclsq_u16(x: u16x8) -> u16x8;
|
|
- fn aarch64_vclsq_s32(x: i32x4) -> i32x4;
|
|
- fn aarch64_vclsq_u32(x: u32x4) -> u32x4;
|
|
- fn aarch64_vcnt_s8(x: i8x8) -> i8x8;
|
|
- fn aarch64_vcnt_u8(x: u8x8) -> u8x8;
|
|
- fn aarch64_vcntq_s8(x: i8x16) -> i8x16;
|
|
- fn aarch64_vcntq_u8(x: u8x16) -> u8x16;
|
|
- fn aarch64_vrecpe_u32(x: u32x2) -> u32x2;
|
|
- fn aarch64_vrecpe_f32(x: f32x2) -> f32x2;
|
|
- fn aarch64_vrecpe_f64(x: f64x1) -> f64x1;
|
|
- fn aarch64_vrecpeq_u32(x: u32x4) -> u32x4;
|
|
- fn aarch64_vrecpeq_f32(x: f32x4) -> f32x4;
|
|
- fn aarch64_vrecpeq_f64(x: f64x2) -> f64x2;
|
|
- fn aarch64_vrecps_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vrecps_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vrecpsq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vsqrt_f32(x: f32x2) -> f32x2;
|
|
- fn aarch64_vsqrt_f64(x: f64x1) -> f64x1;
|
|
- fn aarch64_vsqrtq_f32(x: f32x4) -> f32x4;
|
|
- fn aarch64_vsqrtq_f64(x: f64x2) -> f64x2;
|
|
- fn aarch64_vrsqrte_u32(x: u32x2) -> u32x2;
|
|
- fn aarch64_vrsqrte_f32(x: f32x2) -> f32x2;
|
|
- fn aarch64_vrsqrte_f64(x: f64x1) -> f64x1;
|
|
- fn aarch64_vrsqrteq_u32(x: u32x4) -> u32x4;
|
|
- fn aarch64_vrsqrteq_f32(x: f32x4) -> f32x4;
|
|
- fn aarch64_vrsqrteq_f64(x: f64x2) -> f64x2;
|
|
- fn aarch64_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vrsqrts_f64(x: f64x1, y: f64x1) -> f64x1;
|
|
- fn aarch64_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vrsqrtsq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vrbit_s8(x: i8x8) -> i8x8;
|
|
- fn aarch64_vrbit_u8(x: u8x8) -> u8x8;
|
|
- fn aarch64_vrbitq_s8(x: i8x16) -> i8x16;
|
|
- fn aarch64_vrbitq_u8(x: u8x16) -> u8x16;
|
|
- fn aarch64_vpadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vpadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vpadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vpadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vpadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vpadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vpadd_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vpaddq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vpaddq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vpaddq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vpaddq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vpaddq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vpaddq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vpaddq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vpaddq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vpaddq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vpaddq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vpaddl_s16(x: i8x8) -> i16x4;
|
|
- fn aarch64_vpaddl_u16(x: u8x8) -> u16x4;
|
|
- fn aarch64_vpaddl_s32(x: i16x4) -> i32x2;
|
|
- fn aarch64_vpaddl_u32(x: u16x4) -> u32x2;
|
|
- fn aarch64_vpaddl_s64(x: i32x2) -> i64x1;
|
|
- fn aarch64_vpaddl_u64(x: u32x2) -> u64x1;
|
|
- fn aarch64_vpaddlq_s16(x: i8x16) -> i16x8;
|
|
- fn aarch64_vpaddlq_u16(x: u8x16) -> u16x8;
|
|
- fn aarch64_vpaddlq_s32(x: i16x8) -> i32x4;
|
|
- fn aarch64_vpaddlq_u32(x: u16x8) -> u32x4;
|
|
- fn aarch64_vpaddlq_s64(x: i32x4) -> i64x2;
|
|
- fn aarch64_vpaddlq_u64(x: u32x4) -> u64x2;
|
|
- fn aarch64_vpmax_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vpmax_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vpmax_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vpmax_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vpmax_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vpmax_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vpmax_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vpmaxq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vpmaxq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vpmaxq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vpmaxq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vpmaxq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vpmaxq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vpmaxq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vpmaxq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vpmaxq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vpmaxq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vpmin_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vpmin_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vpmin_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vpmin_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vpmin_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vpmin_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vpmin_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vpminq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vpminq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vpminq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vpminq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vpminq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vpminq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vpminq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vpminq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vpminq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vpminq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vpmaxnm_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn aarch64_vpmaxnm_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn aarch64_vpmaxnm_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn aarch64_vpmaxnm_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn aarch64_vpmaxnm_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn aarch64_vpmaxnm_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn aarch64_vpmaxnm_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vpmaxnmq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn aarch64_vpmaxnmq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vpmaxnmq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn aarch64_vpmaxnmq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn aarch64_vpmaxnmq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn aarch64_vpmaxnmq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn aarch64_vpmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vpmaxnmq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn aarch64_vpmaxnmq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn aarch64_vpmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vpminnm_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn aarch64_vpminnmq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn aarch64_vpminnmq_f64(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn aarch64_vaddv_s8(x: i8x8) -> i8;
|
|
- fn aarch64_vaddv_u8(x: u8x8) -> u8;
|
|
- fn aarch64_vaddv_s16(x: i16x4) -> i16;
|
|
- fn aarch64_vaddv_u16(x: u16x4) -> u16;
|
|
- fn aarch64_vaddv_s32(x: i32x2) -> i32;
|
|
- fn aarch64_vaddv_u32(x: u32x2) -> u32;
|
|
- fn aarch64_vaddv_f32(x: f32x2) -> f32;
|
|
- fn aarch64_vaddvq_s8(x: i8x16) -> i8;
|
|
- fn aarch64_vaddvq_u8(x: u8x16) -> u8;
|
|
- fn aarch64_vaddvq_s16(x: i16x8) -> i16;
|
|
- fn aarch64_vaddvq_u16(x: u16x8) -> u16;
|
|
- fn aarch64_vaddvq_s32(x: i32x4) -> i32;
|
|
- fn aarch64_vaddvq_u32(x: u32x4) -> u32;
|
|
- fn aarch64_vaddvq_f32(x: f32x4) -> f32;
|
|
- fn aarch64_vaddvq_s64(x: i64x2) -> i64;
|
|
- fn aarch64_vaddvq_u64(x: u64x2) -> u64;
|
|
- fn aarch64_vaddvq_f64(x: f64x2) -> f64;
|
|
- fn aarch64_vaddlv_s8(x: i8x8) -> i16;
|
|
- fn aarch64_vaddlv_u8(x: u8x8) -> u16;
|
|
- fn aarch64_vaddlv_s16(x: i16x4) -> i32;
|
|
- fn aarch64_vaddlv_u16(x: u16x4) -> u32;
|
|
- fn aarch64_vaddlv_s32(x: i32x2) -> i64;
|
|
- fn aarch64_vaddlv_u32(x: u32x2) -> u64;
|
|
- fn aarch64_vaddlvq_s8(x: i8x16) -> i16;
|
|
- fn aarch64_vaddlvq_u8(x: u8x16) -> u16;
|
|
- fn aarch64_vaddlvq_s16(x: i16x8) -> i32;
|
|
- fn aarch64_vaddlvq_u16(x: u16x8) -> u32;
|
|
- fn aarch64_vaddlvq_s32(x: i32x4) -> i64;
|
|
- fn aarch64_vaddlvq_u32(x: u32x4) -> u64;
|
|
- fn aarch64_vmaxv_s8(x: i8x8) -> i8;
|
|
- fn aarch64_vmaxv_u8(x: u8x8) -> u8;
|
|
- fn aarch64_vmaxv_s16(x: i16x4) -> i16;
|
|
- fn aarch64_vmaxv_u16(x: u16x4) -> u16;
|
|
- fn aarch64_vmaxv_s32(x: i32x2) -> i32;
|
|
- fn aarch64_vmaxv_u32(x: u32x2) -> u32;
|
|
- fn aarch64_vmaxv_f32(x: f32x2) -> f32;
|
|
- fn aarch64_vmaxvq_s8(x: i8x16) -> i8;
|
|
- fn aarch64_vmaxvq_u8(x: u8x16) -> u8;
|
|
- fn aarch64_vmaxvq_s16(x: i16x8) -> i16;
|
|
- fn aarch64_vmaxvq_u16(x: u16x8) -> u16;
|
|
- fn aarch64_vmaxvq_s32(x: i32x4) -> i32;
|
|
- fn aarch64_vmaxvq_u32(x: u32x4) -> u32;
|
|
- fn aarch64_vmaxvq_f32(x: f32x4) -> f32;
|
|
- fn aarch64_vmaxvq_f64(x: f64x2) -> f64;
|
|
- fn aarch64_vminv_s8(x: i8x8) -> i8;
|
|
- fn aarch64_vminv_u8(x: u8x8) -> u8;
|
|
- fn aarch64_vminv_s16(x: i16x4) -> i16;
|
|
- fn aarch64_vminv_u16(x: u16x4) -> u16;
|
|
- fn aarch64_vminv_s32(x: i32x2) -> i32;
|
|
- fn aarch64_vminv_u32(x: u32x2) -> u32;
|
|
- fn aarch64_vminv_f32(x: f32x2) -> f32;
|
|
- fn aarch64_vminvq_s8(x: i8x16) -> i8;
|
|
- fn aarch64_vminvq_u8(x: u8x16) -> u8;
|
|
- fn aarch64_vminvq_s16(x: i16x8) -> i16;
|
|
- fn aarch64_vminvq_u16(x: u16x8) -> u16;
|
|
- fn aarch64_vminvq_s32(x: i32x4) -> i32;
|
|
- fn aarch64_vminvq_u32(x: u32x4) -> u32;
|
|
- fn aarch64_vminvq_f32(x: f32x4) -> f32;
|
|
- fn aarch64_vminvq_f64(x: f64x2) -> f64;
|
|
- fn aarch64_vmaxnmv_f32(x: f32x2) -> f32;
|
|
- fn aarch64_vmaxnmvq_f32(x: f32x4) -> f32;
|
|
- fn aarch64_vmaxnmvq_f64(x: f64x2) -> f64;
|
|
- fn aarch64_vminnmv_f32(x: f32x2) -> f32;
|
|
- fn aarch64_vminnmvq_f32(x: f32x4) -> f32;
|
|
- fn aarch64_vminnmvq_f64(x: f64x2) -> f64;
|
|
- fn aarch64_vqtbl1_s8(x: i8x16, y: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbl1_u8(x: u8x16, y: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbl1q_s8(x: i8x16, y: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbl1q_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn aarch64_vqtbx1_s8(x: i8x8, y: i8x16, z: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbx1_u8(x: u8x8, y: u8x16, z: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbx1q_s8(x: i8x16, y: i8x16, z: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbx1q_u8(x: u8x16, y: u8x16, z: u8x16) -> u8x16;
|
|
- fn aarch64_vqtbl2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbl2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbl2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbl2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16;
|
|
- fn aarch64_vqtbx2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbx2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbx2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbx2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16;
|
|
- fn aarch64_vqtbl3_s8(x: (i8x16, i8x16, i8x16), y: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbl3_u8(x: (u8x16, u8x16, u8x16), y: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbl3q_s8(x: (i8x16, i8x16, i8x16), y: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbl3q_u8(x: (u8x16, u8x16, u8x16), y: u8x16) -> u8x16;
|
|
- fn aarch64_vqtbx3_s8(x: i8x8, y: (i8x16, i8x16, i8x16), z: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbx3_u8(x: u8x8, y: (u8x16, u8x16, u8x16), z: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbx3q_s8(x: i8x16, y: (i8x16, i8x16, i8x16), z: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbx3q_u8(x: u8x16, y: (u8x16, u8x16, u8x16), z: u8x16) -> u8x16;
|
|
- fn aarch64_vqtbl4_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbl4_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbl4q_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbl4q_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x16) -> u8x16;
|
|
- fn aarch64_vqtbx4_s8(x: i8x8, y: (i8x16, i8x16, i8x16, i8x16), z: u8x8) -> i8x8;
|
|
- fn aarch64_vqtbx4_u8(x: u8x8, y: (u8x16, u8x16, u8x16, u8x16), z: u8x8) -> u8x8;
|
|
- fn aarch64_vqtbx4q_s8(x: i8x16, y: (i8x16, i8x16, i8x16, i8x16), z: u8x16) -> i8x16;
|
|
- fn aarch64_vqtbx4q_u8(x: u8x16, y: (u8x16, u8x16, u8x16, u8x16), z: u8x16) -> u8x16;
|
|
-}
|
|
-
|
|
-pub trait Aarch64F32x4 {
|
|
- fn to_f64(self) -> f64x2;
|
|
-}
|
|
-impl Aarch64F32x4 for f32x4 {
|
|
- #[inline]
|
|
- fn to_f64(self) -> f64x2 {
|
|
- unsafe {
|
|
- simd_cast(f32x2(self.0, self.1))
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Aarch64U8x16 {
|
|
- fn table_lookup_1(self, t0: u8x16) -> u8x16;
|
|
-}
|
|
-impl Aarch64U8x16 for u8x16 {
|
|
- #[inline]
|
|
- fn table_lookup_1(self, t0: u8x16) -> u8x16 {
|
|
- unsafe {aarch64_vqtbl1q_u8(t0, self)}
|
|
- }
|
|
-}
|
|
-pub trait Aarch64I8x16 {
|
|
- fn table_lookup_1(self, t0: i8x16) -> i8x16;
|
|
-}
|
|
-impl Aarch64I8x16 for i8x16 {
|
|
- #[inline]
|
|
- fn table_lookup_1(self, t0: i8x16) -> i8x16 {
|
|
- unsafe {aarch64_vqtbl2q_s8((t0, t0), ::bitcast(self))}
|
|
- }
|
|
-}
|
|
-
|
|
-#[doc(hidden)]
|
|
-pub mod common {
|
|
- use super::super::super::*;
|
|
- use core::mem;
|
|
-
|
|
- #[inline]
|
|
- pub fn f32x4_sqrt(x: f32x4) -> f32x4 {
|
|
- unsafe {super::aarch64_vsqrtq_f32(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 {
|
|
- unsafe {super::aarch64_vrsqrteq_f32(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 {
|
|
- unsafe {super::aarch64_vrecpeq_f32(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 {
|
|
- unsafe {super::aarch64_vmaxq_f32(x, y)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 {
|
|
- unsafe {super::aarch64_vminq_f32(x, y)}
|
|
- }
|
|
-
|
|
- macro_rules! bools {
|
|
- ($($ty: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => {
|
|
- $(
|
|
- #[inline]
|
|
- pub fn $all(x: $ty) -> bool {
|
|
- unsafe {
|
|
- super::$min(mem::transmute(x)) != 0
|
|
- }
|
|
- }
|
|
- #[inline]
|
|
- pub fn $any(x: $ty) -> bool {
|
|
- unsafe {
|
|
- super::$max(mem::transmute(x)) != 0
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
- }
|
|
-
|
|
- bools! {
|
|
- bool32fx4, bool32fx4_all(aarch64_vminvq_u32), bool32fx4_any(aarch64_vmaxvq_u32);
|
|
- bool8ix16, bool8ix16_all(aarch64_vminvq_u8), bool8ix16_any(aarch64_vmaxvq_u8);
|
|
- bool16ix8, bool16ix8_all(aarch64_vminvq_u16), bool16ix8_any(aarch64_vmaxvq_u16);
|
|
- bool32ix4, bool32ix4_all(aarch64_vminvq_u32), bool32ix4_any(aarch64_vmaxvq_u32);
|
|
- }
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/arm/mod.rs b/third_party/rust/simd/src/arm/mod.rs
|
|
deleted file mode 100644
|
|
index 0d451103840b..000000000000
|
|
--- a/third_party/rust/simd/src/arm/mod.rs
|
|
+++ /dev/null
|
|
@@ -1,4 +0,0 @@
|
|
-//! Features specific to ARM CPUs.
|
|
-
|
|
-#[cfg(any(feature = "doc", target_feature = "neon"))]
|
|
-pub mod neon;
|
|
diff --git a/third_party/rust/simd/src/arm/neon.rs b/third_party/rust/simd/src/arm/neon.rs
|
|
deleted file mode 100644
|
|
index 8c90a72bb0dc..000000000000
|
|
--- a/third_party/rust/simd/src/arm/neon.rs
|
|
+++ /dev/null
|
|
@@ -1,622 +0,0 @@
|
|
-use super::super::*;
|
|
-use sixty_four::{i64x2, u64x2};
|
|
-
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct u32x2(u32, u32);
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct i32x2(i32, i32);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool32ix2(i32, i32);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct f32x2(f32, f32);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool32fx2(i32, i32);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct u16x4(u16, u16, u16, u16);
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct i16x4(i16, i16, i16, i16);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool16ix4(i16, i16, i16, i16);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct u8x8(u8, u8, u8, u8,
|
|
- u8, u8, u8, u8);
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct i8x8(i8, i8, i8, i8,
|
|
- i8, i8, i8, i8);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool8ix8(i8, i8, i8, i8,
|
|
- i8, i8, i8, i8);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct i64x1(i64);
|
|
-#[repr(simd)]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct u64x1(u64);
|
|
-
|
|
-macro_rules! half_bools {
|
|
- ($($ty: ty, $as_u: ty, $elem: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => {
|
|
- $(
|
|
- impl $ty {
|
|
- #[inline]
|
|
- pub fn $all(self) -> bool {
|
|
- unsafe {
|
|
- let t: $as_u = bitcast(self);
|
|
- let y = $min(t, mem::uninitialized());
|
|
- let y32: u32x2 = bitcast(y);
|
|
- y32.0 == 0xFFFFFFFF
|
|
- }
|
|
- }
|
|
- #[inline]
|
|
- pub fn $any(self) -> bool {
|
|
- unsafe {
|
|
- let t: $as_u = bitcast(self);
|
|
- let y = $max(t, mem::uninitialized());
|
|
- let y32: u32x2 = bitcast(y);
|
|
- y32.0 != 0
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- impl Clone for $ty {
|
|
- #[inline] fn clone(&self) -> Self {
|
|
- *self
|
|
- }
|
|
- }
|
|
-
|
|
- unsafe impl Simd for $ty {
|
|
- type Bool = $ty;
|
|
- type Elem = $elem;
|
|
- }
|
|
-
|
|
- )*
|
|
- }
|
|
-}
|
|
-
|
|
-half_bools! {
|
|
- bool32fx2, u32x2, i32, bool32fx2_all(arm_vpmin_u32), bool32fx2_any(arm_vpmax_u32);
|
|
- bool8ix8, u8x8, i8, bool8ix8_all(arm_vpmin_u8), bool8ix8_any(arm_vpmax_u8);
|
|
- bool16ix4, u16x4, i16, bool16ix4_all(arm_vpmin_u16), bool16ix4_any(arm_vpmax_u16);
|
|
- bool32ix2, u32x2, f32, bool32ix2_all(arm_vpmin_u32), bool32ix2_any(arm_vpmax_u32);
|
|
-}
|
|
-
|
|
-macro_rules! half_simd {
|
|
- ($($ty: ty, $elem: ty, $bool_ty: ty;)*) => {
|
|
- $(
|
|
- unsafe impl Simd for $ty {
|
|
- type Bool = $bool_ty;
|
|
- type Elem = $elem;
|
|
- }
|
|
- )*
|
|
- }
|
|
-}
|
|
-
|
|
-half_simd! {
|
|
- f32x2, f32, bool32fx2;
|
|
- u32x2, u32, bool32ix2;
|
|
- i32x2, i32, bool32ix2;
|
|
- u16x4, u16, bool16ix4;
|
|
- i16x4, i16, bool16ix4;
|
|
- u8x8, u8, bool8ix8;
|
|
- i8x8, i8, bool8ix8;
|
|
-}
|
|
-
|
|
-#[allow(dead_code)]
|
|
-extern "platform-intrinsic" {
|
|
- fn arm_vhadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vhadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vhadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vhadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vhadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vhadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vqadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vqadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vqadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vqadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vqadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vqadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vqadd_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vqadd_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn arm_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn arm_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8;
|
|
- fn arm_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8;
|
|
- fn arm_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4;
|
|
- fn arm_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4;
|
|
- fn arm_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2;
|
|
- fn arm_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2;
|
|
- fn arm_vfma_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn arm_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vmull_s8(x: i8x8, y: i8x8) -> i16x8;
|
|
- fn arm_vmull_u8(x: u8x8, y: u8x8) -> u16x8;
|
|
- fn arm_vmull_s16(x: i16x4, y: i16x4) -> i32x4;
|
|
- fn arm_vmull_u16(x: u16x4, y: u16x4) -> u32x4;
|
|
- fn arm_vmull_s32(x: i32x2, y: i32x2) -> i64x2;
|
|
- fn arm_vmull_u32(x: u32x2, y: u32x2) -> u64x2;
|
|
- fn arm_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8;
|
|
- fn arm_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4;
|
|
- fn arm_vhsub_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vhsub_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vhsub_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vhsub_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vhsub_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vhsub_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vqsub_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vqsub_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vqsub_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vqsub_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vqsub_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vqsub_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vqsub_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vqsub_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn arm_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn arm_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8;
|
|
- fn arm_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8;
|
|
- fn arm_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4;
|
|
- fn arm_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4;
|
|
- fn arm_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2;
|
|
- fn arm_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2;
|
|
- fn arm_vabd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vabd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vabd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vabd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vabd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vabd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vabd_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vabdq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vabdq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vabdq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vabdq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vabdq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vabdq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vabdq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn arm_vmax_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vmax_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vmax_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vmax_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vmax_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vmax_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vmax_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn arm_vmin_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vmin_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vmin_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vmin_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vmin_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vmin_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vmin_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vminq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vminq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vminq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vminq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vminq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vminq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vminq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn arm_vshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn arm_vshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn arm_vshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn arm_vshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn arm_vshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn arm_vshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn arm_vshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn arm_vshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn arm_vqshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vqshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn arm_vqshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vqshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn arm_vqshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vqshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn arm_vqshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vqshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn arm_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn arm_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn arm_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn arm_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn arm_vrshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vrshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn arm_vrshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vrshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn arm_vrshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vrshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn arm_vrshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vrshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn arm_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn arm_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn arm_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn arm_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn arm_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8;
|
|
- fn arm_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4;
|
|
- fn arm_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2;
|
|
- fn arm_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1;
|
|
- fn arm_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16;
|
|
- fn arm_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8;
|
|
- fn arm_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4;
|
|
- fn arm_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2;
|
|
- fn arm_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn arm_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn arm_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn arm_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn arm_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn arm_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn arm_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn arm_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8;
|
|
- fn arm_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn arm_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4;
|
|
- fn arm_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn arm_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2;
|
|
- fn arm_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn arm_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8;
|
|
- fn arm_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn arm_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4;
|
|
- fn arm_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn arm_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2;
|
|
- fn arm_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8;
|
|
- fn arm_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8;
|
|
- fn arm_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4;
|
|
- fn arm_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4;
|
|
- fn arm_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2;
|
|
- fn arm_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2;
|
|
- fn arm_vsri_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vsri_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vsri_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vsri_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vsri_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vsri_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vsri_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vsri_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn arm_vsriq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vsriq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vsriq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vsriq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vsriq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vsriq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vsriq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vsriq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn arm_vsli_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vsli_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vsli_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vsli_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vsli_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vsli_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vsli_s64(x: i64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vsli_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn arm_vsliq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vsliq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vsliq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vsliq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vsliq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vsliq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vsliq_s64(x: i64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vsliq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn arm_vvqmovn_s16(x: i16x8) -> i8x8;
|
|
- fn arm_vvqmovn_u16(x: u16x8) -> u8x8;
|
|
- fn arm_vvqmovn_s32(x: i32x4) -> i16x4;
|
|
- fn arm_vvqmovn_u32(x: u32x4) -> u16x4;
|
|
- fn arm_vvqmovn_s64(x: i64x2) -> i32x2;
|
|
- fn arm_vvqmovn_u64(x: u64x2) -> u32x2;
|
|
- fn arm_vabs_s8(x: i8x8) -> i8x8;
|
|
- fn arm_vabs_s16(x: i16x4) -> i16x4;
|
|
- fn arm_vabs_s32(x: i32x2) -> i32x2;
|
|
- fn arm_vabsq_s8(x: i8x16) -> i8x16;
|
|
- fn arm_vabsq_s16(x: i16x8) -> i16x8;
|
|
- fn arm_vabsq_s32(x: i32x4) -> i32x4;
|
|
- fn arm_vabs_f32(x: f32x2) -> f32x2;
|
|
- fn arm_vabsq_f32(x: f32x4) -> f32x4;
|
|
- fn arm_vqabs_s8(x: i8x8) -> i8x8;
|
|
- fn arm_vqabs_s16(x: i16x4) -> i16x4;
|
|
- fn arm_vqabs_s32(x: i32x2) -> i32x2;
|
|
- fn arm_vqabsq_s8(x: i8x16) -> i8x16;
|
|
- fn arm_vqabsq_s16(x: i16x8) -> i16x8;
|
|
- fn arm_vqabsq_s32(x: i32x4) -> i32x4;
|
|
- fn arm_vqneg_s8(x: i8x8) -> i8x8;
|
|
- fn arm_vqneg_s16(x: i16x4) -> i16x4;
|
|
- fn arm_vqneg_s32(x: i32x2) -> i32x2;
|
|
- fn arm_vqnegq_s8(x: i8x16) -> i8x16;
|
|
- fn arm_vqnegq_s16(x: i16x8) -> i16x8;
|
|
- fn arm_vqnegq_s32(x: i32x4) -> i32x4;
|
|
- fn arm_vclz_s8(x: i8x8) -> i8x8;
|
|
- fn arm_vclz_u8(x: u8x8) -> u8x8;
|
|
- fn arm_vclz_s16(x: i16x4) -> i16x4;
|
|
- fn arm_vclz_u16(x: u16x4) -> u16x4;
|
|
- fn arm_vclz_s32(x: i32x2) -> i32x2;
|
|
- fn arm_vclz_u32(x: u32x2) -> u32x2;
|
|
- fn arm_vclzq_s8(x: i8x16) -> i8x16;
|
|
- fn arm_vclzq_u8(x: u8x16) -> u8x16;
|
|
- fn arm_vclzq_s16(x: i16x8) -> i16x8;
|
|
- fn arm_vclzq_u16(x: u16x8) -> u16x8;
|
|
- fn arm_vclzq_s32(x: i32x4) -> i32x4;
|
|
- fn arm_vclzq_u32(x: u32x4) -> u32x4;
|
|
- fn arm_vcls_s8(x: i8x8) -> i8x8;
|
|
- fn arm_vcls_u8(x: u8x8) -> u8x8;
|
|
- fn arm_vcls_s16(x: i16x4) -> i16x4;
|
|
- fn arm_vcls_u16(x: u16x4) -> u16x4;
|
|
- fn arm_vcls_s32(x: i32x2) -> i32x2;
|
|
- fn arm_vcls_u32(x: u32x2) -> u32x2;
|
|
- fn arm_vclsq_s8(x: i8x16) -> i8x16;
|
|
- fn arm_vclsq_u8(x: u8x16) -> u8x16;
|
|
- fn arm_vclsq_s16(x: i16x8) -> i16x8;
|
|
- fn arm_vclsq_u16(x: u16x8) -> u16x8;
|
|
- fn arm_vclsq_s32(x: i32x4) -> i32x4;
|
|
- fn arm_vclsq_u32(x: u32x4) -> u32x4;
|
|
- fn arm_vcnt_s8(x: i8x8) -> i8x8;
|
|
- fn arm_vcnt_u8(x: u8x8) -> u8x8;
|
|
- fn arm_vcntq_s8(x: i8x16) -> i8x16;
|
|
- fn arm_vcntq_u8(x: u8x16) -> u8x16;
|
|
- fn arm_vrecpe_u32(x: u32x2) -> u32x2;
|
|
- fn arm_vrecpe_f32(x: f32x2) -> f32x2;
|
|
- fn arm_vrecpeq_u32(x: u32x4) -> u32x4;
|
|
- fn arm_vrecpeq_f32(x: f32x4) -> f32x4;
|
|
- fn arm_vrecps_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn arm_vsqrt_f32(x: f32x2) -> f32x2;
|
|
- fn arm_vsqrtq_f32(x: f32x4) -> f32x4;
|
|
- fn arm_vrsqrte_u32(x: u32x2) -> u32x2;
|
|
- fn arm_vrsqrte_f32(x: f32x2) -> f32x2;
|
|
- fn arm_vrsqrteq_u32(x: u32x4) -> u32x4;
|
|
- fn arm_vrsqrteq_f32(x: f32x4) -> f32x4;
|
|
- fn arm_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn arm_vbsl_s8(x: u8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vbsl_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vbsl_s16(x: u16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vbsl_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vbsl_s32(x: u32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vbsl_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vbsl_s64(x: u64x1, y: i64x1) -> i64x1;
|
|
- fn arm_vbsl_u64(x: u64x1, y: u64x1) -> u64x1;
|
|
- fn arm_vbslq_s8(x: u8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vbslq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vbslq_s16(x: u16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vbslq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vbslq_s32(x: u32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vbslq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vbslq_s64(x: u64x2, y: i64x2) -> i64x2;
|
|
- fn arm_vbslq_u64(x: u64x2, y: u64x2) -> u64x2;
|
|
- fn arm_vpadd_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vpadd_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vpadd_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vpadd_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vpadd_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vpadd_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vpadd_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vpaddl_s16(x: i8x8) -> i16x4;
|
|
- fn arm_vpaddl_u16(x: u8x8) -> u16x4;
|
|
- fn arm_vpaddl_s32(x: i16x4) -> i32x2;
|
|
- fn arm_vpaddl_u32(x: u16x4) -> u32x2;
|
|
- fn arm_vpaddl_s64(x: i32x2) -> i64x1;
|
|
- fn arm_vpaddl_u64(x: u32x2) -> u64x1;
|
|
- fn arm_vpaddlq_s16(x: i8x16) -> i16x8;
|
|
- fn arm_vpaddlq_u16(x: u8x16) -> u16x8;
|
|
- fn arm_vpaddlq_s32(x: i16x8) -> i32x4;
|
|
- fn arm_vpaddlq_u32(x: u16x8) -> u32x4;
|
|
- fn arm_vpaddlq_s64(x: i32x4) -> i64x2;
|
|
- fn arm_vpaddlq_u64(x: u32x4) -> u64x2;
|
|
- fn arm_vpadal_s16(x: i16x4, y: i8x8) -> i16x4;
|
|
- fn arm_vpadal_u16(x: u16x4, y: u8x8) -> u16x4;
|
|
- fn arm_vpadal_s32(x: i32x2, y: i16x4) -> i32x2;
|
|
- fn arm_vpadal_u32(x: u32x2, y: u16x4) -> u32x2;
|
|
- fn arm_vpadal_s64(x: i64x1, y: i32x2) -> i64x1;
|
|
- fn arm_vpadal_u64(x: u64x1, y: u32x2) -> u64x1;
|
|
- fn arm_vpadalq_s16(x: i16x8, y: i8x16) -> i16x8;
|
|
- fn arm_vpadalq_u16(x: u16x8, y: u8x16) -> u16x8;
|
|
- fn arm_vpadalq_s32(x: i32x4, y: i16x8) -> i32x4;
|
|
- fn arm_vpadalq_u32(x: u32x4, y: u16x8) -> u32x4;
|
|
- fn arm_vpadalq_s64(x: i64x2, y: i32x4) -> i64x2;
|
|
- fn arm_vpadalq_u64(x: u64x2, y: u32x4) -> u64x2;
|
|
- fn arm_vpmax_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vpmax_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vpmax_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vpmax_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vpmax_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vpmax_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vpmax_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vpmin_s8(x: i8x8, y: i8x8) -> i8x8;
|
|
- fn arm_vpmin_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vpmin_s16(x: i16x4, y: i16x4) -> i16x4;
|
|
- fn arm_vpmin_u16(x: u16x4, y: u16x4) -> u16x4;
|
|
- fn arm_vpmin_s32(x: i32x2, y: i32x2) -> i32x2;
|
|
- fn arm_vpmin_u32(x: u32x2, y: u32x2) -> u32x2;
|
|
- fn arm_vpmin_f32(x: f32x2, y: f32x2) -> f32x2;
|
|
- fn arm_vpminq_s8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn arm_vpminq_u8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn arm_vpminq_s16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn arm_vpminq_u16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn arm_vpminq_s32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn arm_vpminq_u32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn arm_vpminq_f32(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn arm_vtbl1_s8(x: i8x8, y: u8x8) -> i8x8;
|
|
- fn arm_vtbl1_u8(x: u8x8, y: u8x8) -> u8x8;
|
|
- fn arm_vtbx1_s8(x: i8x8, y: i8x8, z: u8x8) -> i8x8;
|
|
- fn arm_vtbx1_u8(x: u8x8, y: u8x8, z: u8x8) -> u8x8;
|
|
- fn arm_vtbl2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8;
|
|
- fn arm_vtbl2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8;
|
|
- fn arm_vtbx2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8;
|
|
- fn arm_vtbx2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8;
|
|
- fn arm_vtbl3_s8(x: (i8x8, i8x8, i8x8), y: u8x8) -> i8x8;
|
|
- fn arm_vtbl3_u8(x: (u8x8, u8x8, u8x8), y: u8x8) -> u8x8;
|
|
- fn arm_vtbx3_s8(x: i8x8, y: (i8x8, i8x8, i8x8), z: u8x8) -> i8x8;
|
|
- fn arm_vtbx3_u8(x: u8x8, y: (u8x8, u8x8, u8x8), z: u8x8) -> u8x8;
|
|
- fn arm_vtbl4_s8(x: (i8x8, i8x8, i8x8, i8x8), y: u8x8) -> i8x8;
|
|
- fn arm_vtbl4_u8(x: (u8x8, u8x8, u8x8, u8x8), y: u8x8) -> u8x8;
|
|
- fn arm_vtbx4_s8(x: i8x8, y: (i8x8, i8x8, i8x8, i8x8), z: u8x8) -> i8x8;
|
|
- fn arm_vtbx4_u8(x: u8x8, y: (u8x8, u8x8, u8x8, u8x8), z: u8x8) -> u8x8;
|
|
-}
|
|
-
|
|
-
|
|
-impl u8x8 {
|
|
- #[inline]
|
|
- pub fn table_lookup_1(self, t0: u8x8) -> u8x8 {
|
|
- unsafe {arm_vtbl1_u8(t0, self)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn table_lookup_2(self, t0: u8x8, t1: u8x8) -> u8x8 {
|
|
- unsafe {arm_vtbl2_u8((t0, t1), self)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn table_lookup_3(self, t0: u8x8, t1: u8x8, t2: u8x8) -> u8x8 {
|
|
- unsafe {arm_vtbl3_u8((t0, t1, t2), self)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn table_lookup_4(self, t0: u8x8, t1: u8x8, t2: u8x8, t3: u8x8) -> u8x8 {
|
|
- unsafe {arm_vtbl4_u8((t0, t1, t2, t3), self)}
|
|
- }
|
|
-}
|
|
-
|
|
-#[doc(hidden)]
|
|
-pub mod common {
|
|
- use super::super::super::*;
|
|
- use super::*;
|
|
- use core::mem;
|
|
-
|
|
- #[inline]
|
|
- pub fn f32x4_sqrt(x: f32x4) -> f32x4 {
|
|
- unsafe {super::arm_vsqrtq_f32(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 {
|
|
- unsafe {super::arm_vrsqrteq_f32(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 {
|
|
- unsafe {super::arm_vrecpeq_f32(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 {
|
|
- unsafe {super::arm_vmaxq_f32(x, y)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 {
|
|
- unsafe {super::arm_vminq_f32(x, y)}
|
|
- }
|
|
-
|
|
- macro_rules! bools {
|
|
- ($($ty: ty, $as_u: ty, $shuffle_fn: ident, $lo_idxs: expr, $hi_idxs: expr, $all: ident ($min: ident), $any: ident ($max: ident);)*) => {
|
|
- $(
|
|
- #[inline]
|
|
- pub fn $all(x: $ty) -> bool {
|
|
- unsafe {
|
|
- let t: $as_u = bitcast(x);
|
|
- let lo = $shuffle_fn(t, t, $lo_idxs);
|
|
- let hi = $shuffle_fn(t, t, $hi_idxs);
|
|
- let x = super::$min(lo, hi);
|
|
- let y = super::$min(x, mem::uninitialized());
|
|
- let y32: u32x2 = bitcast(y);
|
|
- y32.0 == 0xFFFFFFFF
|
|
- }
|
|
- }
|
|
- #[inline]
|
|
- pub fn $any(x: $ty) -> bool {
|
|
- unsafe {
|
|
- let t: $as_u = bitcast(x);
|
|
- let lo = $shuffle_fn(t, t, $lo_idxs);
|
|
- let hi = $shuffle_fn(t, t, $hi_idxs);
|
|
- let x = super::$max(lo, hi);
|
|
- let y = super::$max(x, mem::uninitialized());
|
|
- let y32: u32x2 = bitcast(y);
|
|
- y32.0 != 0
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
- }
|
|
-
|
|
- bools! {
|
|
- bool32fx4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32fx4_all(arm_vpmin_u32), bool32fx4_any(arm_vpmax_u32);
|
|
- bool8ix16, u8x16, simd_shuffle8, [0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15], bool8ix16_all(arm_vpmin_u8), bool8ix16_any(arm_vpmax_u8);
|
|
- bool16ix8, u16x8, simd_shuffle4, [0, 1, 2, 3], [4, 5, 6, 7], bool16ix8_all(arm_vpmin_u16), bool16ix8_any(arm_vpmax_u16);
|
|
- bool32ix4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32ix4_all(arm_vpmin_u32), bool32ix4_any(arm_vpmax_u32);
|
|
- }
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/common.rs b/third_party/rust/simd/src/common.rs
|
|
deleted file mode 100644
|
|
index 1052ae36959d..000000000000
|
|
--- a/third_party/rust/simd/src/common.rs
|
|
+++ /dev/null
|
|
@@ -1,520 +0,0 @@
|
|
-use super::*;
|
|
-#[allow(unused_imports)]
|
|
-use super::{
|
|
- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge,
|
|
- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16,
|
|
- simd_insert, simd_extract,
|
|
- simd_cast,
|
|
- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor,
|
|
-
|
|
- Unalign, bitcast,
|
|
-};
|
|
-use core::{mem,ops};
|
|
-
|
|
-#[cfg(any(target_arch = "x86",
|
|
- target_arch = "x86_64"))]
|
|
-use x86::sse2::common;
|
|
-#[cfg(any(target_arch = "arm"))]
|
|
-use arm::neon::common;
|
|
-#[cfg(any(target_arch = "aarch64"))]
|
|
-use aarch64::neon::common;
|
|
-
|
|
-macro_rules! basic_impls {
|
|
- ($(
|
|
- $name: ident:
|
|
- $elem: ident, $bool: ident, $shuffle: ident, $length: expr, $($first: ident),* | $($last: ident),*;
|
|
- )*) => {
|
|
- $(impl $name {
|
|
- /// Create a new instance.
|
|
- #[inline]
|
|
- pub const fn new($($first: $elem),*, $($last: $elem),*) -> $name {
|
|
- $name($($first),*, $($last),*)
|
|
- }
|
|
-
|
|
- /// Create a new instance where every lane has value `x`.
|
|
- #[inline]
|
|
- pub const fn splat(x: $elem) -> $name {
|
|
- $name($({ #[allow(dead_code)] struct $first; x }),*,
|
|
- $({ #[allow(dead_code)] struct $last; x }),*)
|
|
- }
|
|
-
|
|
- /// Compare for equality.
|
|
- #[inline]
|
|
- pub fn eq(self, other: Self) -> $bool {
|
|
- unsafe {simd_eq(self, other)}
|
|
- }
|
|
- /// Compare for equality.
|
|
- #[inline]
|
|
- pub fn ne(self, other: Self) -> $bool {
|
|
- unsafe {simd_ne(self, other)}
|
|
- }
|
|
- /// Compare for equality.
|
|
- #[inline]
|
|
- pub fn lt(self, other: Self) -> $bool {
|
|
- unsafe {simd_lt(self, other)}
|
|
- }
|
|
- /// Compare for equality.
|
|
- #[inline]
|
|
- pub fn le(self, other: Self) -> $bool {
|
|
- unsafe {simd_le(self, other)}
|
|
- }
|
|
- /// Compare for equality.
|
|
- #[inline]
|
|
- pub fn gt(self, other: Self) -> $bool {
|
|
- unsafe {simd_gt(self, other)}
|
|
- }
|
|
- /// Compare for equality.
|
|
- #[inline]
|
|
- pub fn ge(self, other: Self) -> $bool {
|
|
- unsafe {simd_ge(self, other)}
|
|
- }
|
|
-
|
|
- /// Extract the value of the `idx`th lane of `self`.
|
|
- ///
|
|
- /// # Panics
|
|
- ///
|
|
- /// `extract` will panic if `idx` is out of bounds.
|
|
- #[inline]
|
|
- pub fn extract(self, idx: u32) -> $elem {
|
|
- assert!(idx < $length);
|
|
- unsafe {simd_extract(self, idx)}
|
|
- }
|
|
- /// Return a new vector where the `idx`th lane is replaced
|
|
- /// by `elem`.
|
|
- ///
|
|
- /// # Panics
|
|
- ///
|
|
- /// `replace` will panic if `idx` is out of bounds.
|
|
- #[inline]
|
|
- pub fn replace(self, idx: u32, elem: $elem) -> Self {
|
|
- assert!(idx < $length);
|
|
- unsafe {simd_insert(self, idx, elem)}
|
|
- }
|
|
-
|
|
- /// Load a new value from the `idx`th position of `array`.
|
|
- ///
|
|
- /// This is equivalent to the following, but is possibly
|
|
- /// more efficient:
|
|
- ///
|
|
- /// ```rust,ignore
|
|
- /// Self::new(array[idx], array[idx + 1], ...)
|
|
- /// ```
|
|
- ///
|
|
- /// # Panics
|
|
- ///
|
|
- /// `load` will panic if `idx` is out of bounds in
|
|
- /// `array`, or if `array[idx..]` is too short.
|
|
- #[inline]
|
|
- pub fn load(array: &[$elem], idx: usize) -> Self {
|
|
- let data = &array[idx..idx + $length];
|
|
- let loaded = unsafe {
|
|
- *(data.as_ptr() as *const Unalign<Self>)
|
|
- };
|
|
- loaded.0
|
|
- }
|
|
-
|
|
- /// Store the elements of `self` to `array`, starting at
|
|
- /// the `idx`th position.
|
|
- ///
|
|
- /// This is equivalent to the following, but is possibly
|
|
- /// more efficient:
|
|
- ///
|
|
- /// ```rust,ignore
|
|
- /// array[i] = self.extract(0);
|
|
- /// array[i + 1] = self.extract(1);
|
|
- /// // ...
|
|
- /// ```
|
|
- ///
|
|
- /// # Panics
|
|
- ///
|
|
- /// `store` will panic if `idx` is out of bounds in
|
|
- /// `array`, or if `array[idx...]` is too short.
|
|
- #[inline]
|
|
- pub fn store(self, array: &mut [$elem], idx: usize) {
|
|
- let place = &mut array[idx..idx + $length];
|
|
- unsafe {
|
|
- *(place.as_mut_ptr() as *mut Unalign<Self>) = Unalign(self)
|
|
- }
|
|
- }
|
|
- })*
|
|
- }
|
|
-}
|
|
-
|
|
-basic_impls! {
|
|
- u32x4: u32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3;
|
|
- i32x4: i32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3;
|
|
- f32x4: f32, bool32fx4, simd_shuffle4, 4, x0, x1 | x2, x3;
|
|
-
|
|
- u16x8: u16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7;
|
|
- i16x8: i16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7;
|
|
-
|
|
- u8x16: u8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15;
|
|
- i8x16: i8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15;
|
|
-}
|
|
-
|
|
-macro_rules! bool_impls {
|
|
- ($(
|
|
- $name: ident:
|
|
- $elem: ident, $repr: ident, $repr_elem: ident, $length: expr, $all: ident, $any: ident,
|
|
- $($first: ident),* | $($last: ident),*
|
|
- [$(#[$cvt_meta: meta] $cvt: ident -> $cvt_to: ident),*];
|
|
- )*) => {
|
|
- $(impl $name {
|
|
- /// Convert to integer representation.
|
|
- #[inline]
|
|
- pub fn to_repr(self) -> $repr {
|
|
- unsafe {mem::transmute(self)}
|
|
- }
|
|
- /// Convert from integer representation.
|
|
- #[inline]
|
|
- #[inline]
|
|
- pub fn from_repr(x: $repr) -> Self {
|
|
- unsafe {mem::transmute(x)}
|
|
- }
|
|
-
|
|
- /// Create a new instance.
|
|
- #[inline]
|
|
- pub fn new($($first: bool),*, $($last: bool),*) -> $name {
|
|
- unsafe {
|
|
- // negate everything together
|
|
- simd_sub($name::splat(false),
|
|
- $name($( ($first as $repr_elem) ),*,
|
|
- $( ($last as $repr_elem) ),*))
|
|
- }
|
|
- }
|
|
-
|
|
- /// Create a new instance where every lane has value `x`.
|
|
- #[allow(unused_variables)]
|
|
- #[inline]
|
|
- pub fn splat(x: bool) -> $name {
|
|
- let x = if x {!(0 as $repr_elem)} else {0};
|
|
- $name($({ let $first = (); x}),*,
|
|
- $({ let $last = (); x}),*)
|
|
- }
|
|
-
|
|
- /// Extract the value of the `idx`th lane of `self`.
|
|
- ///
|
|
- /// # Panics
|
|
- ///
|
|
- /// `extract` will panic if `idx` is out of bounds.
|
|
- #[inline]
|
|
- pub fn extract(self, idx: u32) -> bool {
|
|
- assert!(idx < $length);
|
|
- unsafe {simd_extract(self.to_repr(), idx) != 0}
|
|
- }
|
|
- /// Return a new vector where the `idx`th lane is replaced
|
|
- /// by `elem`.
|
|
- ///
|
|
- /// # Panics
|
|
- ///
|
|
- /// `replace` will panic if `idx` is out of bounds.
|
|
- #[inline]
|
|
- pub fn replace(self, idx: u32, elem: bool) -> Self {
|
|
- assert!(idx < $length);
|
|
- let x = if elem {!(0 as $repr_elem)} else {0};
|
|
- unsafe {Self::from_repr(simd_insert(self.to_repr(), idx, x))}
|
|
- }
|
|
- /// Select between elements of `then` and `else_`, based on
|
|
- /// the corresponding element of `self`.
|
|
- ///
|
|
- /// This is equivalent to the following, but is possibly
|
|
- /// more efficient:
|
|
- ///
|
|
- /// ```rust,ignore
|
|
- /// T::new(if self.extract(0) { then.extract(0) } else { else_.extract(0) },
|
|
- /// if self.extract(1) { then.extract(1) } else { else_.extract(1) },
|
|
- /// ...)
|
|
- /// ```
|
|
- #[inline]
|
|
- pub fn select<T: Simd<Bool = $name>>(self, then: T, else_: T) -> T {
|
|
- let then: $repr = bitcast(then);
|
|
- let else_: $repr = bitcast(else_);
|
|
- bitcast((then & self.to_repr()) | (else_ & (!self).to_repr()))
|
|
- }
|
|
-
|
|
- /// Check if every element of `self` is true.
|
|
- ///
|
|
- /// This is equivalent to the following, but is possibly
|
|
- /// more efficient:
|
|
- ///
|
|
- /// ```rust,ignore
|
|
- /// self.extract(0) && self.extract(1) && ...
|
|
- /// ```
|
|
- #[inline]
|
|
- pub fn all(self) -> bool {
|
|
- common::$all(self)
|
|
- }
|
|
- /// Check if any element of `self` is true.
|
|
- ///
|
|
- /// This is equivalent to the following, but is possibly
|
|
- /// more efficient:
|
|
- ///
|
|
- /// ```rust,ignore
|
|
- /// self.extract(0) || self.extract(1) || ...
|
|
- /// ```
|
|
- #[inline]
|
|
- pub fn any(self) -> bool {
|
|
- common::$any(self)
|
|
- }
|
|
-
|
|
- $(
|
|
- #[$cvt_meta]
|
|
- #[inline]
|
|
- pub fn $cvt(self) -> $cvt_to {
|
|
- bitcast(self)
|
|
- }
|
|
- )*
|
|
- }
|
|
- impl ops::Not for $name {
|
|
- type Output = Self;
|
|
-
|
|
- #[inline]
|
|
- fn not(self) -> Self {
|
|
- Self::from_repr($repr::splat(!(0 as $repr_elem)) ^ self.to_repr())
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
-}
|
|
-
|
|
-bool_impls! {
|
|
- bool32ix4: bool32i, i32x4, i32, 4, bool32ix4_all, bool32ix4_any, x0, x1 | x2, x3
|
|
- [/// Convert `self` to a boolean vector for interacting with floating point vectors.
|
|
- to_f -> bool32fx4];
|
|
- bool32fx4: bool32f, i32x4, i32, 4, bool32fx4_all, bool32fx4_any, x0, x1 | x2, x3
|
|
- [/// Convert `self` to a boolean vector for interacting with integer vectors.
|
|
- to_i -> bool32ix4];
|
|
-
|
|
- bool16ix8: bool16i, i16x8, i16, 8, bool16ix8_all, bool16ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 [];
|
|
-
|
|
- bool8ix16: bool8i, i8x16, i8, 16, bool8ix16_all, bool8ix16_any, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 [];
|
|
-}
|
|
-
|
|
-impl u32x4 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i32(self) -> i32x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 32-bit float.
|
|
- #[inline]
|
|
- pub fn to_f32(self) -> f32x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-impl i32x4 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u32(self) -> u32x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 32-bit float.
|
|
- #[inline]
|
|
- pub fn to_f32(self) -> f32x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-impl f32x4 {
|
|
- /// Compute the square root of each lane.
|
|
- #[inline]
|
|
- pub fn sqrt(self) -> Self {
|
|
- common::f32x4_sqrt(self)
|
|
- }
|
|
- /// Compute an approximation to the reciprocal of the square root
|
|
- /// of `self`, that is, `f32::splat(1.0) / self.sqrt()`.
|
|
- ///
|
|
- /// The accuracy of this approximation is platform dependent.
|
|
- #[inline]
|
|
- pub fn approx_rsqrt(self) -> Self {
|
|
- common::f32x4_approx_rsqrt(self)
|
|
- }
|
|
- /// Compute an approximation to the reciprocal of `self`, that is,
|
|
- /// `f32::splat(1.0) / self`.
|
|
- ///
|
|
- /// The accuracy of this approximation is platform dependent.
|
|
- #[inline]
|
|
- pub fn approx_reciprocal(self) -> Self {
|
|
- common::f32x4_approx_reciprocal(self)
|
|
- }
|
|
- /// Compute the lane-wise maximum of `self` and `other`.
|
|
- ///
|
|
- /// This is equivalent to the following, but is possibly more
|
|
- /// efficient:
|
|
- ///
|
|
- /// ```rust,ignore
|
|
- /// f32x4::new(self.extract(0).max(other.extract(0)),
|
|
- /// self.extract(1).max(other.extract(1)),
|
|
- /// ...)
|
|
- /// ```
|
|
- #[inline]
|
|
- pub fn max(self, other: Self) -> Self {
|
|
- common::f32x4_max(self, other)
|
|
- }
|
|
- /// Compute the lane-wise minimum of `self` and `other`.
|
|
- ///
|
|
- /// This is equivalent to the following, but is possibly more
|
|
- /// efficient:
|
|
- ///
|
|
- /// ```rust,ignore
|
|
- /// f32x4::new(self.extract(0).min(other.extract(0)),
|
|
- /// self.extract(1).min(other.extract(1)),
|
|
- /// ...)
|
|
- /// ```
|
|
- #[inline]
|
|
- pub fn min(self, other: Self) -> Self {
|
|
- common::f32x4_min(self, other)
|
|
- }
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i32(self) -> i32x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u32(self) -> u32x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl i16x8 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u16(self) -> u16x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-impl u16x8 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i16(self) -> i16x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl i8x16 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u8(self) -> u8x16 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-impl u8x16 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i8(self) -> i8x16 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-
|
|
-macro_rules! neg_impls {
|
|
- ($zero: expr, $($ty: ident,)*) => {
|
|
- $(impl ops::Neg for $ty {
|
|
- type Output = Self;
|
|
- fn neg(self) -> Self {
|
|
- $ty::splat($zero) - self
|
|
- }
|
|
- })*
|
|
- }
|
|
-}
|
|
-neg_impls!{
|
|
- 0,
|
|
- i32x4,
|
|
- i16x8,
|
|
- i8x16,
|
|
-}
|
|
-neg_impls! {
|
|
- 0.0,
|
|
- f32x4,
|
|
-}
|
|
-macro_rules! not_impls {
|
|
- ($($ty: ident,)*) => {
|
|
- $(impl ops::Not for $ty {
|
|
- type Output = Self;
|
|
- fn not(self) -> Self {
|
|
- $ty::splat(!0) ^ self
|
|
- }
|
|
- })*
|
|
- }
|
|
-}
|
|
-not_impls! {
|
|
- i32x4,
|
|
- i16x8,
|
|
- i8x16,
|
|
- u32x4,
|
|
- u16x8,
|
|
- u8x16,
|
|
-}
|
|
-
|
|
-macro_rules! operators {
|
|
- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => {
|
|
- $(
|
|
- $(impl ops::$trayt for $ty {
|
|
- type Output = Self;
|
|
- #[inline]
|
|
- fn $method(self, x: Self) -> Self {
|
|
- unsafe {$func(self, x)}
|
|
- }
|
|
- })*
|
|
- )*
|
|
- }
|
|
-}
|
|
-operators! {
|
|
- Add (simd_add, add):
|
|
- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
|
|
- f32x4;
|
|
- Sub (simd_sub, sub):
|
|
- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
|
|
- f32x4;
|
|
- Mul (simd_mul, mul):
|
|
- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
|
|
- f32x4;
|
|
- Div (simd_div, div): f32x4;
|
|
-
|
|
- BitAnd (simd_and, bitand):
|
|
- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
|
|
- bool8ix16, bool16ix8, bool32ix4,
|
|
- bool32fx4;
|
|
- BitOr (simd_or, bitor):
|
|
- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
|
|
- bool8ix16, bool16ix8, bool32ix4,
|
|
- bool32fx4;
|
|
- BitXor (simd_xor, bitxor):
|
|
- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
|
|
- bool8ix16, bool16ix8, bool32ix4,
|
|
- bool32fx4;
|
|
-}
|
|
-
|
|
-macro_rules! shift_one {
|
|
- ($ty: ident, $($by: ident),*) => {
|
|
- $(
|
|
- impl ops::Shl<$by> for $ty {
|
|
- type Output = Self;
|
|
- #[inline]
|
|
- fn shl(self, other: $by) -> Self {
|
|
- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) }
|
|
- }
|
|
- }
|
|
- impl ops::Shr<$by> for $ty {
|
|
- type Output = Self;
|
|
- #[inline]
|
|
- fn shr(self, other: $by) -> Self {
|
|
- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))}
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
-}
|
|
-
|
|
-macro_rules! shift {
|
|
- ($($ty: ident),*) => {
|
|
- $(shift_one! {
|
|
- $ty,
|
|
- u8, u16, u32, u64, usize,
|
|
- i8, i16, i32, i64, isize
|
|
- })*
|
|
- }
|
|
-}
|
|
-shift! {
|
|
- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/lib.rs b/third_party/rust/simd/src/lib.rs
|
|
deleted file mode 100644
|
|
index e8fb1b16f53b..000000000000
|
|
--- a/third_party/rust/simd/src/lib.rs
|
|
+++ /dev/null
|
|
@@ -1,804 +0,0 @@
|
|
-//! `simd` offers a basic interface to the SIMD functionality of CPUs.
|
|
-#![no_std]
|
|
-
|
|
-#![feature(cfg_target_feature, repr_simd, platform_intrinsics, const_fn)]
|
|
-#![allow(non_camel_case_types)]
|
|
-
|
|
-#[cfg(feature = "with-serde")]
|
|
-extern crate serde;
|
|
-#[cfg(feature = "with-serde")]
|
|
-#[macro_use]
|
|
-extern crate serde_derive;
|
|
-
|
|
-use core::mem;
|
|
-
|
|
-/// Boolean type for 8-bit integers.
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
|
-pub struct bool8i(i8);
|
|
-/// Boolean type for 16-bit integers.
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
|
-pub struct bool16i(i16);
|
|
-/// Boolean type for 32-bit integers.
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
|
-pub struct bool32i(i32);
|
|
-/// Boolean type for 32-bit floats.
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
|
-pub struct bool32f(i32);
|
|
-
|
|
-macro_rules! bool {
|
|
- ($($name: ident, $inner: ty;)*) => {
|
|
- $(
|
|
- impl From<bool> for $name {
|
|
- #[inline]
|
|
- fn from(b: bool) -> $name {
|
|
- $name(-(b as $inner))
|
|
- }
|
|
- }
|
|
- impl From<$name> for bool {
|
|
- #[inline]
|
|
- fn from(b: $name) -> bool {
|
|
- b.0 != 0
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
-}
|
|
-bool! {
|
|
- bool8i, i8;
|
|
- bool16i, i16;
|
|
- bool32i, i32;
|
|
- bool32f, i32;
|
|
-}
|
|
-
|
|
-/// Types that are SIMD vectors.
|
|
-pub unsafe trait Simd {
|
|
- /// The corresponding boolean vector type.
|
|
- type Bool: Simd;
|
|
- /// The element that this vector stores.
|
|
- type Elem;
|
|
-}
|
|
-
|
|
-/// A SIMD vector of 4 `u32`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u32x4(u32, u32, u32, u32);
|
|
-/// A SIMD vector of 4 `i32`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i32x4(i32, i32, i32, i32);
|
|
-/// A SIMD vector of 4 `f32`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct f32x4(f32, f32, f32, f32);
|
|
-/// A SIMD boolean vector for length-4 vectors of 32-bit integers.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool32ix4(i32, i32, i32, i32);
|
|
-/// A SIMD boolean vector for length-4 vectors of 32-bit floats.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool32fx4(i32, i32, i32, i32);
|
|
-
|
|
-#[allow(dead_code)]
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-struct u32x2(u32, u32);
|
|
-#[allow(dead_code)]
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-struct i32x2(i32, i32);
|
|
-#[allow(dead_code)]
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-struct f32x2(f32, f32);
|
|
-#[allow(dead_code)]
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-struct bool32ix2(i32, i32);
|
|
-#[allow(dead_code)]
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-struct bool32fx2(i32, i32);
|
|
-
|
|
-/// A SIMD vector of 8 `u16`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u16x8(u16, u16, u16, u16,
|
|
- u16, u16, u16, u16);
|
|
-/// A SIMD vector of 8 `i16`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i16x8(i16, i16, i16, i16,
|
|
- i16, i16, i16, i16);
|
|
-/// A SIMD boolean vector for length-8 vectors of 16-bit integers.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool16ix8(i16, i16, i16, i16,
|
|
- i16, i16, i16, i16);
|
|
-
|
|
-/// A SIMD vector of 16 `u8`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8,
|
|
- u8, u8, u8, u8, u8, u8, u8, u8);
|
|
-/// A SIMD vector of 16 `i8`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8);
|
|
-/// A SIMD boolean vector for length-16 vectors of 8-bit integers.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool8ix16(i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8);
|
|
-
|
|
-
|
|
-macro_rules! simd {
|
|
- ($($bool: ty: $($ty: ty = $elem: ty),*;)*) => {
|
|
- $($(unsafe impl Simd for $ty {
|
|
- type Bool = $bool;
|
|
- type Elem = $elem;
|
|
- }
|
|
- impl Clone for $ty { #[inline] fn clone(&self) -> Self { *self } }
|
|
- )*)*}
|
|
-}
|
|
-simd! {
|
|
- bool8ix16: i8x16 = i8, u8x16 = u8, bool8ix16 = bool8i;
|
|
- bool16ix8: i16x8 = i16, u16x8 = u16, bool16ix8 = bool16i;
|
|
- bool32ix4: i32x4 = i32, u32x4 = u32, bool32ix4 = bool32i;
|
|
- bool32fx4: f32x4 = f32, bool32fx4 = bool32f;
|
|
-
|
|
- bool32ix2: i32x2 = i32, u32x2 = u32, bool32ix2 = bool32i;
|
|
- bool32fx2: f32x2 = f32, bool32fx2 = bool32f;
|
|
-}
|
|
-
|
|
-#[allow(dead_code)]
|
|
-#[inline]
|
|
-fn bitcast<T: Simd, U: Simd>(x: T) -> U {
|
|
- assert_eq!(mem::size_of::<T>(),
|
|
- mem::size_of::<U>());
|
|
- unsafe {mem::transmute_copy(&x)}
|
|
-}
|
|
-
|
|
-#[allow(dead_code)]
|
|
-extern "platform-intrinsic" {
|
|
- fn simd_eq<T: Simd<Bool = U>, U>(x: T, y: T) -> U;
|
|
- fn simd_ne<T: Simd<Bool = U>, U>(x: T, y: T) -> U;
|
|
- fn simd_lt<T: Simd<Bool = U>, U>(x: T, y: T) -> U;
|
|
- fn simd_le<T: Simd<Bool = U>, U>(x: T, y: T) -> U;
|
|
- fn simd_gt<T: Simd<Bool = U>, U>(x: T, y: T) -> U;
|
|
- fn simd_ge<T: Simd<Bool = U>, U>(x: T, y: T) -> U;
|
|
-
|
|
- fn simd_shuffle2<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 2]) -> U;
|
|
- fn simd_shuffle4<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 4]) -> U;
|
|
- fn simd_shuffle8<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 8]) -> U;
|
|
- fn simd_shuffle16<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 16]) -> U;
|
|
-
|
|
- fn simd_insert<T: Simd<Elem = U>, U>(x: T, idx: u32, val: U) -> T;
|
|
- fn simd_extract<T: Simd<Elem = U>, U>(x: T, idx: u32) -> U;
|
|
-
|
|
- fn simd_cast<T: Simd, U: Simd>(x: T) -> U;
|
|
-
|
|
- fn simd_add<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_sub<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_mul<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_div<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_shl<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_shr<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_and<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_or<T: Simd>(x: T, y: T) -> T;
|
|
- fn simd_xor<T: Simd>(x: T, y: T) -> T;
|
|
-}
|
|
-#[repr(packed)]
|
|
-#[derive(Copy)]
|
|
-struct Unalign<T>(T);
|
|
-
|
|
-impl<T: Clone> Clone for Unalign<T> {
|
|
- fn clone(&self) -> Unalign<T> {
|
|
- Unalign(unsafe { self.0.clone() })
|
|
- }
|
|
-}
|
|
-
|
|
-#[macro_use]
|
|
-mod common;
|
|
-mod sixty_four;
|
|
-mod v256;
|
|
-
|
|
-#[cfg(any(feature = "doc",
|
|
- target_arch = "x86",
|
|
- target_arch = "x86_64"))]
|
|
-pub mod x86;
|
|
-#[cfg(any(feature = "doc", target_arch = "arm"))]
|
|
-pub mod arm;
|
|
-#[cfg(any(feature = "doc", target_arch = "aarch64"))]
|
|
-pub mod aarch64;
|
|
-
|
|
-#[cfg(test)]
|
|
-mod tests {
|
|
-
|
|
- use super::u8x16;
|
|
- use super::u16x8;
|
|
- use super::u32x4;
|
|
- use super::f32x4;
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_none_not_any() {
|
|
- let x1 = u8x16::splat(1);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!(!(x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_none_not_all() {
|
|
- let x1 = u8x16::splat(1);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_all_any() {
|
|
- let x1 = u8x16::splat(1);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_all_all() {
|
|
- let x1 = u8x16::splat(1);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!((x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_last_any() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_last_not_all() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_first_any() {
|
|
- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_first_not_all() {
|
|
- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_last_any() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_last_not_all() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_first_any() {
|
|
- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_first_not_all() {
|
|
- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_thirteenth_any() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_thirteenth_not_all() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_fifth_any() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_except_fifth_not_all() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_thirteenth_any() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_thirteenth_not_all() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_fifth_any() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u8x16_only_fifth_not_all() {
|
|
- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u8x16::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_none_not_any() {
|
|
- let x1 = u16x8::splat(1);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!(!(x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_none_not_all() {
|
|
- let x1 = u16x8::splat(1);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_all_any() {
|
|
- let x1 = u16x8::splat(1);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_all_all() {
|
|
- let x1 = u16x8::splat(1);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!((x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_last_any() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_last_not_all() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_first_any() {
|
|
- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_first_not_all() {
|
|
- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_last_any() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_last_not_all() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_first_any() {
|
|
- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_first_not_all() {
|
|
- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_sixth_any() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_sixth_not_all() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_third_any() {
|
|
- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_except_third_not_all() {
|
|
- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_sixth_any() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_sixth_not_all() {
|
|
- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_third_any() {
|
|
- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u16x8_only_third_not_all() {
|
|
- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2);
|
|
- let x2 = u16x8::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_none_not_any() {
|
|
- let x1 = u32x4::splat(1);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!(!(x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_none_not_all() {
|
|
- let x1 = u32x4::splat(1);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_all_any() {
|
|
- let x1 = u32x4::splat(1);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_all_all() {
|
|
- let x1 = u32x4::splat(1);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!((x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_last_any() {
|
|
- let x1 = u32x4::new(2, 2, 2, 1);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_last_not_all() {
|
|
- let x1 = u32x4::new(2, 2, 2, 1);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_first_any() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_first_not_all() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_last_any() {
|
|
- let x1 = u32x4::new(2, 2, 2, 1);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_last_not_all() {
|
|
- let x1 = u32x4::new(2, 2, 2, 1);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_first_any() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_first_not_all() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_second_any() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_second_not_all() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_third_any() {
|
|
- let x1 = u32x4::new(2, 2, 1, 2);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_except_third_not_all() {
|
|
- let x1 = u32x4::new(2, 2, 1, 2);
|
|
- let x2 = u32x4::splat(2);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_second_any() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_second_not_all() {
|
|
- let x1 = u32x4::new(1, 2, 2, 2);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_third_any() {
|
|
- let x1 = u32x4::new(2, 2, 1, 2);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_u32x4_only_third_not_all() {
|
|
- let x1 = u32x4::new(2, 2, 1, 2);
|
|
- let x2 = u32x4::splat(1);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_none_not_any() {
|
|
- let x1 = f32x4::splat(1.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!(!(x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_none_not_all() {
|
|
- let x1 = f32x4::splat(1.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_all_any() {
|
|
- let x1 = f32x4::splat(1.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_all_all() {
|
|
- let x1 = f32x4::splat(1.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!((x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_last_any() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_last_not_all() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_first_any() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_first_not_all() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_last_any() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_last_not_all() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_first_any() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_first_not_all() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_second_any() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_second_not_all() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_third_any() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_except_third_not_all() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0);
|
|
- let x2 = f32x4::splat(2.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_second_any() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_second_not_all() {
|
|
- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_third_any() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!((x1.eq(x2)).any());
|
|
- }
|
|
-
|
|
- #[test]
|
|
- fn test_f32x4_only_third_not_all() {
|
|
- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0);
|
|
- let x2 = f32x4::splat(1.0);
|
|
- assert!(!(x1.eq(x2)).all());
|
|
- }
|
|
-
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/sixty_four.rs b/third_party/rust/simd/src/sixty_four.rs
|
|
deleted file mode 100644
|
|
index a87f44a77ee7..000000000000
|
|
--- a/third_party/rust/simd/src/sixty_four.rs
|
|
+++ /dev/null
|
|
@@ -1,228 +0,0 @@
|
|
-#![allow(dead_code)]
|
|
-use super::*;
|
|
-#[allow(unused_imports)]
|
|
-use super::{
|
|
- f32x2,
|
|
- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge,
|
|
- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16,
|
|
- simd_insert, simd_extract,
|
|
- simd_cast,
|
|
- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor,
|
|
-
|
|
- Unalign, bitcast,
|
|
-};
|
|
-use core::{mem,ops};
|
|
-
|
|
-/// Boolean type for 64-bit integers.
|
|
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct bool64i(i64);
|
|
-/// Boolean type for 64-bit floats.
|
|
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy, Clone)]
|
|
-pub struct bool64f(i64);
|
|
-/// A SIMD vector of 2 `u64`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u64x2(u64, u64);
|
|
-/// A SIMD vector of 2 `i64`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i64x2(i64, i64);
|
|
-/// A SIMD vector of 2 `f64`s.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct f64x2(f64, f64);
|
|
-/// A SIMD boolean vector for length-2 vectors of 64-bit integers.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool64ix2(i64, i64);
|
|
-/// A SIMD boolean vector for length-2 vectors of 64-bit floats.
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool64fx2(i64, i64);
|
|
-
|
|
-simd! {
|
|
- bool64ix2: i64x2 = i64, u64x2 = u64, bool64ix2 = bool64i;
|
|
- bool64fx2: f64x2 = f64, bool64fx2 = bool64f;
|
|
-}
|
|
-basic_impls! {
|
|
- u64x2: u64, bool64ix2, simd_shuffle2, 2, x0 | x1;
|
|
- i64x2: i64, bool64ix2, simd_shuffle2, 2, x0 | x1;
|
|
- f64x2: f64, bool64fx2, simd_shuffle2, 2, x0 | x1;
|
|
-}
|
|
-
|
|
-mod common {
|
|
- use super::*;
|
|
- // naive for now
|
|
- #[inline]
|
|
- pub fn bool64ix2_all(x: bool64ix2) -> bool {
|
|
- x.0 != 0 && x.1 != 0
|
|
- }
|
|
- #[inline]
|
|
- pub fn bool64ix2_any(x: bool64ix2) -> bool {
|
|
- x.0 != 0 || x.1 != 0
|
|
- }
|
|
- #[inline]
|
|
- pub fn bool64fx2_all(x: bool64fx2) -> bool {
|
|
- x.0 != 0 && x.1 != 0
|
|
- }
|
|
- #[inline]
|
|
- pub fn bool64fx2_any(x: bool64fx2) -> bool {
|
|
- x.0 != 0 || x.1 != 0
|
|
- }}
|
|
-bool_impls! {
|
|
- bool64ix2: bool64i, i64x2, i64, 2, bool64ix2_all, bool64ix2_any, x0 | x1
|
|
- [/// Convert `self` to a boolean vector for interacting with floating point vectors.
|
|
- to_f -> bool64fx2];
|
|
-
|
|
- bool64fx2: bool64f, i64x2, i64, 2, bool64fx2_all, bool64fx2_any, x0 | x1
|
|
- [/// Convert `self` to a boolean vector for interacting with integer vectors.
|
|
- to_i -> bool64ix2];
|
|
-}
|
|
-
|
|
-impl u64x2 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i64(self) -> i64x2 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 64-bit float.
|
|
- #[inline]
|
|
- pub fn to_f64(self) -> f64x2 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-impl i64x2 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u64(self) -> u64x2 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 64-bit float.
|
|
- #[inline]
|
|
- pub fn to_f64(self) -> f64x2 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-impl f64x2 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i64(self) -> i64x2 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u64(self) -> u64x2 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-
|
|
- /// Convert each lane to a 32-bit float.
|
|
- #[inline]
|
|
- pub fn to_f32(self) -> f32x4 {
|
|
- unsafe {
|
|
- let x: f32x2 = simd_cast(self);
|
|
- f32x4::new(x.0, x.1, 0.0, 0.0)
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-neg_impls!{
|
|
- 0,
|
|
- i64x2,
|
|
-}
|
|
-neg_impls! {
|
|
- 0.0,
|
|
- f64x2,
|
|
-}
|
|
-macro_rules! not_impls {
|
|
- ($($ty: ident,)*) => {
|
|
- $(impl ops::Not for $ty {
|
|
- type Output = Self;
|
|
- fn not(self) -> Self {
|
|
- $ty::splat(!0) ^ self
|
|
- }
|
|
- })*
|
|
- }
|
|
-}
|
|
-not_impls! {
|
|
- i64x2,
|
|
- u64x2,
|
|
-}
|
|
-
|
|
-macro_rules! operators {
|
|
- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => {
|
|
- $(
|
|
- $(impl ops::$trayt for $ty {
|
|
- type Output = Self;
|
|
- #[inline]
|
|
- fn $method(self, x: Self) -> Self {
|
|
- unsafe {$func(self, x)}
|
|
- }
|
|
- })*
|
|
- )*
|
|
- }
|
|
-}
|
|
-operators! {
|
|
- Add (simd_add, add):
|
|
- i64x2, u64x2,
|
|
- f64x2;
|
|
- Sub (simd_sub, sub):
|
|
- i64x2, u64x2,
|
|
- f64x2;
|
|
- Mul (simd_mul, mul):
|
|
- i64x2, u64x2,
|
|
- f64x2;
|
|
- Div (simd_div, div): f64x2;
|
|
-
|
|
- BitAnd (simd_and, bitand):
|
|
- i64x2, u64x2,
|
|
- bool64ix2,
|
|
- bool64fx2;
|
|
- BitOr (simd_or, bitor):
|
|
- i64x2, u64x2,
|
|
- bool64ix2,
|
|
- bool64fx2;
|
|
- BitXor (simd_xor, bitxor):
|
|
- i64x2, u64x2,
|
|
- bool64ix2,
|
|
- bool64fx2;
|
|
-}
|
|
-
|
|
-macro_rules! shift_one { ($ty: ident, $($by: ident),*) => {
|
|
- $(
|
|
- impl ops::Shl<$by> for $ty {
|
|
- type Output = Self;
|
|
- #[inline]
|
|
- fn shl(self, other: $by) -> Self {
|
|
- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) }
|
|
- }
|
|
- }
|
|
- impl ops::Shr<$by> for $ty {
|
|
- type Output = Self;
|
|
- #[inline]
|
|
- fn shr(self, other: $by) -> Self {
|
|
- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))}
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
-}
|
|
-
|
|
-macro_rules! shift {
|
|
- ($($ty: ident),*) => {
|
|
- $(shift_one! {
|
|
- $ty,
|
|
- u8, u16, u32, u64, usize,
|
|
- i8, i16, i32, i64, isize
|
|
- })*
|
|
- }
|
|
-}
|
|
-shift! {
|
|
- i64x2, u64x2
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/v256.rs b/third_party/rust/simd/src/v256.rs
|
|
deleted file mode 100644
|
|
index 519eb14e7259..000000000000
|
|
--- a/third_party/rust/simd/src/v256.rs
|
|
+++ /dev/null
|
|
@@ -1,436 +0,0 @@
|
|
-#![allow(dead_code)]
|
|
-use core::{mem,ops};
|
|
-#[allow(unused_imports)]
|
|
-use super::{
|
|
- Simd,
|
|
- u32x4, i32x4, u16x8, i16x8, u8x16, i8x16, f32x4,
|
|
- bool32ix4, bool16ix8, bool8ix16, bool32fx4,
|
|
- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge,
|
|
- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16,
|
|
- simd_insert, simd_extract,
|
|
- simd_cast,
|
|
- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor,
|
|
- bool8i, bool16i, bool32i, bool32f,
|
|
- Unalign, bitcast,
|
|
-};
|
|
-use super::sixty_four::*;
|
|
-#[cfg(all(target_feature = "avx"))]
|
|
-use super::x86::avx::common;
|
|
-
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u64x4(u64, u64, u64, u64);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i64x4(i64, i64, i64, i64);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct f64x4(f64, f64, f64, f64);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool64ix4(i64, i64, i64, i64);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool64fx4(i64, i64, i64, i64);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u32x8(u32, u32, u32, u32,
|
|
- u32, u32, u32, u32);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i32x8(i32, i32, i32, i32,
|
|
- i32, i32, i32, i32);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct f32x8(f32, f32, f32, f32,
|
|
- f32, f32, f32, f32);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool32ix8(i32, i32, i32, i32,
|
|
- i32, i32, i32, i32);#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool32fx8(i32, i32, i32, i32,
|
|
- i32, i32, i32, i32);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u16x16(u16, u16, u16, u16, u16, u16, u16, u16,
|
|
- u16, u16, u16, u16, u16, u16, u16, u16);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i16x16(i16, i16, i16, i16, i16, i16, i16, i16,
|
|
- i16, i16, i16, i16, i16, i16, i16, i16);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool16ix16(i16, i16, i16, i16, i16, i16, i16, i16,
|
|
- i16, i16, i16, i16, i16, i16, i16, i16);
|
|
-
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct u8x32(u8, u8, u8, u8, u8, u8, u8, u8,
|
|
- u8, u8, u8, u8, u8, u8, u8, u8,
|
|
- u8, u8, u8, u8, u8, u8, u8, u8,
|
|
- u8, u8, u8, u8, u8, u8, u8, u8);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct i8x32(i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8);
|
|
-#[repr(simd)]
|
|
-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))]
|
|
-#[derive(Debug, Copy)]
|
|
-pub struct bool8ix32(i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8,
|
|
- i8, i8, i8, i8, i8, i8, i8, i8);
|
|
-
|
|
-simd! {
|
|
- bool8ix32: i8x32 = i8, u8x32 = u8, bool8ix32 = bool8i;
|
|
- bool16ix16: i16x16 = i16, u16x16 = u16, bool16ix16 = bool16i;
|
|
- bool32ix8: i32x8 = i32, u32x8 = u32, bool32ix8 = bool32i;
|
|
- bool64ix4: i64x4 = i64, u64x4 = u64, bool64ix4 = bool64i;
|
|
-
|
|
- bool32fx8: f32x8 = f32, bool32fx8 = bool32f;
|
|
- bool64fx4: f64x4 = f64, bool64fx4 = bool64f;
|
|
-}
|
|
-
|
|
-basic_impls! {
|
|
- u64x4: u64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3;
|
|
- i64x4: i64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3;
|
|
- f64x4: f64, bool64fx4, simd_shuffle4, 4, x0, x1 | x2, x3;
|
|
-
|
|
- u32x8: u32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7;
|
|
- i32x8: i32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7;
|
|
- f32x8: f32, bool32fx8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7;
|
|
-
|
|
- u16x16: u16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15;
|
|
- i16x16: i16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15;
|
|
-
|
|
- u8x32: u8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31;
|
|
- i8x32: i8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31;
|
|
-}
|
|
-
|
|
-#[cfg(all(not(target_feature = "avx")))]
|
|
-#[doc(hidden)]
|
|
-mod common {
|
|
- use super::*;
|
|
- // implementation via SSE vectors
|
|
- macro_rules! bools {
|
|
- ($($ty: ty, $all: ident, $any: ident;)*) => {
|
|
- $(
|
|
- #[inline]
|
|
- pub fn $all(x: $ty) -> bool {
|
|
- x.low().all() && x.high().all()
|
|
- }
|
|
- #[inline]
|
|
- pub fn $any(x: $ty) -> bool {
|
|
- x.low().any() || x.high().any()
|
|
- }
|
|
- )*
|
|
- }
|
|
- }
|
|
-
|
|
- bools! {
|
|
- bool64ix4, bool64ix4_all, bool64ix4_any;
|
|
- bool64fx4, bool64fx4_all, bool64fx4_any;
|
|
- bool32ix8, bool32ix8_all, bool32ix8_any;
|
|
- bool32fx8, bool32fx8_all, bool32fx8_any;
|
|
- bool16ix16, bool16ix16_all, bool16ix16_any;
|
|
- bool8ix32, bool8ix32_all, bool8ix32_any;
|
|
- }
|
|
-
|
|
-}
|
|
-
|
|
-bool_impls! {
|
|
- bool64ix4: bool64i, i64x4, i64, 4, bool64ix4_all, bool64ix4_any, x0, x1 | x2, x3
|
|
- [/// Convert `self` to a boolean vector for interacting with floating point vectors.
|
|
- to_f -> bool64fx4];
|
|
-
|
|
- bool64fx4: bool64f, i64x4, i64, 4, bool64fx4_all, bool64fx4_any, x0, x1 | x2, x3
|
|
- [/// Convert `self` to a boolean vector for interacting with integer vectors.
|
|
- to_i -> bool64ix4];
|
|
-
|
|
- bool32ix8: bool32i, i32x8, i32, 8, bool32ix8_all, bool32ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7
|
|
- [/// Convert `self` to a boolean vector for interacting with floating point vectors.
|
|
- to_f -> bool32fx8];
|
|
-
|
|
- bool32fx8: bool32f, i32x8, i32, 8, bool32fx8_all, bool32fx8_any, x0, x1, x2, x3 | x4, x5, x6, x7
|
|
- [/// Convert `self` to a boolean vector for interacting with integer vectors.
|
|
- to_i -> bool32ix8];
|
|
-
|
|
- bool16ix16: bool16i, i16x16, i16, 16, bool16ix16_all, bool16ix16_any,
|
|
- x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 [];
|
|
-
|
|
- bool8ix32: bool8i, i8x32, i8, 32, bool8ix32_all, bool8ix32_any,
|
|
- x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
|
- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 [];
|
|
-}
|
|
-
|
|
-pub trait LowHigh128 {
|
|
- type Half: Simd;
|
|
- /// Extract the low 128 bit part.
|
|
- fn low(self) -> Self::Half;
|
|
- /// Extract the high 128 bit part.
|
|
- fn high(self) -> Self::Half;
|
|
-}
|
|
-
|
|
-macro_rules! expr { ($x:expr) => ($x) } // HACK
|
|
-macro_rules! low_high_impls {
|
|
- ($(
|
|
- $name: ident, $half: ident, $($first: tt),+ ... $($last: tt),+;
|
|
- )*) => {
|
|
- $(impl LowHigh128 for $name {
|
|
- type Half = $half;
|
|
- #[inline]
|
|
- fn low(self) -> Self::Half {
|
|
- $half::new($( expr!(self.$first), )*)
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn high(self) -> Self::Half {
|
|
- $half::new($( expr!(self.$last), )*)
|
|
- }
|
|
- })*
|
|
- }
|
|
-}
|
|
-
|
|
-low_high_impls! {
|
|
- u64x4, u64x2, 0, 1 ... 2, 3;
|
|
- i64x4, i64x2, 0, 1 ... 2, 3;
|
|
- f64x4, f64x2, 0, 1 ... 2, 3;
|
|
-
|
|
- u32x8, u32x4, 0, 1, 2, 3 ... 4, 5, 6, 7;
|
|
- i32x8, i32x4, 0, 1, 2, 3 ... 4, 5, 6, 7;
|
|
- f32x8, f32x4, 0, 1, 2, 3 ... 4, 5, 6, 7;
|
|
-
|
|
- u16x16, u16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15;
|
|
- i16x16, i16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15;
|
|
-
|
|
- u8x32, u8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ...
|
|
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31;
|
|
- i8x32, i8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ...
|
|
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31;
|
|
-
|
|
-}
|
|
-
|
|
-macro_rules! bool_low_high_impls {
|
|
- ($(
|
|
- $name: ident: $half: ident;
|
|
- )*) => {
|
|
- $(impl LowHigh128 for $name {
|
|
- type Half = $half;
|
|
- /// Extract the low 128 bit part.
|
|
- #[inline]
|
|
- fn low(self) -> Self::Half {
|
|
- Self::Half::from_repr(self.to_repr().low())
|
|
- }
|
|
-
|
|
- /// Extract the high 128 bit part.
|
|
- #[inline]
|
|
- fn high(self) -> Self::Half {
|
|
- Self::Half::from_repr(self.to_repr().high())
|
|
- }
|
|
- })*
|
|
- }
|
|
-}
|
|
-
|
|
-bool_low_high_impls! {
|
|
- bool64fx4: bool64fx2;
|
|
- bool32fx8: bool32fx4;
|
|
-
|
|
- bool64ix4: bool64ix2;
|
|
- bool32ix8: bool32ix4;
|
|
- bool16ix16: bool16ix8;
|
|
- bool8ix32: bool8ix16;
|
|
-}
|
|
-
|
|
-impl u64x4 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i64(self) -> i64x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 64-bit float.
|
|
- #[inline]
|
|
- pub fn to_f64(self) -> f64x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl i64x4 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u64(self) -> u64x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 64-bit float.
|
|
- #[inline]
|
|
- pub fn to_f64(self) -> f64x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl f64x4 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i64(self) -> i64x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u64(self) -> u64x4 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl u32x8 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i32(self) -> i32x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 32-bit float.
|
|
- #[inline]
|
|
- pub fn to_f32(self) -> f32x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl i32x8 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u32(self) -> u32x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to a 32-bit float.
|
|
- #[inline]
|
|
- pub fn to_f32(self) -> f32x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl f32x8 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i32(self) -> i32x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u32(self) -> u32x8 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl i16x16 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u16(self) -> u16x16 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl u16x16 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i16(self) -> i16x16 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl i8x32 {
|
|
- /// Convert each lane to an unsigned integer.
|
|
- #[inline]
|
|
- pub fn to_u8(self) -> u8x32 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-impl u8x32 {
|
|
- /// Convert each lane to a signed integer.
|
|
- #[inline]
|
|
- pub fn to_i8(self) -> i8x32 {
|
|
- unsafe {simd_cast(self)}
|
|
- }
|
|
-}
|
|
-
|
|
-operators! {
|
|
- Add (simd_add, add):
|
|
- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4,
|
|
- f64x4, f32x8;
|
|
- Sub (simd_sub, sub):
|
|
- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4,
|
|
- f64x4, f32x8;
|
|
- Mul (simd_mul, mul):
|
|
- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4,
|
|
- f64x4, f32x8;
|
|
- Div (simd_div, div): f64x4, f32x8;
|
|
-
|
|
- BitAnd (simd_and, bitand):
|
|
- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4,
|
|
- bool64ix4, bool32ix8, bool16ix16,
|
|
- bool64fx4, bool32fx8;
|
|
- BitOr (simd_or, bitor):
|
|
- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4,
|
|
- bool64ix4, bool32ix8, bool16ix16,
|
|
- bool64fx4, bool32fx8;
|
|
- BitXor (simd_xor, bitxor):
|
|
- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4,
|
|
- bool64ix4, bool32ix8, bool16ix16,
|
|
- bool64fx4, bool32fx8;
|
|
-}
|
|
-
|
|
-neg_impls!{
|
|
- 0,
|
|
- i64x4,
|
|
- i32x8,
|
|
- i16x16,
|
|
- i8x32,
|
|
-}
|
|
-
|
|
-neg_impls! {
|
|
- 0.0,
|
|
- f64x4,
|
|
- f32x8,
|
|
-}
|
|
-
|
|
-not_impls! {
|
|
- i64x4,
|
|
- u64x4,
|
|
- i32x8,
|
|
- u32x8,
|
|
- i16x16,
|
|
- u16x16,
|
|
- i8x32,
|
|
- u8x32,
|
|
-}
|
|
-
|
|
-shift! {
|
|
- i64x4,
|
|
- u64x4,
|
|
- i32x8,
|
|
- u32x8,
|
|
- i16x16,
|
|
- u16x16,
|
|
- i8x32,
|
|
- u8x32
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/x86/avx.rs b/third_party/rust/simd/src/x86/avx.rs
|
|
deleted file mode 100644
|
|
index 180247e36561..000000000000
|
|
--- a/third_party/rust/simd/src/x86/avx.rs
|
|
+++ /dev/null
|
|
@@ -1,290 +0,0 @@
|
|
-use super::super::*;
|
|
-use sixty_four::*;
|
|
-
|
|
-use super::super::bitcast;
|
|
-
|
|
-pub use v256::{
|
|
- f64x4, bool64fx4, u64x4, i64x4, bool64ix4,
|
|
- f32x8, bool32fx8, u32x8, i32x8, bool32ix8,
|
|
- u16x16, i16x16, bool16ix16,
|
|
- u8x32, i8x32, bool8ix32,
|
|
- LowHigh128
|
|
-};
|
|
-
|
|
-#[allow(dead_code)]
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm256_addsub_ps(x: f32x8, y: f32x8) -> f32x8;
|
|
- fn x86_mm256_addsub_pd(x: f64x4, y: f64x4) -> f64x4;
|
|
- fn x86_mm256_dp_ps(x: f32x8, y: f32x8, z: i32) -> f32x8;
|
|
- fn x86_mm256_hadd_ps(x: f32x8, y: f32x8) -> f32x8;
|
|
- fn x86_mm256_hadd_pd(x: f64x4, y: f64x4) -> f64x4;
|
|
- fn x86_mm256_hsub_ps(x: f32x8, y: f32x8) -> f32x8;
|
|
- fn x86_mm256_hsub_pd(x: f64x4, y: f64x4) -> f64x4;
|
|
- fn x86_mm256_max_ps(x: f32x8, y: f32x8) -> f32x8;
|
|
- fn x86_mm256_max_pd(x: f64x4, y: f64x4) -> f64x4;
|
|
- fn x86_mm256_min_ps(x: f32x8, y: f32x8) -> f32x8;
|
|
- fn x86_mm256_min_pd(x: f64x4, y: f64x4) -> f64x4;
|
|
- fn x86_mm256_movemask_ps(x: f32x8) -> i32;
|
|
- fn x86_mm256_movemask_pd(x: f64x4) -> i32;
|
|
- fn x86_mm_permutevar_ps(x: f32x4, y: i32x4) -> f32x4;
|
|
- fn x86_mm_permutevar_pd(x: f64x2, y: i64x2) -> f64x2;
|
|
- fn x86_mm256_permutevar_ps(x: f32x8, y: i32x8) -> f32x8;
|
|
- fn x86_mm256_permutevar_pd(x: f64x4, y: i64x4) -> f64x4;
|
|
- fn x86_mm256_rcp_ps(x: f32x8) -> f32x8;
|
|
- fn x86_mm256_rsqrt_ps(x: f32x8) -> f32x8;
|
|
- fn x86_mm256_sqrt_ps(x: f32x8) -> f32x8;
|
|
- fn x86_mm256_sqrt_pd(x: f64x4) -> f64x4;
|
|
- fn x86_mm_testc_ps(x: f32x4, y: f32x4) -> i32;
|
|
- fn x86_mm256_testc_ps(x: f32x8, y: f32x8) -> i32;
|
|
- fn x86_mm_testc_pd(x: f64x2, y: f64x2) -> i32;
|
|
- fn x86_mm256_testc_pd(x: f64x4, y: f64x4) -> i32;
|
|
- fn x86_mm256_testc_si256(x: u64x4, y: u64x4) -> i32;
|
|
- fn x86_mm_testnzc_ps(x: f32x4, y: f32x4) -> i32;
|
|
- fn x86_mm256_testnzc_ps(x: f32x8, y: f32x8) -> i32;
|
|
- fn x86_mm_testnzc_pd(x: f64x2, y: f64x2) -> i32;
|
|
- fn x86_mm256_testnzc_pd(x: f64x4, y: f64x4) -> i32;
|
|
- fn x86_mm256_testnzc_si256(x: u64x4, y: u64x4) -> i32;
|
|
- fn x86_mm_testz_ps(x: f32x4, y: f32x4) -> i32;
|
|
- fn x86_mm256_testz_ps(x: f32x8, y: f32x8) -> i32;
|
|
- fn x86_mm_testz_pd(x: f64x2, y: f64x2) -> i32;
|
|
- fn x86_mm256_testz_pd(x: f64x4, y: f64x4) -> i32;
|
|
- fn x86_mm256_testz_si256(x: u64x4, y: u64x4) -> i32;
|
|
-}
|
|
-
|
|
-#[doc(hidden)]
|
|
-pub mod common {
|
|
- use super::*;
|
|
- use core::mem;
|
|
-
|
|
- macro_rules! bools {
|
|
- ($($ty: ty, $all: ident, $any: ident, $testc: ident, $testz: ident;)*) => {
|
|
- $(
|
|
- #[inline]
|
|
- pub fn $all(x: $ty) -> bool {
|
|
- unsafe {
|
|
- super::$testc(mem::transmute(x), mem::transmute(<$ty>::splat(true))) != 0
|
|
- }
|
|
- }
|
|
- #[inline]
|
|
- pub fn $any(x: $ty) -> bool {
|
|
- unsafe {
|
|
- super::$testz(mem::transmute(x), mem::transmute(x)) == 0
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
- }
|
|
-
|
|
- bools! {
|
|
- bool32fx8, bool32fx8_all, bool32fx8_any, x86_mm256_testc_ps, x86_mm256_testz_ps;
|
|
- bool64fx4, bool64fx4_all, bool64fx4_any, x86_mm256_testc_pd, x86_mm256_testz_pd;
|
|
- bool8ix32, bool8ix32_all, bool8ix32_any, x86_mm256_testc_si256, x86_mm256_testz_si256;
|
|
- bool16ix16, bool16ix16_all, bool16ix16_any, x86_mm256_testc_si256, x86_mm256_testz_si256;
|
|
- bool32ix8, bool32ix8_all, bool32ix8_any, x86_mm256_testc_si256, x86_mm256_testz_si256;
|
|
- bool64ix4, bool64ix4_all, bool64ix4_any, x86_mm256_testc_si256, x86_mm256_testz_si256;
|
|
- }
|
|
-}
|
|
-
|
|
-// 128-bit vectors:
|
|
-
|
|
-// 32 bit floats
|
|
-
|
|
-pub trait AvxF32x4 {
|
|
- fn permutevar(self, other: i32x4) -> f32x4;
|
|
-}
|
|
-impl AvxF32x4 for f32x4 {
|
|
- fn permutevar(self, other: i32x4) -> f32x4 {
|
|
- unsafe { x86_mm_permutevar_ps(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait AvxF64x4 {
|
|
- fn sqrt(self) -> Self;
|
|
- fn addsub(self, other: Self) -> Self;
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
- fn move_mask(self) -> u32;
|
|
-}
|
|
-
|
|
-impl AvxF64x4 for f64x4 {
|
|
- #[inline]
|
|
- fn sqrt(self) -> Self {
|
|
- unsafe { x86_mm256_sqrt_pd(self) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn addsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_addsub_pd(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_hadd_pd(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_hsub_pd(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_max_pd(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_min_pd(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm256_movemask_pd(self) as u32 }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait AvxBool64fx4 {
|
|
- fn move_mask(self) -> u32;
|
|
-}
|
|
-impl AvxBool64fx4 for bool64fx4 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm256_movemask_pd(bitcast(self)) as u32 }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait AvxF32x8 {
|
|
- fn sqrt(self) -> Self;
|
|
- fn addsub(self, other: Self) -> Self;
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
- fn move_mask(self) -> u32;
|
|
- /// Compute an approximation to the reciprocal of the square root
|
|
- /// of `self`, that is, `f32x8::splat(1.0) / self.sqrt()`.
|
|
- ///
|
|
- /// The accuracy of this approximation is platform dependent.
|
|
- fn approx_rsqrt(self) -> Self;
|
|
- /// Compute an approximation to the reciprocal of `self`, that is,
|
|
- /// `f32x8::splat(1.0) / self`.
|
|
- ///
|
|
- /// The accuracy of this approximation is platform dependent.
|
|
- fn approx_reciprocal(self) -> Self;
|
|
-}
|
|
-
|
|
-impl AvxF32x8 for f32x8 {
|
|
- #[inline]
|
|
- fn sqrt(self) -> Self {
|
|
- unsafe { x86_mm256_sqrt_ps(self) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn addsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_addsub_ps(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_hadd_ps(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_hsub_ps(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_max_ps(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm256_min_ps(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm256_movemask_ps(self) as u32 }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn approx_reciprocal(self) -> Self {
|
|
- unsafe { x86_mm256_rcp_ps(self) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn approx_rsqrt(self) -> Self {
|
|
- unsafe { x86_mm256_rsqrt_ps(self) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait AvxBool32fx8 {
|
|
- fn move_mask(self) -> u32;
|
|
-}
|
|
-impl AvxBool32fx8 for bool32fx8 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm256_movemask_ps(bitcast(self)) as u32 }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait AvxBool32fx4 {}
|
|
-impl AvxBool32fx4 for bool32fx4 {}
|
|
-
|
|
-// 64 bit floats
|
|
-
|
|
-pub trait AvxF64x2 {
|
|
- fn permutevar(self, other: i64x2) -> f64x2;
|
|
-}
|
|
-impl AvxF64x2 for f64x2 {
|
|
- fn permutevar(self, other: i64x2) -> f64x2 {
|
|
- unsafe { x86_mm_permutevar_pd(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait AvxBool64fx2 {}
|
|
-impl AvxBool64fx2 for bool64fx2 {}
|
|
-
|
|
-// 64 bit integers
|
|
-
|
|
-pub trait AvxU64x2 {}
|
|
-impl AvxU64x2 for u64x2 {}
|
|
-pub trait AvxI64x2 {}
|
|
-impl AvxI64x2 for i64x2 {}
|
|
-
|
|
-pub trait AvxBool64ix2 {}
|
|
-impl AvxBool64ix2 for bool64ix2 {}
|
|
-
|
|
-// 32 bit integers
|
|
-
|
|
-pub trait AvxU32x4 {}
|
|
-impl AvxU32x4 for u32x4 {}
|
|
-pub trait AvxI32x4 {}
|
|
-impl AvxI32x4 for i32x4 {}
|
|
-
|
|
-pub trait AvxBool32ix4 {}
|
|
-impl AvxBool32ix4 for bool32ix4 {}
|
|
-
|
|
-// 16 bit integers
|
|
-
|
|
-pub trait AvxU16x8 {}
|
|
-impl AvxU16x8 for u16x8 {}
|
|
-pub trait AvxI16x8 {}
|
|
-impl AvxI16x8 for i16x8 {}
|
|
-
|
|
-pub trait AvxBool16ix8 {}
|
|
-impl AvxBool16ix8 for bool16ix8 {}
|
|
-
|
|
-// 8 bit integers
|
|
-
|
|
-pub trait AvxU8x16 {}
|
|
-impl AvxU8x16 for u8x16 {}
|
|
-pub trait AvxI8x16 {}
|
|
-impl AvxI8x16 for i8x16 {}
|
|
-
|
|
-pub trait AvxBool8ix16 {}
|
|
-impl AvxBool8ix16 for bool8ix16 {}
|
|
diff --git a/third_party/rust/simd/src/x86/avx2.rs b/third_party/rust/simd/src/x86/avx2.rs
|
|
deleted file mode 100644
|
|
index e86a33d3b5bb..000000000000
|
|
--- a/third_party/rust/simd/src/x86/avx2.rs
|
|
+++ /dev/null
|
|
@@ -1,65 +0,0 @@
|
|
-use x86::avx::*;
|
|
-
|
|
-#[allow(dead_code)]
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm256_abs_epi8(x: i8x32) -> i8x32;
|
|
- fn x86_mm256_abs_epi16(x: i16x16) -> i16x16;
|
|
- fn x86_mm256_abs_epi32(x: i32x8) -> i32x8;
|
|
- fn x86_mm256_adds_epi8(x: i8x32, y: i8x32) -> i8x32;
|
|
- fn x86_mm256_adds_epu8(x: u8x32, y: u8x32) -> u8x32;
|
|
- fn x86_mm256_adds_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_adds_epu16(x: u16x16, y: u16x16) -> u16x16;
|
|
- fn x86_mm256_avg_epu8(x: u8x32, y: u8x32) -> u8x32;
|
|
- fn x86_mm256_avg_epu16(x: u16x16, y: u16x16) -> u16x16;
|
|
- fn x86_mm256_hadd_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_hadd_epi32(x: i32x8, y: i32x8) -> i32x8;
|
|
- fn x86_mm256_hadds_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_hsub_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_hsub_epi32(x: i32x8, y: i32x8) -> i32x8;
|
|
- fn x86_mm256_hsubs_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_madd_epi16(x: i16x16, y: i16x16) -> i32x8;
|
|
- fn x86_mm256_maddubs_epi16(x: i8x32, y: i8x32) -> i16x16;
|
|
- fn x86_mm256_max_epi8(x: i8x32, y: i8x32) -> i8x32;
|
|
- fn x86_mm256_max_epu8(x: u8x32, y: u8x32) -> u8x32;
|
|
- fn x86_mm256_max_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_max_epu16(x: u16x16, y: u16x16) -> u16x16;
|
|
- fn x86_mm256_max_epi32(x: i32x8, y: i32x8) -> i32x8;
|
|
- fn x86_mm256_max_epu32(x: u32x8, y: u32x8) -> u32x8;
|
|
- fn x86_mm256_min_epi8(x: i8x32, y: i8x32) -> i8x32;
|
|
- fn x86_mm256_min_epu8(x: u8x32, y: u8x32) -> u8x32;
|
|
- fn x86_mm256_min_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_min_epu16(x: u16x16, y: u16x16) -> u16x16;
|
|
- fn x86_mm256_min_epi32(x: i32x8, y: i32x8) -> i32x8;
|
|
- fn x86_mm256_min_epu32(x: u32x8, y: u32x8) -> u32x8;
|
|
- fn x86_mm256_mul_epi64(x: i32x8, y: i32x8) -> i64x4;
|
|
- fn x86_mm256_mul_epu64(x: u32x8, y: u32x8) -> u64x4;
|
|
- fn x86_mm256_mulhi_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_mulhi_epu16(x: u16x16, y: u16x16) -> u16x16;
|
|
- fn x86_mm256_mulhrs_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_packs_epi16(x: i16x16, y: i16x16) -> i8x32;
|
|
- fn x86_mm256_packus_epi16(x: i16x16, y: i16x16) -> u8x32;
|
|
- fn x86_mm256_packs_epi32(x: i32x8, y: i32x8) -> i16x16;
|
|
- fn x86_mm256_packus_epi32(x: i32x8, y: i32x8) -> u16x16;
|
|
- fn x86_mm256_permutevar8x32_epi32(x: i32x8, y: i32x8) -> i32x8;
|
|
- fn x86_mm256_permutevar8x32_ps(x: f32x8, y: i32x8) -> f32x8;
|
|
- fn x86_mm256_sad_epu8(x: u8x32, y: u8x32) -> u64x4;
|
|
- fn x86_mm256_shuffle_epi8(x: i8x32, y: i8x32) -> i8x32;
|
|
- fn x86_mm256_sign_epi8(x: i8x32, y: i8x32) -> i8x32;
|
|
- fn x86_mm256_sign_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_sign_epi32(x: i32x8, y: i32x8) -> i32x8;
|
|
- fn x86_mm256_subs_epi8(x: i8x32, y: i8x32) -> i8x32;
|
|
- fn x86_mm256_subs_epu8(x: u8x32, y: u8x32) -> u8x32;
|
|
- fn x86_mm256_subs_epi16(x: i16x16, y: i16x16) -> i16x16;
|
|
- fn x86_mm256_subs_epu16(x: u16x16, y: u16x16) -> u16x16;
|
|
-}
|
|
-
|
|
-// broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12)
|
|
-// pub trait Avx2F32x8 {
|
|
-// fn permutevar(self, other: i32x8) -> f32x8;
|
|
-// }
|
|
-//
|
|
-// impl Avx2F32x8 for f32x8 {
|
|
-// fn permutevar(self, other: i32x8) -> f32x8 {
|
|
-// unsafe { x86_mm256_permutevar8x32_ps(self, other) }
|
|
-// }
|
|
-// }
|
|
diff --git a/third_party/rust/simd/src/x86/mod.rs b/third_party/rust/simd/src/x86/mod.rs
|
|
deleted file mode 100644
|
|
index 8763fb16ccfd..000000000000
|
|
--- a/third_party/rust/simd/src/x86/mod.rs
|
|
+++ /dev/null
|
|
@@ -1,16 +0,0 @@
|
|
-//! Features specific to x86 and x86-64 CPUs.
|
|
-
|
|
-#[cfg(any(feature = "doc", target_feature = "sse2"))]
|
|
-pub mod sse2;
|
|
-#[cfg(any(feature = "doc", target_feature = "sse3"))]
|
|
-pub mod sse3;
|
|
-#[cfg(any(feature = "doc", target_feature = "ssse3"))]
|
|
-pub mod ssse3;
|
|
-#[cfg(any(feature = "doc", target_feature = "sse4.1"))]
|
|
-pub mod sse4_1;
|
|
-#[cfg(any(feature = "doc", target_feature = "sse4.2"))]
|
|
-pub mod sse4_2;
|
|
-#[cfg(any(feature = "doc", target_feature = "avx"))]
|
|
-pub mod avx;
|
|
-#[cfg(any(feature = "doc", target_feature = "avx2"))]
|
|
-pub mod avx2;
|
|
diff --git a/third_party/rust/simd/src/x86/sse2.rs b/third_party/rust/simd/src/x86/sse2.rs
|
|
deleted file mode 100644
|
|
index 5cbc853694d5..000000000000
|
|
--- a/third_party/rust/simd/src/x86/sse2.rs
|
|
+++ /dev/null
|
|
@@ -1,359 +0,0 @@
|
|
-use super::super::*;
|
|
-use {bitcast, simd_cast, f32x2};
|
|
-
|
|
-pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2};
|
|
-
|
|
-//pub use super::{u64x2, i64x2, f64x2, bool64ix2, bool64fx2};
|
|
-
|
|
-// strictly speaking, these are SSE instructions, not SSE2.
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm_movemask_ps(x: f32x4) -> i32;
|
|
- fn x86_mm_max_ps(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn x86_mm_min_ps(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn x86_mm_rsqrt_ps(x: f32x4) -> f32x4;
|
|
- fn x86_mm_rcp_ps(x: f32x4) -> f32x4;
|
|
- fn x86_mm_sqrt_ps(x: f32x4) -> f32x4;
|
|
-}
|
|
-
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm_adds_epi8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn x86_mm_adds_epu8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn x86_mm_adds_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_adds_epu16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn x86_mm_avg_epu8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn x86_mm_avg_epu16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn x86_mm_madd_epi16(x: i16x8, y: i16x8) -> i32x4;
|
|
- fn x86_mm_max_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_max_epu8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn x86_mm_max_pd(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn x86_mm_min_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_min_epu8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn x86_mm_min_pd(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn x86_mm_movemask_pd(x: f64x2) -> i32;
|
|
- fn x86_mm_movemask_epi8(x: i8x16) -> i32;
|
|
- fn x86_mm_mul_epu32(x: u32x4, y: u32x4) -> u64x2;
|
|
- fn x86_mm_mulhi_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_mulhi_epu16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn x86_mm_packs_epi16(x: i16x8, y: i16x8) -> i8x16;
|
|
- fn x86_mm_packs_epi32(x: i32x4, y: i32x4) -> i16x8;
|
|
- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16;
|
|
- fn x86_mm_sad_epu8(x: u8x16, y: u8x16) -> u64x2;
|
|
- fn x86_mm_sqrt_pd(x: f64x2) -> f64x2;
|
|
- fn x86_mm_subs_epi8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn x86_mm_subs_epu8(x: u8x16, y: u8x16) -> u8x16;
|
|
- fn x86_mm_subs_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_subs_epu16(x: u16x8, y: u16x8) -> u16x8;
|
|
-}
|
|
-
|
|
-#[doc(hidden)]
|
|
-pub mod common {
|
|
- use super::super::super::*;
|
|
- use core::mem;
|
|
-
|
|
- #[inline]
|
|
- pub fn f32x4_sqrt(x: f32x4) -> f32x4 {
|
|
- unsafe {super::x86_mm_sqrt_ps(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 {
|
|
- unsafe {super::x86_mm_rsqrt_ps(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 {
|
|
- unsafe {super::x86_mm_rcp_ps(x)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 {
|
|
- unsafe {super::x86_mm_max_ps(x, y)}
|
|
- }
|
|
- #[inline]
|
|
- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 {
|
|
- unsafe {super::x86_mm_min_ps(x, y)}
|
|
- }
|
|
-
|
|
- macro_rules! bools {
|
|
- ($($ty: ty, $all: ident, $any: ident, $movemask: ident, $width: expr;)*) => {
|
|
- $(
|
|
- #[inline]
|
|
- pub fn $all(x: $ty) -> bool {
|
|
- unsafe {
|
|
- super::$movemask(mem::transmute(x)) == (1 << $width) - 1
|
|
- }
|
|
- }
|
|
- #[inline]
|
|
- pub fn $any(x: $ty) -> bool {
|
|
- unsafe {
|
|
- super::$movemask(mem::transmute(x)) != 0
|
|
- }
|
|
- }
|
|
- )*
|
|
- }
|
|
- }
|
|
-
|
|
- bools! {
|
|
- bool32fx4, bool32fx4_all, bool32fx4_any, x86_mm_movemask_ps, 4;
|
|
- bool8ix16, bool8ix16_all, bool8ix16_any, x86_mm_movemask_epi8, 16;
|
|
- bool16ix8, bool16ix8_all, bool16ix8_any, x86_mm_movemask_epi8, 16;
|
|
- bool32ix4, bool32ix4_all, bool32ix4_any, x86_mm_movemask_epi8, 16;
|
|
- }
|
|
-}
|
|
-
|
|
-// 32 bit floats
|
|
-
|
|
-pub trait Sse2F32x4 {
|
|
- fn to_f64(self) -> f64x2;
|
|
- fn move_mask(self) -> u32;
|
|
-}
|
|
-impl Sse2F32x4 for f32x4 {
|
|
- #[inline]
|
|
- fn to_f64(self) -> f64x2 {
|
|
- unsafe {
|
|
- simd_cast(f32x2(self.0, self.1))
|
|
- }
|
|
- }
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe {x86_mm_movemask_ps(self) as u32}
|
|
- }
|
|
-}
|
|
-pub trait Sse2Bool32fx4 {
|
|
- fn move_mask(self) -> u32;
|
|
-}
|
|
-impl Sse2Bool32fx4 for bool32fx4 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm_movemask_ps(bitcast(self)) as u32}
|
|
- }
|
|
-}
|
|
-
|
|
-// 64 bit floats
|
|
-
|
|
-pub trait Sse2F64x2 {
|
|
- fn move_mask(self) -> u32;
|
|
- fn sqrt(self) -> Self;
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
-}
|
|
-impl Sse2F64x2 for f64x2 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32}
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn sqrt(self) -> Self {
|
|
- unsafe { x86_mm_sqrt_pd(self) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_max_pd(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_min_pd(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse2Bool64fx2 {
|
|
- fn move_mask(self) -> u32;
|
|
-}
|
|
-impl Sse2Bool64fx2 for bool64fx2 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32}
|
|
- }
|
|
-}
|
|
-
|
|
-// 64 bit ints
|
|
-
|
|
-pub trait Sse2U64x2 {}
|
|
-impl Sse2U64x2 for u64x2 {}
|
|
-
|
|
-pub trait Sse2I64x2 {}
|
|
-impl Sse2I64x2 for i64x2 {}
|
|
-
|
|
-pub trait Sse2Bool64ix2 {}
|
|
-impl Sse2Bool64ix2 for bool64ix2 {}
|
|
-
|
|
-// 32 bit ints
|
|
-
|
|
-pub trait Sse2U32x4 {
|
|
- fn low_mul(self, other: Self) -> u64x2;
|
|
-}
|
|
-impl Sse2U32x4 for u32x4 {
|
|
- #[inline]
|
|
- fn low_mul(self, other: Self) -> u64x2 {
|
|
- unsafe { x86_mm_mul_epu32(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse2I32x4 {
|
|
- fn packs(self, other: Self) -> i16x8;
|
|
-}
|
|
-impl Sse2I32x4 for i32x4 {
|
|
- #[inline]
|
|
- fn packs(self, other: Self) -> i16x8 {
|
|
- unsafe { x86_mm_packs_epi32(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse2Bool32ix4 {}
|
|
-impl Sse2Bool32ix4 for bool32ix4 {}
|
|
-
|
|
-// 16 bit ints
|
|
-
|
|
-pub trait Sse2U16x8 {
|
|
- fn adds(self, other: Self) -> Self;
|
|
- fn subs(self, other: Self) -> Self;
|
|
- fn avg(self, other: Self) -> Self;
|
|
- fn mulhi(self, other: Self) -> Self;
|
|
-}
|
|
-impl Sse2U16x8 for u16x8 {
|
|
- #[inline]
|
|
- fn adds(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_adds_epu16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn subs(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_subs_epu16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn avg(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_avg_epu16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn mulhi(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_mulhi_epu16(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse2I16x8 {
|
|
- fn adds(self, other: Self) -> Self;
|
|
- fn subs(self, other: Self) -> Self;
|
|
- fn madd(self, other: Self) -> i32x4;
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
- fn mulhi(self, other: Self) -> Self;
|
|
- fn packs(self, other: Self) -> i8x16;
|
|
- fn packus(self, other: Self) -> u8x16;
|
|
-}
|
|
-impl Sse2I16x8 for i16x8 {
|
|
- #[inline]
|
|
- fn adds(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_adds_epi16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn subs(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_subs_epi16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn madd(self, other: Self) -> i32x4 {
|
|
- unsafe { x86_mm_madd_epi16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_max_epi16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_min_epi16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn mulhi(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_mulhi_epi16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn packs(self, other: Self) -> i8x16 {
|
|
- unsafe { x86_mm_packs_epi16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn packus(self, other: Self) -> u8x16 {
|
|
- unsafe { x86_mm_packus_epi16(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse2Bool16ix8 {}
|
|
-impl Sse2Bool16ix8 for bool16ix8 {}
|
|
-
|
|
-// 8 bit ints
|
|
-
|
|
-pub trait Sse2U8x16 {
|
|
- fn move_mask(self) -> u32;
|
|
- fn adds(self, other: Self) -> Self;
|
|
- fn subs(self, other: Self) -> Self;
|
|
- fn avg(self, other: Self) -> Self;
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
- fn sad(self, other: Self) -> u64x2;
|
|
-}
|
|
-impl Sse2U8x16 for u8x16 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32}
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn adds(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_adds_epu8(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn subs(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_subs_epu8(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn avg(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_avg_epu8(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_max_epu8(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_min_epu8(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn sad(self, other: Self) -> u64x2 {
|
|
- unsafe { x86_mm_sad_epu8(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse2I8x16 {
|
|
- fn move_mask(self) -> u32;
|
|
- fn adds(self, other: Self) -> Self;
|
|
- fn subs(self, other: Self) -> Self;
|
|
-}
|
|
-impl Sse2I8x16 for i8x16 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32}
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn adds(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_adds_epi8(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn subs(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_subs_epi8(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse2Bool8ix16 {
|
|
- fn move_mask(self) -> u32;
|
|
-}
|
|
-impl Sse2Bool8ix16 for bool8ix16 {
|
|
- #[inline]
|
|
- fn move_mask(self) -> u32 {
|
|
- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32}
|
|
- }
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/x86/sse3.rs b/third_party/rust/simd/src/x86/sse3.rs
|
|
deleted file mode 100644
|
|
index bd70b569f9c0..000000000000
|
|
--- a/third_party/rust/simd/src/x86/sse3.rs
|
|
+++ /dev/null
|
|
@@ -1,57 +0,0 @@
|
|
-use sixty_four::*;
|
|
-use super::super::*;
|
|
-
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm_addsub_ps(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn x86_mm_addsub_pd(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn x86_mm_hadd_ps(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn x86_mm_hadd_pd(x: f64x2, y: f64x2) -> f64x2;
|
|
- fn x86_mm_hsub_ps(x: f32x4, y: f32x4) -> f32x4;
|
|
- fn x86_mm_hsub_pd(x: f64x2, y: f64x2) -> f64x2;
|
|
-}
|
|
-
|
|
-pub trait Sse3F32x4 {
|
|
- fn addsub(self, other: Self) -> Self;
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
-}
|
|
-
|
|
-impl Sse3F32x4 for f32x4 {
|
|
- #[inline]
|
|
- fn addsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_addsub_ps(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hadd_ps(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hsub_ps(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse3F64x2 {
|
|
- fn addsub(self, other: Self) -> Self;
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
-}
|
|
-
|
|
-impl Sse3F64x2 for f64x2 {
|
|
- #[inline]
|
|
- fn addsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_addsub_pd(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hadd_pd(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hsub_pd(self, other) }
|
|
- }
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/x86/sse4_1.rs b/third_party/rust/simd/src/x86/sse4_1.rs
|
|
deleted file mode 100644
|
|
index fa44678a0584..000000000000
|
|
--- a/third_party/rust/simd/src/x86/sse4_1.rs
|
|
+++ /dev/null
|
|
@@ -1,155 +0,0 @@
|
|
-use super::super::*;
|
|
-use x86::sse2::*;
|
|
-
|
|
-#[allow(dead_code)]
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm_dp_ps(x: f32x4, y: f32x4, z: i32) -> f32x4;
|
|
- fn x86_mm_dp_pd(x: f64x2, y: f64x2, z: i32) -> f64x2;
|
|
- fn x86_mm_max_epi8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn x86_mm_max_epu16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn x86_mm_max_epi32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn x86_mm_max_epu32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn x86_mm_min_epi8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn x86_mm_min_epu16(x: u16x8, y: u16x8) -> u16x8;
|
|
- fn x86_mm_min_epi32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn x86_mm_min_epu32(x: u32x4, y: u32x4) -> u32x4;
|
|
- fn x86_mm_minpos_epu16(x: u16x8) -> u16x8;
|
|
- fn x86_mm_mpsadbw_epu8(x: u8x16, y: u8x16, z: i32) -> u16x8;
|
|
- fn x86_mm_mul_epi32(x: i32x4, y: i32x4) -> i64x2;
|
|
- fn x86_mm_packus_epi32(x: i32x4, y: i32x4) -> u16x8;
|
|
- fn x86_mm_testc_si128(x: u64x2, y: u64x2) -> i32;
|
|
- fn x86_mm_testnzc_si128(x: u64x2, y: u64x2) -> i32;
|
|
- fn x86_mm_testz_si128(x: u64x2, y: u64x2) -> i32;
|
|
-}
|
|
-
|
|
-// 32 bit floats
|
|
-
|
|
-pub trait Sse41F32x4 {}
|
|
-impl Sse41F32x4 for f32x4 {}
|
|
-
|
|
-// 64 bit floats
|
|
-
|
|
-pub trait Sse41F64x2 {}
|
|
-impl Sse41F64x2 for f64x2 {}
|
|
-
|
|
-// 64 bit integers
|
|
-
|
|
-pub trait Sse41U64x2 {
|
|
- fn testc(self, other: Self) -> i32;
|
|
- fn testnzc(self, other: Self) -> i32;
|
|
- fn testz(self, other: Self) -> i32;
|
|
-}
|
|
-impl Sse41U64x2 for u64x2 {
|
|
- #[inline]
|
|
- fn testc(self, other: Self) -> i32 {
|
|
- unsafe { x86_mm_testc_si128(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn testnzc(self, other: Self) -> i32 {
|
|
- unsafe { x86_mm_testnzc_si128(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn testz(self, other: Self) -> i32 {
|
|
- unsafe { x86_mm_testz_si128(self, other) }
|
|
- }
|
|
-}
|
|
-pub trait Sse41I64x2 {}
|
|
-impl Sse41I64x2 for i64x2 {}
|
|
-
|
|
-pub trait Sse41Bool64ix2 {}
|
|
-impl Sse41Bool64ix2 for bool64ix2 {}
|
|
-
|
|
-// 32 bit integers
|
|
-
|
|
-pub trait Sse41U32x4 {
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
-}
|
|
-impl Sse41U32x4 for u32x4 {
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_max_epu32(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_min_epu32(self, other) }
|
|
- }
|
|
-}
|
|
-pub trait Sse41I32x4 {
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
- fn low_mul(self, other: Self) -> i64x2;
|
|
- fn packus(self, other: Self) -> u16x8;
|
|
-}
|
|
-impl Sse41I32x4 for i32x4 {
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_max_epi32(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_min_epi32(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn low_mul(self, other: Self) -> i64x2 {
|
|
- unsafe { x86_mm_mul_epi32(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn packus(self, other: Self) -> u16x8 {
|
|
- unsafe { x86_mm_packus_epi32(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse41Bool32ix4 {}
|
|
-impl Sse41Bool32ix4 for bool32ix4 {}
|
|
-
|
|
-// 16 bit integers
|
|
-
|
|
-pub trait Sse41U16x8 {
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
- fn minpos(self) -> Self;
|
|
-}
|
|
-impl Sse41U16x8 for u16x8 {
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_max_epu16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_min_epu16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn minpos(self) -> Self {
|
|
- unsafe { x86_mm_minpos_epu16(self) }
|
|
- }
|
|
-}
|
|
-pub trait Sse41I16x8 {}
|
|
-impl Sse41I16x8 for i16x8 {}
|
|
-
|
|
-pub trait Sse41Bool16ix8 {}
|
|
-impl Sse41Bool16ix8 for bool16ix8 {}
|
|
-
|
|
-// 8 bit integers
|
|
-
|
|
-pub trait Sse41U8x16 {}
|
|
-impl Sse41U8x16 for u8x16 {}
|
|
-pub trait Sse41I8x16 {
|
|
- fn max(self, other: Self) -> Self;
|
|
- fn min(self, other: Self) -> Self;
|
|
-}
|
|
-impl Sse41I8x16 for i8x16 {
|
|
- #[inline]
|
|
- fn max(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_max_epi8(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn min(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_min_epi8(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Sse41Bool8ix16 {}
|
|
-impl Sse41Bool8ix16 for bool8ix16 {}
|
|
diff --git a/third_party/rust/simd/src/x86/sse4_2.rs b/third_party/rust/simd/src/x86/sse4_2.rs
|
|
deleted file mode 100644
|
|
index 5afe4583cf71..000000000000
|
|
--- a/third_party/rust/simd/src/x86/sse4_2.rs
|
|
+++ /dev/null
|
|
@@ -1,19 +0,0 @@
|
|
-use i8x16;
|
|
-
|
|
-#[allow(dead_code)]
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm_cmpestra(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32;
|
|
- fn x86_mm_cmpestrc(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32;
|
|
- fn x86_mm_cmpestri(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32;
|
|
- fn x86_mm_cmpestrm(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i8x16;
|
|
- fn x86_mm_cmpestro(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32;
|
|
- fn x86_mm_cmpestrs(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32;
|
|
- fn x86_mm_cmpestrz(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32;
|
|
- fn x86_mm_cmpistra(x: i8x16, y: i8x16, z: i32) -> i32;
|
|
- fn x86_mm_cmpistrc(x: i8x16, y: i8x16, z: i32) -> i32;
|
|
- fn x86_mm_cmpistri(x: i8x16, y: i8x16, z: i32) -> i32;
|
|
- fn x86_mm_cmpistrm(x: i8x16, y: i8x16, z: i32) -> i8x16;
|
|
- fn x86_mm_cmpistro(x: i8x16, y: i8x16, z: i32) -> i32;
|
|
- fn x86_mm_cmpistrs(x: i8x16, y: i8x16, z: i32) -> i32;
|
|
- fn x86_mm_cmpistrz(x: i8x16, y: i8x16, z: i32) -> i32;
|
|
-}
|
|
diff --git a/third_party/rust/simd/src/x86/ssse3.rs b/third_party/rust/simd/src/x86/ssse3.rs
|
|
deleted file mode 100644
|
|
index aa22a08a68a4..000000000000
|
|
--- a/third_party/rust/simd/src/x86/ssse3.rs
|
|
+++ /dev/null
|
|
@@ -1,172 +0,0 @@
|
|
-use super::super::*;
|
|
-use bitcast;
|
|
-
|
|
-macro_rules! bitcast {
|
|
- ($func: ident($a: ident, $b: ident)) => {
|
|
- bitcast($func(bitcast($a), bitcast($b)))
|
|
- }
|
|
-}
|
|
-
|
|
-extern "platform-intrinsic" {
|
|
- fn x86_mm_abs_epi8(x: i8x16) -> i8x16;
|
|
- fn x86_mm_abs_epi16(x: i16x8) -> i16x8;
|
|
- fn x86_mm_abs_epi32(x: i32x4) -> i32x4;
|
|
- fn x86_mm_hadd_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_hadd_epi32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn x86_mm_hadds_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_hsub_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_hsub_epi32(x: i32x4, y: i32x4) -> i32x4;
|
|
- fn x86_mm_hsubs_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_maddubs_epi16(x: u8x16, y: i8x16) -> i16x8;
|
|
- fn x86_mm_mulhrs_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_shuffle_epi8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn x86_mm_sign_epi8(x: i8x16, y: i8x16) -> i8x16;
|
|
- fn x86_mm_sign_epi16(x: i16x8, y: i16x8) -> i16x8;
|
|
- fn x86_mm_sign_epi32(x: i32x4, y: i32x4) -> i32x4;
|
|
-}
|
|
-
|
|
-// 32 bit integers
|
|
-
|
|
-pub trait Ssse3I32x4 {
|
|
- fn abs(self) -> Self;
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
- fn sign(self, other: Self) -> Self;
|
|
-}
|
|
-impl Ssse3I32x4 for i32x4 {
|
|
- #[inline]
|
|
- fn abs(self) -> Self {
|
|
- unsafe { x86_mm_abs_epi32(self) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hadd_epi32(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hsub_epi32(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn sign(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_sign_epi32(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Ssse3U32x4 {
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
-}
|
|
-impl Ssse3U32x4 for u32x4 {
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { bitcast!(x86_mm_hadd_epi32(self, other)) }
|
|
- }
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { bitcast!(x86_mm_hsub_epi32(self, other)) }
|
|
- }
|
|
-}
|
|
-
|
|
-// 16 bit integers
|
|
-
|
|
-pub trait Ssse3I16x8 {
|
|
- fn abs(self) -> Self;
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hadds(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
- fn hsubs(self, other: Self) -> Self;
|
|
- fn sign(self, other: Self) -> Self;
|
|
- fn mulhrs(self, other: Self) -> Self;
|
|
-}
|
|
-impl Ssse3I16x8 for i16x8 {
|
|
- #[inline]
|
|
- fn abs(self) -> Self {
|
|
- unsafe { x86_mm_abs_epi16(self) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hadd_epi16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn hadds(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hadds_epi16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hsub_epi16(self, other) }
|
|
- }
|
|
- #[inline]
|
|
- fn hsubs(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_hsubs_epi16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn sign(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_sign_epi16(self, other) }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn mulhrs(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_mulhrs_epi16(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Ssse3U16x8 {
|
|
- fn hadd(self, other: Self) -> Self;
|
|
- fn hsub(self, other: Self) -> Self;
|
|
-}
|
|
-impl Ssse3U16x8 for u16x8 {
|
|
- #[inline]
|
|
- fn hadd(self, other: Self) -> Self {
|
|
- unsafe { bitcast!(x86_mm_hadd_epi16(self, other)) }
|
|
- }
|
|
- #[inline]
|
|
- fn hsub(self, other: Self) -> Self {
|
|
- unsafe { bitcast!(x86_mm_hsub_epi16(self, other)) }
|
|
- }
|
|
-}
|
|
-
|
|
-
|
|
-// 8 bit integers
|
|
-
|
|
-pub trait Ssse3U8x16 {
|
|
- fn shuffle_bytes(self, indices: Self) -> Self;
|
|
- fn maddubs(self, other: i8x16) -> i16x8;
|
|
-}
|
|
-
|
|
-impl Ssse3U8x16 for u8x16 {
|
|
- #[inline]
|
|
- fn shuffle_bytes(self, indices: Self) -> Self {
|
|
- unsafe {bitcast!(x86_mm_shuffle_epi8(self, indices))}
|
|
- }
|
|
-
|
|
- fn maddubs(self, other: i8x16) -> i16x8 {
|
|
- unsafe { x86_mm_maddubs_epi16(self, other) }
|
|
- }
|
|
-}
|
|
-
|
|
-pub trait Ssse3I8x16 {
|
|
- fn abs(self) -> Self;
|
|
- fn shuffle_bytes(self, indices: Self) -> Self;
|
|
- fn sign(self, other: Self) -> Self;
|
|
-}
|
|
-impl Ssse3I8x16 for i8x16 {
|
|
- #[inline]
|
|
- fn abs(self) -> Self {
|
|
- unsafe {x86_mm_abs_epi8(self)}
|
|
- }
|
|
- #[inline]
|
|
- fn shuffle_bytes(self, indices: Self) -> Self {
|
|
- unsafe {
|
|
- x86_mm_shuffle_epi8(self, indices)
|
|
- }
|
|
- }
|
|
-
|
|
- #[inline]
|
|
- fn sign(self, other: Self) -> Self {
|
|
- unsafe { x86_mm_sign_epi8(self, other) }
|
|
- }
|
|
-}
|
|
diff --git a/toolkit/moz.configure b/toolkit/moz.configure
|
|
index c2b3c768cba9..c3f3de62f09a 100644
|
|
--- a/toolkit/moz.configure
|
|
+++ b/toolkit/moz.configure
|
|
@@ -696,14 +696,11 @@ set_config('MOZ_ENABLE_WEBRENDER', webrender.enable)
|
|
option('--enable-rust-simd', env='MOZ_RUST_SIMD',
|
|
help='Enable explicit SIMD in Rust code.')
|
|
|
|
-@depends('--enable-rust-simd', target, rustc_info)
|
|
-def rust_simd(value, target, rustc_info):
|
|
- # As of 2018-06-05, the simd crate only works on aarch64,
|
|
- # armv7, x86 and x86_64.
|
|
+@depends('--enable-rust-simd', target)
|
|
+def rust_simd(value, target):
|
|
+ # As of 2019-03-04, the simd-accel feature of encoding_rs has not
|
|
+ # been properly set up outside aarch64, armv7, x86 and x86_64.
|
|
if target.cpu in ('aarch64', 'arm', 'x86', 'x86_64') and value:
|
|
- if rustc_info and rustc_info.version >= Version('1.33.0'):
|
|
- die('--enable-rust-simd does not work with Rust 1.33 or later. '
|
|
- 'See https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 .')
|
|
return True
|
|
|
|
set_config('MOZ_RUST_SIMD', rust_simd)
|
|
--
|
|
2.21.0
|
|
|