aboutsummaryrefslogtreecommitdiff
path: root/externals/dynarmic/tests/rsqrt_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'externals/dynarmic/tests/rsqrt_test.cpp')
-rw-r--r-- externals/dynarmic/tests/rsqrt_test.cpp | 152
1 files changed, 152 insertions, 0 deletions
diff --git a/externals/dynarmic/tests/rsqrt_test.cpp b/externals/dynarmic/tests/rsqrt_test.cpp
new file mode 100644
index 0000000000..7adbe39e12
--- /dev/null
+++ b/externals/dynarmic/tests/rsqrt_test.cpp
@@ -0,0 +1,152 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2021 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include <catch2/benchmark/catch_benchmark.hpp>
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/printf.h>
+#include <mcl/stdint.hpp>
+
+#include "dynarmic/common/fp/fpcr.h"
+#include "dynarmic/common/fp/fpsr.h"
+#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
+
+extern "C" u32 rsqrt_inaccurate(u32);
+extern "C" u32 rsqrt_full(u32);
+extern "C" u32 rsqrt_full_gpr(u32);
+extern "C" u32 rsqrt_full_nb(u32);
+extern "C" u32 rsqrt_full_nb2(u32);
+extern "C" u32 rsqrt_full_nb_gpr(u32);
+extern "C" u32 rsqrt_newton(u32);
+extern "C" u32 rsqrt_hack(u32);
+
+using namespace Dynarmic;
+
+/// Reference reciprocal-square-root estimate exposed with C linkage.
+///
+/// Forwards `value` to FP::FPRSqrtEstimate together with a default-constructed
+/// FPCR and FPSR. Both register objects are locals, so anything the estimate
+/// routine records in the FPSR is discarded when this function returns.
+///
+/// @param value  Raw 32-bit operand handed unchanged to FP::FPRSqrtEstimate.
+/// @return       Whatever FP::FPRSqrtEstimate returns for that operand.
+extern "C" u32 rsqrt_fallback(u32 value) {
+    FP::FPCR control;
+    FP::FPSR status;
+    return FP::FPRSqrtEstimate(value, control, status);
+}
+/// Underscore-prefixed twin of rsqrt_fallback; it only forwards to it.
+///
+/// NOTE(review): presumably provided so that externally defined routines can
+/// link against either symbol spelling -- confirm against whatever references it.
+///
+/// @param value  Raw 32-bit operand, passed through untouched.
+/// @return       The result of rsqrt_fallback(value).
+extern "C" u32 _rsqrt_fallback(u32 value) {
+    const u32 estimate = rsqrt_fallback(value);
+    return estimate;
+}
+
+/// Checks one input against the reference implementation.
+///
+/// Computes FP::FPRSqrtEstimate(value) with default-constructed FPCR/FPSR and
+/// compares it with rsqrt_full, rsqrt_full_gpr, rsqrt_newton and rsqrt_hack.
+/// When all four agree with the reference nothing is printed and no Catch2
+/// assertion is evaluated; on any disagreement the input and all five results
+/// are printed in hex, then REQUIRE is applied to each comparison.
+///
+/// @param value  Raw 32-bit operand fed to every implementation.
+void Test(u32 value) {
+    FP::FPCR control;
+    FP::FPSR status;
+
+    const u32 reference = FP::FPRSqrtEstimate(value, control, status);
+    const u32 from_full = rsqrt_full(value);
+    const u32 from_full_gpr = rsqrt_full_gpr(value);
+    const u32 from_newton = rsqrt_newton(value);
+    const u32 from_hack = rsqrt_hack(value);
+
+    const bool all_agree = reference == from_full
+                        && reference == from_full_gpr
+                        && reference == from_newton
+                        && reference == from_hack;
+    if (all_agree) {
+        // Matching inputs never reach the assertion macros below.
+        return;
+    }
+
+    // Column order: input = reference : full : full_gpr : newton : hack
+    fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, reference, from_full, from_full_gpr, from_newton, from_hack);
+
+    REQUIRE(reference == from_full);
+    REQUIRE(reference == from_full_gpr);
+    REQUIRE(reference == from_newton);
+    REQUIRE(reference == from_hack);
+}
+
+// Exhaustive comparison of the candidate routines against the reference.
+// The "[.]" tag hides this case from Catch2's default run, so it only executes
+// when selected explicitly.
+TEST_CASE("RSqrt Tests", "[fp][.]") {
+    // Hand-picked inputs, checked before the long sweep starts.
+    // NOTE(review): read as IEEE-754 binary32 bit patterns these look like
+    // +/-0, +/-infinity and assorted NaN encodings -- confirm.
+    constexpr u32 special_inputs[] = {
+        0x00000000,
+        0x80000000,
+        0x7f8b7201,
+        0x7f800000,
+        0x7fc00000,
+        0xff800000,
+        0xffc00000,
+        0xff800001,
+    };
+    for (const u32 input : special_inputs) {
+        Test(input);
+    }
+
+    // Visit every 32-bit pattern exactly once, in ascending order. The counter
+    // wraps from 0xffffffff back to zero after the last input, which ends the loop.
+    u32 input = 0;
+    do {
+        Test(input);
+    } while (++input != 0);
+}
+
+namespace {
+
+/// Shared workload for every benchmark in "Benchmark RSqrt".
+///
+/// Calls `Fn` on 0, 0x1234, 0x2468, ... for every multiple of the stride below
+/// 2^32 and adds up the returned words. The routine is a template argument
+/// rather than a runtime pointer so that each instantiation still contains a
+/// direct call to the routine being measured.
+///
+/// @tparam Fn  Routine under measurement; takes and returns a raw 32-bit word.
+/// @return     64-bit sum of all results of the sweep.
+template<u32 (*Fn)(u32)>
+u64 SumOverInputSweep() {
+    constexpr u64 input_limit = 0x1'0000'0000;  // one past the largest u32
+    constexpr u64 input_stride = 0x1234;
+
+    u64 total = 0;
+    // The counter is 64-bit so that it can actually reach input_limit.
+    for (u64 i = 0; i < input_limit; i += input_stride) {
+        total += Fn(static_cast<u32>(i));
+    }
+    return total;
+}
+
+}  // namespace
+
+// Times each implementation over the same strided sweep of inputs.
+// The "[.]" tag hides this case from Catch2's default run. Every benchmark
+// returns its sum so that the measured calls produce an observable result.
+TEST_CASE("Benchmark RSqrt", "[fp][.]") {
+    BENCHMARK("Inaccurate") {
+        return SumOverInputSweep<rsqrt_inaccurate>();
+    };
+
+    BENCHMARK("Full divss") {
+        return SumOverInputSweep<rsqrt_full>();
+    };
+
+    BENCHMARK("Full divss (GPR)") {
+        return SumOverInputSweep<rsqrt_full_gpr>();
+    };
+
+    BENCHMARK("Full divss (NB)") {
+        return SumOverInputSweep<rsqrt_full_nb>();
+    };
+
+    BENCHMARK("Full divss (NB2)") {
+        return SumOverInputSweep<rsqrt_full_nb2>();
+    };
+
+    BENCHMARK("Full divss (NB + GPR)") {
+        return SumOverInputSweep<rsqrt_full_nb_gpr>();
+    };
+
+    BENCHMARK("One Newton iteration") {
+        return SumOverInputSweep<rsqrt_newton>();
+    };
+
+    BENCHMARK("Ugly Hack") {
+        return SumOverInputSweep<rsqrt_hack>();
+    };
+
+    BENCHMARK("Softfloat") {
+        return SumOverInputSweep<rsqrt_fallback>();
+    };
+}