aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitVfpCompare.cs
blob: f5f306099403caf6185ba9cc2cc725bf3431d337 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
using System.Diagnostics;

namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    static class InstEmitVfpCompare
    {
        public static void VcmpI(CodeGenContext context, uint cond, uint rd, uint size)
        {
            EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: false);
        }

        public static void VcmpR(CodeGenContext context, uint cond, uint rd, uint rm, uint size)
        {
            EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: false);
        }

        public static void VcmpeI(CodeGenContext context, uint cond, uint rd, uint size)
        {
            EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: true);
        }

        public static void VcmpeR(CodeGenContext context, uint cond, uint rd, uint rm, uint size)
        {
            EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: true);
        }

        private static void EmitVcmpVcmpe(CodeGenContext context, uint cond, uint rd, uint rm, uint size, bool zero, bool e)
        {
            Debug.Assert(size == 1 || size == 2 || size == 3);

            bool singleRegs = size != 3;
            uint ftype = size ^ 2u;
            uint opc = zero ? 1u : 0u;

            using ScopedRegister rdReg = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);
            ScopedRegister rmReg;
            Operand rmOrZero;

            if (zero)
            {
                rmReg = default;
                rmOrZero = new Operand(0, RegisterType.Vector, OperandType.V128);
            }
            else
            {
                rmReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);
                rmOrZero = rmReg.Operand;
            }

            using ScopedRegister oldFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            bool canPeepholeOptimize = CanFuseVcmpVmrs(context, cond);
            if (!canPeepholeOptimize)
            {
                InstEmitCommon.GetCurrentFlags(context, oldFlags.Operand);
            }

            if (e)
            {
                context.Arm64Assembler.FcmpeFloat(rdReg.Operand, rmOrZero, opc, ftype);
            }
            else
            {
                context.Arm64Assembler.FcmpFloat(rdReg.Operand, rmOrZero, opc, ftype);
            }

            // Save result flags from the FCMP operation on FPSCR register, then restore the old flags if needed.

            WriteUpdateFpsrNzcv(context);

            if (!canPeepholeOptimize)
            {
                InstEmitCommon.RestoreNzcvFlags(context, oldFlags.Operand);
            }

            if (!zero)
            {
                rmReg.Dispose();
            }
        }

        private static void WriteUpdateFpsrNzcv(CodeGenContext context)
        {
            using ScopedRegister fpsrRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

            context.Arm64Assembler.LdrRiUn(fpsrRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);

            InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

            context.Arm64Assembler.Bfi(fpsrRegister.Operand, flagsRegister.Operand, 28, 4);
            context.Arm64Assembler.StrRiUn(fpsrRegister.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
        }

        private static bool CanFuseVcmpVmrs(CodeGenContext context, uint vcmpCond)
        {
            // Conditions might be different for the VCMP and VMRS instructions if they are inside a IT block,
            // we don't bother to check right now, so just always skip if inside an IT block.
            if (context.InITBlock)
            {
                return false;
            }

            InstInfo nextInfo = context.PeekNextInstruction();

            // We're looking for a VMRS instructions.
            if (nextInfo.Name != InstName.Vmrs)
            {
                return false;
            }

            // Conditions must match.
            if (vcmpCond != (nextInfo.Encoding >> 28))
            {
                return false;
            }

            // Reg must be 1, Rt must be PC indicating VMRS to PSTATE.NZCV.
            if (((nextInfo.Encoding >> 16) & 0xf) != 1 || ((nextInfo.Encoding >> 12) & 0xf) != RegisterUtils.PcRegister)
            {
                return false;
            }

            context.SetSkipNextInstruction();

            return true;
        }
    }
}