aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitHalve.cs
blob: 567acfbf1c30de2edf1a9377d7a27f422e6c114d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
using Ryujinx.Cpu.LightningJit.CodeGen;

namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emits Arm64 code for the Arm32 parallel halving add/subtract instructions:
    /// SHADD8/16, SHSUB8/16, SHASX, SHSAX and the unsigned UH* counterparts.
    /// </summary>
    static class InstEmitHalve
    {
        // Selects every bit of each 16-bit lane except the lane's top bit.
        private const int HalfwordLaneMask = 0x7fff7fff;

        // Selects every bit of each 8-bit lane except the lane's top bit.
        private const int ByteLaneMask = 0x7f7f7f7f;

        /// <summary>Signed halving add on the two 16-bit lanes of rn and rm, result in rd.</summary>
        public static void Shadd16(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHadd(context, rd, rn, rm, HalfwordLaneMask, unsigned: false);
        }

        /// <summary>Signed halving add on the four 8-bit lanes of rn and rm, result in rd.</summary>
        public static void Shadd8(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHadd(context, rd, rn, rm, ByteLaneMask, unsigned: false);
        }

        /// <summary>Signed halving subtract on the two 16-bit lanes of rn and rm, result in rd.</summary>
        public static void Shsub16(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHsub(context, rd, rn, rm, HalfwordLaneMask, unsigned: false);
        }

        /// <summary>Signed halving subtract on the four 8-bit lanes of rn and rm, result in rd.</summary>
        public static void Shsub8(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHsub(context, rd, rn, rm, ByteLaneMask, unsigned: false);
        }

        /// <summary>
        /// Signed halving add/subtract with exchange: element 0 is a halved difference,
        /// the other element is a halved sum.
        /// </summary>
        public static void Shasx(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            InstEmitCommon.EmitSigned16BitXPair(
                context,
                rd,
                rn,
                rm,
                (d, n, m, e) => EmitHalvedAddSub(context, d, n, m, subtract: e == 0));
        }

        /// <summary>
        /// Signed halving subtract/add with exchange: element 0 is a halved sum,
        /// the other element is a halved difference.
        /// </summary>
        public static void Shsax(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            InstEmitCommon.EmitSigned16BitXPair(
                context,
                rd,
                rn,
                rm,
                (d, n, m, e) => EmitHalvedAddSub(context, d, n, m, subtract: e != 0));
        }

        /// <summary>Unsigned halving add on the two 16-bit lanes of rn and rm, result in rd.</summary>
        public static void Uhadd16(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHadd(context, rd, rn, rm, HalfwordLaneMask, unsigned: true);
        }

        /// <summary>Unsigned halving add on the four 8-bit lanes of rn and rm, result in rd.</summary>
        public static void Uhadd8(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHadd(context, rd, rn, rm, ByteLaneMask, unsigned: true);
        }

        /// <summary>
        /// Unsigned halving add/subtract with exchange: element 0 is a halved difference,
        /// the other element is a halved sum.
        /// </summary>
        public static void Uhasx(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            InstEmitCommon.EmitUnsigned16BitXPair(
                context,
                rd,
                rn,
                rm,
                (d, n, m, e) => EmitHalvedAddSub(context, d, n, m, subtract: e == 0));
        }

        /// <summary>
        /// Unsigned halving subtract/add with exchange: element 0 is a halved sum,
        /// the other element is a halved difference.
        /// </summary>
        public static void Uhsax(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            InstEmitCommon.EmitUnsigned16BitXPair(
                context,
                rd,
                rn,
                rm,
                (d, n, m, e) => EmitHalvedAddSub(context, d, n, m, subtract: e != 0));
        }

        /// <summary>Unsigned halving subtract on the two 16-bit lanes of rn and rm, result in rd.</summary>
        public static void Uhsub16(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHsub(context, rd, rn, rm, HalfwordLaneMask, unsigned: true);
        }

        /// <summary>Unsigned halving subtract on the four 8-bit lanes of rn and rm, result in rd.</summary>
        public static void Uhsub8(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            EmitHsub(context, rd, rn, rm, ByteLaneMask, unsigned: true);
        }

        /// <summary>
        /// Shared per-element body of the exchange variants: emits either a subtraction or an
        /// addition of <paramref name="n"/> and <paramref name="m"/> into <paramref name="d"/>,
        /// followed by a right shift of <paramref name="d"/> by one.
        /// </summary>
        /// <param name="context">Code generation context that owns the assembler.</param>
        /// <param name="d">Destination operand for this element.</param>
        /// <param name="n">First source operand for this element.</param>
        /// <param name="m">Second source operand for this element.</param>
        /// <param name="subtract">True to emit a subtraction, false to emit an addition.</param>
        private static void EmitHalvedAddSub(CodeGenContext context, Operand d, Operand n, Operand m, bool subtract)
        {
            if (subtract)
            {
                context.Arm64Assembler.Sub(d, n, m);
            }
            else
            {
                context.Arm64Assembler.Add(d, n, m);
            }

            // NOTE(review): a logical shift is used for the signed variants too. That is only
            // correct if the pair helper keeps just the low 16 bits of d, so that the bit shifted
            // into the top is discarded - confirm against InstEmitCommon.
            context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
        }

        /// <summary>
        /// Emits a lane-wise halving add of rn and rm into rd using plain 32-bit integer operations.
        /// </summary>
        /// <param name="context">Code generation context that owns the assembler and register allocator.</param>
        /// <param name="rd">Guest destination register index.</param>
        /// <param name="rn">First guest source register index.</param>
        /// <param name="rm">Second guest source register index.</param>
        /// <param name="mask">Mask with every bit set except the top bit of each lane.</param>
        /// <param name="unsigned">True for the unsigned variants, false to also fix up each lane's sign bit.</param>
        private static void EmitHadd(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
        {
            Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
            Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
            Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

            // Temporaries are released when the method returns (using declarations).
            using ScopedRegister both = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister differing = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            // This relies on the identity x+y == ((x&y) << 1) + (x^y).
            // Halving both sides gives (x+y)/2 == (x&y) + ((x^y) >> 1), which never needs
            // more bits than the lane has.
            // The shift is done on the whole register, so the LSB of each lane would slide into
            // the top bit of the lane below it; masking with 0x7F/0x7FFF per lane removes it.

            context.Arm64Assembler.And(both.Operand, rmOperand, rnOperand);
            context.Arm64Assembler.Eor(differing.Operand, rmOperand, rnOperand);
            context.Arm64Assembler.Lsr(rdOperand, differing.Operand, InstEmitCommon.Const(1));
            context.Arm64Assembler.And(rdOperand, rdOperand, InstEmitCommon.Const(mask));
            context.Arm64Assembler.Add(rdOperand, rdOperand, both.Operand);

            if (!unsigned)
            {
                // A signed halving needs an arithmetic shift of x^y, i.e. the top bit of each lane
                // of x^y must be kept in place as well as shifted down. Flip the result's top lane
                // bit wherever x^y has its top lane bit set to get the same effect.
                context.Arm64Assembler.And(differing.Operand, differing.Operand, InstEmitCommon.Const(~mask));
                context.Arm64Assembler.Eor(rdOperand, rdOperand, differing.Operand);
            }
        }

        /// <summary>
        /// Emits a lane-wise halving subtract (rn minus rm) into rd using plain 32-bit integer operations.
        /// </summary>
        /// <param name="context">Code generation context that owns the assembler and register allocator.</param>
        /// <param name="rd">Guest destination register index.</param>
        /// <param name="rn">Guest register index of the minuend.</param>
        /// <param name="rm">Guest register index of the subtrahend.</param>
        /// <param name="mask">Mask with every bit set except the top bit of each lane.</param>
        /// <param name="unsigned">True for the unsigned variants, false to also fix up each lane's sign bit.</param>
        private static void EmitHsub(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
        {
            Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
            Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
            Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

            // Temporaries are released when the method returns (using declarations).
            using ScopedRegister differing = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister minuend = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister subtrahend = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            // This relies on the identity x-y == (x^y) - (((x^y)&y) << 1).
            // Note that x^y always contains the LSB of the result.
            // Since we want to calculate (x-y)/2, we can instead calculate ((x^y) >> 1) - ((x^y)&y).

            context.Arm64Assembler.Eor(differing.Operand, rmOperand, rnOperand);
            context.Arm64Assembler.Lsr(minuend.Operand, differing.Operand, InstEmitCommon.Const(1));
            context.Arm64Assembler.And(subtrahend.Operand, differing.Operand, rmOperand);

            // We must now perform a partitioned subtraction.
            // Only the low 7/15 bits of each minuend lane matter, so the top bit of each lane is
            // free to act as a borrow guard: set it before subtracting so that a borrow out of a
            // lane consumes the guard instead of propagating into the lane above.
            // XOR-ing the guard afterwards inverts it, which yields the borrowed-from state.

            context.Arm64Assembler.Orr(rdOperand, minuend.Operand, InstEmitCommon.Const(~mask));
            context.Arm64Assembler.Sub(rdOperand, rdOperand, subtrahend.Operand);
            context.Arm64Assembler.Eor(rdOperand, rdOperand, InstEmitCommon.Const(~mask));

            if (!unsigned)
            {
                // For the signed variants, additionally flip the top bit of each lane wherever
                // x^y has its top lane bit set, which sign extends the halved result.
                context.Arm64Assembler.And(differing.Operand, differing.Operand, InstEmitCommon.Const(~mask));
                context.Arm64Assembler.Eor(rdOperand, rdOperand, differing.Operand);
            }
        }
    }
}