aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
blob: 73eea5c34d25c4f63339fbfc7bd82e49c2ece42e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits IR for the FSWZADD instruction: combines the two source registers with
        /// <c>FPSwizzleAdd</c> (parameterized by the instruction's <c>PnWord</c>), stores the
        /// result in the destination register and then updates the FP Z/N flags as
        /// requested by <c>WriteCC</c>.
        /// </summary>
        /// <param name="context">Emitter context the operations are appended to.</param>
        public static void Fswzadd(EmitterContext context)
        {
            InstFswzadd op = context.GetOp<InstFswzadd>();

            // Resolve every operand up front, in the same order the instruction lists them.
            Operand lhs = GetSrcReg(context, op.SrcA);
            Operand rhs = GetSrcReg(context, op.SrcB);
            Operand target = GetDest(op.Dest);

            Operand swizzledSum = context.FPSwizzleAdd(lhs, rhs, op.PnWord);

            context.Copy(target, swizzledSum);

            // Flags are derived from the value that was just written to the destination.
            InstEmitAluHelper.SetFPZnFlags(context, target, op.WriteCC);
        }

        /// <summary>
        /// Emits IR for the SHFL instruction. Depending on the shuffle mode, one of the
        /// <c>Shuffle</c>, <c>ShuffleUp</c>, <c>ShuffleDown</c> or <c>ShuffleXor</c> operations
        /// is emitted; the shuffled value is copied to the destination register and the
        /// accompanying validity flag is copied to the destination predicate.
        /// </summary>
        /// <remarks>
        /// If the decoded shuffle mode is not one of the known values, nothing is written and
        /// the problem is reported through the GPU accessor log instead of emitting copies
        /// with null sources.
        /// </remarks>
        /// <param name="context">Emitter context the operations are appended to.</param>
        public static void Shfl(EmitterContext context)
        {
            InstShfl op = context.GetOp<InstShfl>();

            Operand pred = Register(op.DestPred, RegisterType.Predicate);

            Operand srcA = GetSrcReg(context, op.SrcA);

            // Operands B and C each come either from an immediate or from a register,
            // as selected by the instruction's "fix" bits.
            Operand srcB = op.BFixShfl ? Const(op.SrcBImm) : GetSrcReg(context, op.SrcB);
            Operand srcC = op.CFixShfl ? Const(op.SrcCImm) : GetSrcReg(context, op.SrcC);

            (Operand res, Operand valid) = op.ShflMode switch
            {
                ShflMode.Idx => context.Shuffle(srcA, srcB, srcC),
                ShflMode.Up => context.ShuffleUp(srcA, srcB, srcC),
                ShflMode.Down => context.ShuffleDown(srcA, srcB, srcC),
                ShflMode.Bfly => context.ShuffleXor(srcA, srcB, srcC),
                _ => (null, null),
            };

            // An unknown mode yields no operands; copying null sources would produce
            // invalid IR, so log it (mirroring how Vote handles invalid modes) and bail out.
            if (res is null || valid is null)
            {
                context.TranslatorContext.GpuAccessor.Log($"Invalid shuffle operation: {op.ShflMode}.");

                return;
            }

            context.Copy(GetDest(op.Dest), res);
            context.Copy(pred, valid);
        }

        /// <summary>
        /// Emits IR for the VOTE instruction. The vote result (when the vote mode is valid)
        /// is written to the destination predicate, and, unless the destination register is
        /// the zero register, the ballot of the source predicate is written to it.
        /// </summary>
        /// <param name="context">Emitter context the operations are appended to.</param>
        public static void Vote(EmitterContext context)
        {
            InstVote op = context.GetOp<InstVote>();

            Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
            Operand voteResult = EmitVote(context, op.VoteMode, srcPred);

            if (voteResult == null)
            {
                // EmitVote returns null for vote modes it does not recognize.
                context.TranslatorContext.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
            }
            else
            {
                Operand destPred = Register(op.VpDest, RegisterType.Predicate);

                context.Copy(destPred, voteResult);
            }

            // The ballot mask is only produced when it has somewhere to go.
            bool writesBallot = op.Dest != RegisterConsts.RegisterZeroIndex;

            if (writesBallot)
            {
                context.Copy(GetDest(op.Dest), EmitBallot(context, srcPred));
            }
        }

        /// <summary>
        /// Emits the operations for a vote of the given mode over <paramref name="pred"/>.
        /// </summary>
        /// <param name="context">Emitter context the operations are appended to.</param>
        /// <param name="voteMode">Kind of vote to perform.</param>
        /// <param name="pred">Predicate operand being voted on.</param>
        /// <returns>The operand holding the vote result, or null if the vote mode is not recognized.</returns>
        private static Operand EmitVote(EmitterContext context, VoteMode voteMode, Operand pred)
        {
            int hostSubgroupSize = context.TranslatorContext.GpuAccessor.QueryHostSubgroupSize();

            if (hostSubgroupSize <= 32)
            {
                // The vote operations can be used directly.
                switch (voteMode)
                {
                    case VoteMode.All:
                        return context.VoteAll(pred);
                    case VoteMode.Any:
                        return context.VoteAny(pred);
                    case VoteMode.Eq:
                        return context.VoteAllEqual(pred);
                    default:
                        return null;
                }
            }

            // The host subgroup is larger than 32 threads, but the shader code assumes 32,
            // so the vote is emulated with ballot masks instead:
            //   allInvocations      => ballot(pred) == ballot(true)
            //   anyInvocation       => ballot(pred) != 0
            //   allInvocationsEqual => ballot(pred) == ballot(true) || ballot(pred) == 0
            Operand predMask = EmitBallot(context, pred);

            switch (voteMode)
            {
                case VoteMode.All:
                    return MatchesFullMask();
                case VoteMode.Any:
                    return context.ICompareNotEqual(predMask, Const(0));
                case VoteMode.Eq:
                    Operand allSet = MatchesFullMask();
                    Operand noneSet = context.ICompareEqual(predMask, Const(0));

                    return context.BitwiseOr(allSet, noneSet);
                default:
                    return null;
            }

            // Compares the predicate's ballot mask against the ballot of a constant true value.
            Operand MatchesFullMask()
            {
                Operand fullMask = EmitBallot(context, Const(IrConsts.True));

                return context.ICompareEqual(predMask, fullMask);
            }
        }

        /// <summary>
        /// Emits the operations producing a ballot mask for <paramref name="pred"/>.
        /// When the host subgroup size is above 32, several indexed ballot results are
        /// emitted and the one matching the current lane (lane id divided by 32) is selected.
        /// </summary>
        /// <param name="context">Emitter context the operations are appended to.</param>
        /// <param name="pred">Predicate operand to ballot.</param>
        /// <returns>The operand holding the selected ballot mask.</returns>
        private static Operand EmitBallot(EmitterContext context, Operand pred)
        {
            int hostSubgroupSize = context.TranslatorContext.GpuAccessor.QueryHostSubgroupSize();

            if (hostSubgroupSize <= 32)
            {
                return context.Ballot(pred, 0);
            }

            // TODO: Add support for vector destination and do that with a single operation.
            Operand lane = context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);

            if (hostSubgroupSize == 64)
            {
                Operand lowerMask = context.Ballot(pred, 0);
                Operand upperMask = context.Ballot(pred, 1);

                // Bit 5 of the lane id is set for lanes 32 and above.
                Operand inUpperHalf = context.BitwiseAnd(lane, Const(32));

                return context.ConditionalSelect(inUpperHalf, upperMask, lowerMask);
            }

            // Remaining sizes: pick one of four ballot results using lane id / 32.
            Operand group = context.ShiftRightU32(lane, Const(5));
            Operand selected = context.Ballot(pred, 0);

            Operand isGroup1 = context.ICompareEqual(group, Const(1));
            selected = context.ConditionalSelect(isGroup1, context.Ballot(pred, 1), selected);

            Operand isGroup2 = context.ICompareEqual(group, Const(2));
            selected = context.ConditionalSelect(isGroup2, context.Ballot(pred, 2), selected);

            Operand isGroup3 = context.ICompareEqual(group, Const(3));
            selected = context.ConditionalSelect(isGroup3, context.Ballot(pred, 3), selected);

            return selected;
        }
    }
}