aboutsummaryrefslogblamecommitdiff
path: root/ARMeilleure/Instructions/InstEmitSimdHelper.cs
blob: 0e7af794a587e9c947efa26e4ae0c0cc35da4447 (plain) (tree)
1
2
3
4
5
6
7
8
9
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033




                                             
                        
                                                     
                                                                    







                                                            













                                                                                                             
                                                                                                                                                   






                                                                                                                        
          

























































































































































                                                                                        







                                                                                       
















                                                                                         
                                                                                         



                                                                                           




                                                                                   


                                                                      
                                                                               






                                                                                   
                                                                                                                         
         
                                                     
 
































                                                                                                                                          
         










                                                                                                                                
















































































                                                                                                             
                                                                                                  

                                                         

                                                                               
 
                                         
         
                                                                                                                 

                                                         
                                             
 

                                                                                                         
 
                                                                
         














































                                                                                       
                                                                                                                  

                                                         

                                                      
 



                                                       















































































































































































































































































































































































































































































































































































                                                                                                              
                                                                                      





























                                                                                                               
                                                                                      

















































                                                                                                     







                                                                                                 
                                                                                             



























                                                                                                                











































                                                                                                 


















                                                                                            
 





















                                                                                                









































                                                                                            



























                                                                                        
                                                                                            

                                                             
                                                        





                                                           
                                                                                              




                                                                                            
                                                                    




                                                                       
                                                                                                        
 
                                                                    


                                      
                                                                                          
 
                                                                

             









                                                                                                            
                                               







                                                                                                                       
                                                                                                                 
         
                                                                            







                                                                                                 




                                                              
                                                     







                                                            


                                   
                     
 

                            
 
                         
 
                               


                                                                                                
                                                                                                    


                                                                                                
                                                                           
         
                                                                                                                 














                                                                                 
                                                                                        










                                                                         
                                                                                                                                                      
         
                                                                                                           


                                                                                                           
                                                                                  
         
                                                                                                                                                      
         
                                                                                  


                                                                                                           




                                                                                                       






                                                                                                                
                                                                










                                                                        

                                                           
                                                                                                            

                                     
                                                                                            
 
                                                                                          
                     
                                                
                     






                                                                                                                                















                                                                                            
                                                                                          

                                                
                                                                                                                            





                                                                             
                                     





                                                                                                    

                                                           




                                                                                                        
 
                                                                                                      


































                                                                                                         

                                                                             



                                                                                              

                                                                           


                                                                                  
                                 
         

















                                                                                                 
                                                         



                                                                           
                                                         






















                                                                                                   
                                                         





                                      

                                                                                                                   
         


                                                                            
 
                                     
 
                                                                                                           

                                                                                   
                                                                     
                                    
                                                         

                                    
                                                                          
                                    
                                                         









                                                                                                                     
                                        
 
                                                                            
 
                                     
 
                                                                                                           

                                                                                   
                                                                         
                                                         

















                                                                                                 
                                                                    
                                    
                                                         






                                                        
                                                                                                          





                                                                                     
                                      
 
                                                                                    
                                                                                    
                                                                                                        
 
                                                                            
                                                                                 
                                                         



                                      
         
                                                             
                                                                                                            
         




                                                                                     
                                                                                    
 
                                                                            
                                     
                                                         



                                      
         
                                                        
                                                                                                          
         




                                                                                     
                                      
 
                                                                                    
 
                                                                
                                                                                                        
 
                                                                            
                                                                                 
                                                         



                                      
         
                                                             
                                                                                                            
         


                                                                                     
                                      
 
                                                                                    
 
                                                                            
                                     
                                                         















                                                                                                           
                                      
 
                                                                                    
                                                                    
                                                                                     
                                    
                                                         

                                    
                                                                
                                    
                                                         

                                    
                                                                          
                                    
                                                         



                                      
         
                                                                                                             
         





                                                                                     
                                      
 
                                                                                    
 
                                                                            
                                     
                                                         


                                                                      
                                                                            
                                                         
                                   
 
                                      
 
                       
         




                                                                                                              
             






                                                                                                         












                                                                                                                     
                                  












































                                                                                                                             
                                                          













                                                                                     
                                                              











                                                                     
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using System.Reflection;

using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    using Func1I = Func<Operand, Operand>;
    using Func2I = Func<Operand, Operand, Operand>;
    using Func3I = Func<Operand, Operand, Operand, Operand>;

    static class InstEmitSimdHelper
    {
#region "Masks"
        public static readonly long[] EvenMasks = new long[]
        {
            14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B
            13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H
            11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0  // S
        };

        public static readonly long[] OddMasks = new long[]
        {
            15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B
            15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H
            15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0  // S
        };

        public static readonly long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;

        public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
        {
            ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
                             (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL <<  8) | (0b10000000UL <<  0);

            return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
        }
#endregion

#region "X86 SSE Intrinsics"
        public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
        {
            Intrinsic.X86Paddb,
            Intrinsic.X86Paddw,
            Intrinsic.X86Paddd,
            Intrinsic.X86Paddq
        };

        public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pcmpeqb,
            Intrinsic.X86Pcmpeqw,
            Intrinsic.X86Pcmpeqd,
            Intrinsic.X86Pcmpeqq
        };

        public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pcmpgtb,
            Intrinsic.X86Pcmpgtw,
            Intrinsic.X86Pcmpgtd,
            Intrinsic.X86Pcmpgtq
        };

        public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmaxsb,
            Intrinsic.X86Pmaxsw,
            Intrinsic.X86Pmaxsd
        };

        public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmaxub,
            Intrinsic.X86Pmaxuw,
            Intrinsic.X86Pmaxud
        };

        public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pminsb,
            Intrinsic.X86Pminsw,
            Intrinsic.X86Pminsd
        };

        public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pminub,
            Intrinsic.X86Pminuw,
            Intrinsic.X86Pminud
        };

        public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmovsxbw,
            Intrinsic.X86Pmovsxwd,
            Intrinsic.X86Pmovsxdq
        };

        public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmovzxbw,
            Intrinsic.X86Pmovzxwd,
            Intrinsic.X86Pmovzxdq
        };

        public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
        {
            0,
            Intrinsic.X86Psllw,
            Intrinsic.X86Pslld,
            Intrinsic.X86Psllq
        };

        public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
        {
            0,
            Intrinsic.X86Psraw,
            Intrinsic.X86Psrad
        };

        public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
        {
            0,
            Intrinsic.X86Psrlw,
            Intrinsic.X86Psrld,
            Intrinsic.X86Psrlq
        };

        public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
        {
            Intrinsic.X86Psubb,
            Intrinsic.X86Psubw,
            Intrinsic.X86Psubd,
            Intrinsic.X86Psubq
        };

        public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
        {
            Intrinsic.X86Punpckhbw,
            Intrinsic.X86Punpckhwd,
            Intrinsic.X86Punpckhdq,
            Intrinsic.X86Punpckhqdq
        };

        public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
        {
            Intrinsic.X86Punpcklbw,
            Intrinsic.X86Punpcklwd,
            Intrinsic.X86Punpckldq,
            Intrinsic.X86Punpcklqdq
        };
#endregion

        public static int GetImmShl(OpCodeSimdShImm op)
        {
            return op.Imm - (8 << op.Size);
        }

        public static int GetImmShr(OpCodeSimdShImm op)
        {
            return (8 << (op.Size + 1)) - op.Imm;
        }

        public static Operand X86GetScalar(ArmEmitterContext context, float value)
        {
            return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
        }

        public static Operand X86GetScalar(ArmEmitterContext context, double value)
        {
            return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
        }

        public static Operand X86GetScalar(ArmEmitterContext context, int value)
        {
            return context.VectorCreateScalar(Const(value));
        }

        public static Operand X86GetScalar(ArmEmitterContext context, long value)
        {
            return context.VectorCreateScalar(Const(value));
        }

        public static Operand X86GetAllElements(ArmEmitterContext context, float value)
        {
            return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
        }

        public static Operand X86GetAllElements(ArmEmitterContext context, double value)
        {
            return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
        }

        public static Operand X86GetAllElements(ArmEmitterContext context, short value)
        {
            ulong value1 = (ushort)value;
            ulong value2 = value1 << 16 | value1;
            ulong value4 = value2 << 32 | value2;

            return X86GetAllElements(context, (long)value4);
        }

        public static Operand X86GetAllElements(ArmEmitterContext context, int value)
        {
            Operand vector = context.VectorCreateScalar(Const(value));

            vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0));

            return vector;
        }

        public static Operand X86GetAllElements(ArmEmitterContext context, long value)
        {
            Operand vector = context.VectorCreateScalar(Const(value));

            vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector);

            return vector;
        }

        public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
        {
            return X86GetElements(context, (ulong)e1, (ulong)e0);
        }

        public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
        {
            Operand vector0 = context.VectorCreateScalar(Const(e0));
            Operand vector1 = context.VectorCreateScalar(Const(e1));

            return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1);
        }

        public static int X86GetRoundControl(FPRoundingMode roundMode)
        {
            switch (roundMode)
            {
                case FPRoundingMode.ToNearest:            return 8 | 0; // even
                case FPRoundingMode.TowardsPlusInfinity:  return 8 | 2;
                case FPRoundingMode.TowardsMinusInfinity: return 8 | 1;
                case FPRoundingMode.TowardsZero:          return 8 | 3;
            }

            throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
        }

        public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
        {
            Debug.Assert(n.Type == OperandType.V128);

            Operand nCopy = context.Copy(n);

            Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));

            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            if ((op.Size & 1) == 0)
            {
                Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
                        signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);

                // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
                Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
                        valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
            }
            else
            {
                Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
                        signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);

                // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
                Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
                        valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
            }

            return nCopy;
        }

        public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
        {
            Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);

            Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L)));

            Operand c1 = Const(op.Type, 0x33L);
            Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1));

            return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL));
        }

        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n);

            if ((op.Size & 1) != 0)
            {
                res = context.VectorZeroUpper64(res);
            }
            else
            {
                res = context.VectorZeroUpper96(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n, m);

            if ((op.Size & 1) != 0)
            {
                res = context.VectorZeroUpper64(res);
            }
            else
            {
                res = context.VectorZeroUpper96(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n, m);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
        {
            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
                : typeof(Math). GetMethod(name, new Type[] { typeof(double) });

            return context.Call(info, n);
        }

        public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
        {
            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            string name = nameof(Math.Round);

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(MathF).GetMethod(name, new Type[] { typeof(float),  typeof(MidpointRounding) })
                : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });

            return context.Call(info, n, Const((int)roundMode));
        }

        public static Operand EmitGetRoundingMode(ArmEmitterContext context)
        {
            Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1));
                    rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag));

            return rMode;
        }

        public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
        {
            Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);

            Operand lbl1 = Label();
            Operand lbl2 = Label();
            Operand lbl3 = Label();
            Operand lblEnd = Label();

            Operand rN = Const((int)FPRoundingMode.ToNearest);
            Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity);
            Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity);

            Operand res = context.AllocateLocal(op.Type);

            Operand rMode = EmitGetRoundingMode(context);

            context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual);
            context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
            context.Branch(lblEnd);

            context.MarkLabel(lbl1);
            context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual);
            context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
            context.Branch(lblEnd);

            context.MarkLabel(lbl2);
            context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual);
            context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op));
            context.Branch(lblEnd);

            context.MarkLabel(lbl3);
            context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
        {
            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(SoftFloat32).GetMethod(name)
                : typeof(SoftFloat64).GetMethod(name);

            context.StoreToContext();
            Operand res = context.Call(info, callArgs);
            context.LoadFromContext();

            return res;
        }

        public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
        }

        public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0));
        }

        public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
            Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
            Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
            Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
            Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

            d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
        }

        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
        }

        public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
        }

        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);

                res = context.VectorInsert(res, emit(ne), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), index);

                res = context.VectorInsert(res, emit(ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), index);

                res = context.VectorInsert(res, emit(de, ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

                res = context.VectorInsert(res, emit(ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

                res = context.VectorInsert(res, emit(de, ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            Operand imm = Const(op.Immediate);

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            Operand imm = Const(op.Immediate);

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRmBinaryOp(context, emit, signed: true);
        }

        public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRmBinaryOp(context, emit, signed: false);
        }

        private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn,        index, op.Size + 1, signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size,     signed);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
        }

        public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
        }

        private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
        }

        public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
        }

        private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtract(context, op.Rd,        index, op.Size + 1, signed);
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size,     signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size,     signed);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
        }

        public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
        }

        private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
        }

        public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
        }

        private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtract(context, op.Rd,        index, op.Size + 1, signed);
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size,     signed);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorPairwiseOp(context, emit, signed: true);
        }

        public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorPairwiseOp(context, emit, signed: false);
        }

        private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int pairs = op.GetPairsCount() >> op.Size;

            for (int index = 0; index < pairs; index++)
            {
                int pairIndex = index << 1;

                Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex,     op.Size, signed);
                Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);

                Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex,     op.Size, signed);
                Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(n0, n1),         index, op.Size);
                res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
                Operand zeroOddMask  = X86GetElements(context, ZeroMask, OddMasks [op.Size]);

                Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n

                Operand left  = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
                Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask);  // 0:odd  from m:n

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
            }
            else if (op.Size < 3)
            {
                Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);

                Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
                Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m

                Operand left  = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
                Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
            }
            else
            {
                Operand left  = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
                Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);

                context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right));
            }
        }

        public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
        }

        public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
        }

        public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
        }

        public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
        }

        private static void EmitVectorAcrossVectorOp(
            ArmEmitterContext context,
            Func2I emit,
            bool signed,
            bool isLong)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int elems = op.GetBytesCount() >> op.Size;

            Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);

            for (int index = 1; index < elems; index++)
            {
                Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

                res = emit(res, n);
            }

            int size = isLong ? op.Size + 1 : op.Size;

            Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);

            context.Copy(GetVec(op.Rd), d);
        }

        public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);

            Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);

            for (int index = 1; index < 4; index++)
            {
                Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index);

                res = emit(res, n);
            }

            Operand d = context.VectorInsert(context.VectorZero(), res, 0);

            context.Copy(GetVec(op.Rd), d);
        }

        public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);

            const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0;
            const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0;
            const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0;
            const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0;

            Operand nCopy = context.Copy(GetVec(op.Rn));

            Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0));
            Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1));
            Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2));
            Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3));

            Operand res = emit(emit(part0, part1), emit(part2, part3));

            context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
        }

        public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);

            Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0);

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Operand op0, op1;

            if ((op.Size & 1) == 0)
            {
                const int sm0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
                const int sm1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;

                Operand zeroN = context.VectorZeroUpper64(n);

                op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm0));
                op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm1));
            }
            else /* if ((op.Size & 1) == 1) */
            {
                Operand zero = context.VectorZero();

                op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero);
                op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n);
            }

            context.Copy(GetVec(op.Rd), emit(op0, op1));
        }

        public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            int pairs = op.GetPairsCount() >> sizeF + 2;

            for (int index = 0; index < pairs; index++)
            {
                int pairIndex = index << 1;

                Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex);
                Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1);

                Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex);
                Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1);

                res = context.VectorInsert(res, emit(n0, n1),         index);
                res = context.VectorInsert(res, emit(m0, m1), pairs + index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand nCopy = context.Copy(GetVec(op.Rn));
            Operand mCopy = context.Copy(GetVec(op.Rm));

            int sizeF = op.Size & 1;

            if (sizeF == 0)
            {
                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy);

                    Operand zero = context.VectorZero();

                    Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
                    Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);

                    context.Copy(GetVec(op.Rd), emit(part0, part1));
                }
                else /* if (op.RegisterSize == RegisterSize.Simd128) */
                {
                    const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
                    const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;

                    Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0));
                    Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1));

                    context.Copy(GetVec(op.Rd), emit(part0, part1));
                }
            }
            else /* if (sizeF == 1) */
            {
                Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy);
                Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy);

                context.Copy(GetVec(op.Rd), emit(part0, part1));
            }
        }

        [Flags]
        public enum Mxcsr
        {
            Ftz = 1 << 15, // Flush To Zero.
            Um  = 1 << 11, // Underflow Mask.
            Dm  = 1 << 8,  // Denormal Mask.
            Daz = 1 << 6   // Denormals Are Zero.
        }

        public static void EmitSseOrAvxEnterFtzAndDazModesOpF(ArmEmitterContext context, out Operand isTrue)
        {
            isTrue = GetFpFlag(FPState.FzFlag);

            Operand lblTrue = Label();
            context.BranchIfFalse(lblTrue, isTrue);

            context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrmb, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm | Mxcsr.Daz)));

            context.MarkLabel(lblTrue);
        }

        public static void EmitSseOrAvxExitFtzAndDazModesOpF(ArmEmitterContext context, Operand isTrue = default)
        {
            isTrue = isTrue == default ? GetFpFlag(FPState.FzFlag) : isTrue;

            Operand lblTrue = Label();
            context.BranchIfFalse(lblTrue, isTrue);

            context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrub, Const((int)(Mxcsr.Ftz | Mxcsr.Daz)));

            context.MarkLabel(lblTrue);
        }

        public enum CmpCondition
        {
            // Legacy Sse.
            Equal              = 0, // Ordered, non-signaling.
            LessThan           = 1, // Ordered, signaling.
            LessThanOrEqual    = 2, // Ordered, signaling.
            UnorderedQ         = 3, // Non-signaling.
            NotLessThan        = 5, // Unordered, signaling.
            NotLessThanOrEqual = 6, // Unordered, signaling.
            OrderedQ           = 7, // Non-signaling.

            // Vex.
            GreaterThanOrEqual = 13, // Ordered, signaling.
            GreaterThan        = 14, // Ordered, signaling.
            OrderedS           = 23  // Signaling.
        }

        [Flags]
        public enum SaturatingFlags
        {
            None = 0,

            ByElem = 1 << 0,
            Scalar = 1 << 1,
            Signed = 1 << 2,

            Add = 1 << 3,
            Sub = 1 << 4,

            Accumulate = 1 << 5
        }

        public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed);
        }

        public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed);
        }

        public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            bool scalar = (flags & SaturatingFlags.Scalar) != 0;

            int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand de;

                if (op.Size <= 2)
                {
                    de = EmitSignedSrcSatQ(context, emit(ne), op.Size, signedDst: true);
                }
                else /* if (op.Size == 3) */
                {
                    de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne));
                }

                res = EmitVectorInsert(context, res, de, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
        {
            EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags);
        }

        public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
        {
            EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags);
        }

        public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
        {
            EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags);
        }

        public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
        {
            EmitSaturatingBinaryOp(context, null, flags);
        }

        public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
        {
            EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed);
        }

        public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            bool byElem = (flags & SaturatingFlags.ByElem) != 0;
            bool scalar = (flags & SaturatingFlags.Scalar) != 0;
            bool signed = (flags & SaturatingFlags.Signed) != 0;

            bool add = (flags & SaturatingFlags.Add) != 0;
            bool sub = (flags & SaturatingFlags.Sub) != 0;

            bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;

            int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

            if (add || sub)
            {
                for (int index = 0; index < elems; index++)
                {
                    Operand de;
                    Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
                    Operand me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);

                    if (op.Size <= 2)
                    {
                        Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);

                        de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
                    }
                    else /* if (op.Size == 3) */
                    {
                        if (add)
                        {
                            de = signed ? EmitBinarySignedSatQAdd(context, ne, me) : EmitBinaryUnsignedSatQAdd(context, ne, me);
                        }
                        else /* if (sub) */
                        {
                            de = signed ? EmitBinarySignedSatQSub(context, ne, me) : EmitBinaryUnsignedSatQSub(context, ne, me);
                        }
                    }

                    res = EmitVectorInsert(context, res, de, index, op.Size);
                }
            }
            else if (accumulate)
            {
                for (int index = 0; index < elems; index++)
                {
                    Operand de;
                    Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
                    Operand me = EmitVectorExtract(context, op.Rd, index, op.Size,  signed);

                    if (op.Size <= 2)
                    {
                        Operand temp = context.Add(ne, me);

                        de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
                    }
                    else /* if (op.Size == 3) */
                    {
                        de = signed ? EmitBinarySignedSatQAcc(context, ne, me) : EmitBinaryUnsignedSatQAcc(context, ne, me);
                    }

                    res = EmitVectorInsert(context, res, de, index, op.Size);
                }
            }
            else
            {
                Operand me = default;

                if (byElem)
                {
                    OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op;

                    me = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, signed);
                }

                for (int index = 0; index < elems; index++)
                {
                    Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

                    if (!byElem)
                    {
                        me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);
                    }

                    Operand de = EmitSignedSrcSatQ(context, emit(ne, me), op.Size, signedDst: signed);

                    res = EmitVectorInsert(context, res, de, index, op.Size);
                }
            }

            context.Copy(GetVec(op.Rd), res);
        }

        [Flags]
        public enum SaturatingNarrowFlags
        {
            Scalar    = 1 << 0,
            SignedSrc = 1 << 1,
            SignedDst = 1 << 2,

            ScalarSxSx = Scalar | SignedSrc | SignedDst,
            ScalarSxZx = Scalar | SignedSrc,
            ScalarZxZx = Scalar,

            VectorSxSx = SignedSrc | SignedDst,
            VectorSxZx = SignedSrc,
            VectorZxZx = 0
        }

        public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            bool scalar    = (flags & SaturatingNarrowFlags.Scalar)    != 0;
            bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
            bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;

            int elems = !scalar ? 8 >> op.Size : 1;

            int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

            Operand d = GetVec(op.Rd);

            Operand res = part == 0 ? context.VectorZero() : context.Copy(d);

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

                Operand temp = signedSrc
                    ? EmitSignedSrcSatQ(context, ne, op.Size, signedDst)
                    : EmitUnsignedSrcSatQ(context, ne, op.Size, signedDst);

                res = EmitVectorInsert(context, res, temp, part + index, op.Size);
            }

            context.Copy(d, res);
        }

        // long SignedSignSatQ(long op, int size);
        public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size)
        {
            int eSize = 8 << size;

            Debug.Assert(op.Type == OperandType.I64);
            Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

            Operand lbl1 = Label();
            Operand lblEnd = Label();

            Operand zeroL = Const(0L);
            Operand maxT = Const((1L << (eSize - 1)) - 1L);
            Operand minT = Const(-(1L << (eSize - 1)));

            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);

            context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual);
            context.Copy(res, maxT);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lbl1);
            context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual);
            context.Copy(res, minT);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // private static ulong UnsignedSignSatQ(ulong op, int size);
        public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size)
        {
            int eSize = 8 << size;

            Debug.Assert(op.Type == OperandType.I64);
            Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

            Operand lblEnd = Label();

            Operand zeroUL = Const(0UL);
            Operand maxT = Const(ulong.MaxValue >> (64 - eSize));

            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);

            context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI);
            context.Copy(res, maxT);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned).
        // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size);
        public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
        {
            int eSizeDst = 8 << sizeDst;

            Debug.Assert(op.Type == OperandType.I64);
            Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);

            Operand lbl1 = Label();
            Operand lblEnd = Label();

            Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
            Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL);

            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);

            context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual);
            context.Copy(res, maxT);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lbl1);
            context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual);
            context.Copy(res, minT);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
        // long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
        public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
        {
            int eSizeDst = 8 << sizeDst;

            Debug.Assert(op.Type == OperandType.I64);
            Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);

            Operand lblEnd = Label();

            Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);

            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);

            context.BranchIf(lblEnd, op, maxT, Comparison.LessOrEqualUI);
            context.Copy(res, maxT);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // long UnarySignedSatQAbsOrNeg(long op);
        private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
        {
            Debug.Assert(op.Type == OperandType.I64);

            Operand lblEnd = Label();

            Operand minL = Const(long.MinValue);
            Operand maxL = Const(long.MaxValue);

            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);

            context.BranchIf(lblEnd, op, minL, Comparison.NotEqual);
            context.Copy(res, maxL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // long BinarySignedSatQAdd(long op1, long op2);
        public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand lblEnd = Label();

            Operand minL = Const(long.MinValue);
            Operand maxL = Const(long.MaxValue);
            Operand zeroL = Const(0L);

            Operand add = context.Add(op1, op2);
            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);

            Operand left = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
            Operand right = context.BitwiseExclusiveOr(op1, add);
            context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);

            Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
            context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
        public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand lblEnd = Label();

            Operand maxUL = Const(ulong.MaxValue);

            Operand add = context.Add(op1, op2);
            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);

            context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
            context.Copy(res, maxUL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // long BinarySignedSatQSub(long op1, long op2);
        public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand lblEnd = Label();

            Operand minL = Const(long.MinValue);
            Operand maxL = Const(long.MaxValue);
            Operand zeroL = Const(0L);

            Operand sub = context.Subtract(op1, op2);
            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);

            Operand left = context.BitwiseExclusiveOr(op1, op2);
            Operand right = context.BitwiseExclusiveOr(op1, sub);
            context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);

            Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
            context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
        public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand lblEnd = Label();

            Operand zeroL = Const(0L);

            Operand sub = context.Subtract(op1, op2);
            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);

            context.BranchIf(lblEnd, op1, op2, Comparison.GreaterOrEqualUI);
            context.Copy(res, zeroL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // long BinarySignedSatQAcc(ulong op1, long op2);
        private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand lbl1 = Label();
            Operand lbl2 = Label();
            Operand lblEnd = Label();

            Operand maxL = Const(long.MaxValue);
            Operand zeroL = Const(0L);

            Operand add = context.Add(op1, op2);
            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);

            context.BranchIf(lbl1, op1, maxL, Comparison.GreaterUI);
            Operand notOp2AndRes = context.BitwiseAnd(context.BitwiseNot(op2), add);
            context.BranchIf(lblEnd, notOp2AndRes, zeroL, Comparison.GreaterOrEqual);
            context.Copy(res, maxL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lbl1);
            context.BranchIf(lbl2, op2, zeroL, Comparison.Less);
            context.Copy(res, maxL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lbl2);
            context.BranchIf(lblEnd, add, maxL, Comparison.LessOrEqualUI);
            context.Copy(res, maxL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        // ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
        private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand lbl1 = Label();
            Operand lblEnd = Label();

            Operand maxUL = Const(ulong.MaxValue);
            Operand maxL = Const(long.MaxValue);
            Operand zeroL = Const(0L);

            Operand add = context.Add(op1, op2);
            Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);

            context.BranchIf(lbl1, op1, zeroL, Comparison.Less);
            context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
            context.Copy(res, maxUL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lbl1);
            context.BranchIf(lblEnd, op2, maxL, Comparison.GreaterUI);
            context.BranchIf(lblEnd, add, zeroL, Comparison.GreaterOrEqual);
            context.Copy(res, zeroL);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
        {
            Operand mask;
            if (single)
            {
                mask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);
            }
            else
            {
                mask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);
            }

            return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value);
        }

        public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
        {
            return EmitVectorExtract(context, reg, index, size, true);
        }

        public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
        {
            return EmitVectorExtract(context, reg, index, size, false);
        }

        public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
        {
            ThrowIfInvalid(index, size);

            Operand res = default;

            switch (size)
            {
                case 0:
                    res = context.VectorExtract8(GetVec(reg), index);
                    break;

                case 1:
                    res = context.VectorExtract16(GetVec(reg), index);
                    break;

                case 2:
                    res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
                    break;

                case 3:
                    res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
                    break;
            }

            if (signed)
            {
                switch (size)
                {
                    case 0: res = context.SignExtend8 (OperandType.I64, res); break;
                    case 1: res = context.SignExtend16(OperandType.I64, res); break;
                    case 2: res = context.SignExtend32(OperandType.I64, res); break;
                }
            }
            else
            {
                switch (size)
                {
                    case 0: res = context.ZeroExtend8 (OperandType.I64, res); break;
                    case 1: res = context.ZeroExtend16(OperandType.I64, res); break;
                    case 2: res = context.ZeroExtend32(OperandType.I64, res); break;
                }
            }

            return res;
        }

        public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
        {
            ThrowIfInvalid(index, size);

            if (size < 3 && value.Type == OperandType.I64)
            {
                value = context.ConvertI64ToI32(value);
            }

            switch (size)
            {
                case 0: vector = context.VectorInsert8 (vector, value, index); break;
                case 1: vector = context.VectorInsert16(vector, value, index); break;
                case 2: vector = context.VectorInsert  (vector, value, index); break;
                case 3: vector = context.VectorInsert  (vector, value, index); break;
            }

            return vector;
        }

        public static void ThrowIfInvalid(int index, int size)
        {
            if ((uint)size > 3u)
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            if ((uint)index >= 16u >> size)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
        }
    }
}