diff options
author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2018-08-20 06:20:26 +0200 |
---|---|---|
committer | gdkchan <gab.dark.100@gmail.com> | 2018-08-20 01:20:26 -0300 |
commit | d021d5dfa9d884160625c273c7f54ffbbeb08802 (patch) | |
tree | cae5e3988767476b46d20228366080a09ea56d10 | |
parent | 726de8c46ab10f1b0684fe14bca1ca96ba6d2832 (diff) |
Add AESD, AESE, AESIMC, AESMC instructions; add 4 simple Tests (closed box). (#365)
* Create CpuTestSimdCrypto.cs
* Update AOpCodeTable.cs
* Create AInstEmitSimdCrypto.cs
* Update ASoftFallback.cs
* Create ACryptoHelper.cs
-rw-r--r-- | ChocolArm64/AOpCodeTable.cs | 4 |
-rw-r--r-- | ChocolArm64/Instruction/ACryptoHelper.cs | 328 |
-rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdCrypto.cs | 54 |
-rw-r--r-- | ChocolArm64/Instruction/ASoftFallback.cs | 36 |
-rw-r--r-- | Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs | 135 |
5 files changed, 557 insertions, 0 deletions
diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 97404bbc..dc8cfc08 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -180,6 +180,10 @@ namespace ChocolArm64 SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", AInstEmit.Addp_V, typeof(AOpCodeSimdReg)); SetA64("000011100x110001101110xxxxxxxxxx", AInstEmit.Addv_V, typeof(AOpCodeSimd)); SetA64("01001110<<110001101110xxxxxxxxxx", AInstEmit.Addv_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000010110xxxxxxxxxx", AInstEmit.Aesd_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000010010xxxxxxxxxx", AInstEmit.Aese_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000011110xxxxxxxxxx", AInstEmit.Aesimc_V, typeof(AOpCodeSimd)); + SetA64("0100111000101000011010xxxxxxxxxx", AInstEmit.Aesmc_V, typeof(AOpCodeSimd)); SetA64("0x001110001xxxxx000111xxxxxxxxxx", AInstEmit.And_V, typeof(AOpCodeSimdReg)); SetA64("0x001110011xxxxx000111xxxxxxxxxx", AInstEmit.Bic_V, typeof(AOpCodeSimdReg)); SetA64("0x10111100000xxx<<x101xxxxxxxxxx", AInstEmit.Bic_Vi, typeof(AOpCodeSimdImm)); diff --git a/ChocolArm64/Instruction/ACryptoHelper.cs b/ChocolArm64/Instruction/ACryptoHelper.cs new file mode 100644 index 00000000..2dc65972 --- /dev/null +++ b/ChocolArm64/Instruction/ACryptoHelper.cs @@ -0,0 +1,328 @@ +// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace ChocolArm64.Instruction +{ + static class ACryptoHelper + { +#region "LookUp Tables" + private static byte[] SBox = + { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 
0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + private static byte[] InvSBox = + { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 
0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + private static byte[] GFMul_02 = + { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, + 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, + 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 
0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, + 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, + 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 + }; + + private static byte[] GFMul_03 = + { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, + 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, + 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, + 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, + 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, + 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, + 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, + 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, + 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 
0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a + }; + + private static byte[] GFMul_09 = + { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, + 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, + 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, + 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, + 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, + 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, + 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, + 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 + }; + + private static byte[] GFMul_0B = + { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, + 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, + 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 
0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, + 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, + 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, + 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, + 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, + 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, + 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, + 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, + 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 + }; + + private static byte[] GFMul_0D = + { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, + 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, + 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, + 
0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, + 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, + 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, + 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, + 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, + 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 + }; + + private static byte[] GFMul_0E = + { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, + 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, + 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, + 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, + 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, + 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 
0xc0, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, + 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, + 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d + }; + + private static byte[] SRPerm = { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 }; + + private static byte[] ISRPerm = { 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 }; +#endregion + + public static Vector128<float> AESInvMixColumns(Vector128<float> op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Columns = 0; Columns <= 3; Columns++) + { + int Idx = Columns << 2; + + byte Row0 = InState[Idx + 0]; // A, E, I, M: [Row0, Col0-Col3] + byte Row1 = InState[Idx + 1]; // B, F, J, N: [Row1, Col0-Col3] + byte Row2 = InState[Idx + 2]; // C, G, K, O: [Row2, Col0-Col3] + byte Row3 = InState[Idx + 3]; // D, H, L, P: [Row3, Col0-Col3] + + OutState[Idx + 0] = (byte)((uint)GFMul_0E[Row0] ^ GFMul_0B[Row1] ^ GFMul_0D[Row2] ^ GFMul_09[Row3]); + OutState[Idx + 1] = (byte)((uint)GFMul_09[Row0] ^ GFMul_0E[Row1] ^ GFMul_0B[Row2] ^ GFMul_0D[Row3]); + OutState[Idx + 2] = (byte)((uint)GFMul_0D[Row0] ^ GFMul_09[Row1] ^ GFMul_0E[Row2] ^ GFMul_0B[Row3]); + OutState[Idx + 3] = (byte)((uint)GFMul_0B[Row0] ^ GFMul_0D[Row1] ^ GFMul_09[Row2] ^ GFMul_0E[Row3]); + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128<float> AESInvShiftRows(Vector128<float> op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[ISRPerm[Idx]] = InState[Idx]; + } + + 
FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128<float> AESInvSubBytes(Vector128<float> op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[Idx] = InvSBox[InState[Idx]]; + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128<float> AESMixColumns(Vector128<float> op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Columns = 0; Columns <= 3; Columns++) + { + int Idx = Columns << 2; + + byte Row0 = InState[Idx + 0]; // A, E, I, M: [Row0, Col0-Col3] + byte Row1 = InState[Idx + 1]; // B, F, J, N: [Row1, Col0-Col3] + byte Row2 = InState[Idx + 2]; // C, G, K, O: [Row2, Col0-Col3] + byte Row3 = InState[Idx + 3]; // D, H, L, P: [Row3, Col0-Col3] + + OutState[Idx + 0] = (byte)((uint)GFMul_02[Row0] ^ GFMul_03[Row1] ^ Row2 ^ Row3); + OutState[Idx + 1] = (byte)((uint)Row0 ^ GFMul_02[Row1] ^ GFMul_03[Row2] ^ Row3); + OutState[Idx + 2] = (byte)((uint)Row0 ^ Row1 ^ GFMul_02[Row2] ^ GFMul_03[Row3]); + OutState[Idx + 3] = (byte)((uint)GFMul_03[Row0] ^ Row1 ^ Row2 ^ GFMul_02[Row3]); + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128<float> AESShiftRows(Vector128<float> op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[SRPerm[Idx]] = InState[Idx]; + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + + public static Vector128<float> AESSubBytes(Vector128<float> op) + { + byte[] InState = new byte[16]; + byte[] OutState = new byte[16]; + + FromVectorToByteArray(InState, ref op); + + for (int Idx = 0; Idx <= 15; Idx++) + { + OutState[Idx] = SBox[InState[Idx]]; + } + + FromByteArrayToVector(OutState, ref op); + + return op; + } + 
+ private static void FromVectorToByteArray(byte[] State, ref Vector128<float> op) + { + ulong ULongLow = AVectorHelper.VectorExtractIntZx((op), (byte)0, 3); + ulong ULongHigh = AVectorHelper.VectorExtractIntZx((op), (byte)1, 3); + + for (int Idx = 0; Idx <= 7; Idx++) + { + State[Idx + 0] = (byte)(ULongLow & 0xFFUL); + State[Idx + 8] = (byte)(ULongHigh & 0xFFUL); + + ULongLow >>= 8; + ULongHigh >>= 8; + } + } + + private static void FromByteArrayToVector(byte[] State, ref Vector128<float> op) + { + if (!Sse2.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + op = Sse.StaticCast<byte, float>(Sse2.SetVector128( + State[15], State[14], State[13], State[12], + State[11], State[10], State[9], State[8], + State[7], State[6], State[5], State[4], + State[3], State[2], State[1], State[0])); + } + } +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdCrypto.cs b/ChocolArm64/Instruction/AInstEmitSimdCrypto.cs new file mode 100644 index 00000000..b2680a58 --- /dev/null +++ b/ChocolArm64/Instruction/AInstEmitSimdCrypto.cs @@ -0,0 +1,54 @@ +using ChocolArm64.Decoder; +using ChocolArm64.Translation; + +namespace ChocolArm64.Instruction +{ + static partial class AInstEmit + { + public static void Aesd_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Decrypt)); + + Context.EmitStvec(Op.Rd); + } + + public static void Aese_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rd); + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Encrypt)); + + Context.EmitStvec(Op.Rd); + } + + public static void Aesimc_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.InverseMixColumns)); + + Context.EmitStvec(Op.Rd); + } + + public static void 
Aesmc_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + Context.EmitLdvec(Op.Rn); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MixColumns)); + + Context.EmitStvec(Op.Rd); + } + } +} diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index 0c8a39a4..0ae84ab2 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -410,6 +410,42 @@ namespace ChocolArm64.Instruction } #endregion +#region "Aes" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> Decrypt(Vector128<float> value, Vector128<float> roundKey) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + return ACryptoHelper.AESInvSubBytes(ACryptoHelper.AESInvShiftRows(Sse.Xor(value, roundKey))); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> Encrypt(Vector128<float> value, Vector128<float> roundKey) + { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + return ACryptoHelper.AESSubBytes(ACryptoHelper.AESShiftRows(Sse.Xor(value, roundKey))); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> InverseMixColumns(Vector128<float> value) + { + return ACryptoHelper.AESInvMixColumns(value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> MixColumns(Vector128<float> value) + { + return ACryptoHelper.AESMixColumns(value); + } +#endregion + #region "Sha256" [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> HashLower(Vector128<float> hash_abcd, Vector128<float> hash_efgh, Vector128<float> wk) diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs new file mode 100644 index 00000000..e4693733 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs @@ -0,0 +1,135 @@ +// 
https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf + +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + public class CpuTestSimdCrypto : CpuTest + { + [Test, Explicit, Description("AESD <Vd>.16B, <Vn>.16B")] + public void Aesd_V([Values(0u)] uint Rd, + [Values(1u)] uint Rn, + [Values(0x7B5B546573745665ul)] ulong ValueH, + [Values(0x63746F725D53475Dul)] ulong ValueL, + [Random(2)] ulong RoundKeyH, + [Random(2)] ulong RoundKeyL, + [Values(0x8DCAB9BC035006BCul)] ulong ResultH, + [Values(0x8F57161E00CAFD8Dul)] ulong ResultL) + { + uint Opcode = 0x4E285800; // AESD V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128<float> V0 = MakeVectorE0E1(RoundKeyL ^ ValueL, RoundKeyH ^ ValueH); + Vector128<float> V1 = MakeVectorE0E1(RoundKeyL, RoundKeyH); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(RoundKeyL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(RoundKeyH)); + }); + } + + [Test, Explicit, Description("AESE <Vd>.16B, <Vn>.16B")] + public void Aese_V([Values(0u)] uint Rd, + [Values(1u)] uint Rn, + [Values(0x7B5B546573745665ul)] ulong ValueH, + [Values(0x63746F725D53475Dul)] ulong ValueL, + [Random(2)] ulong RoundKeyH, + [Random(2)] ulong RoundKeyL, + [Values(0x8F92A04DFBED204Dul)] ulong ResultH, + [Values(0x4C39B1402192A84Cul)] ulong ResultL) + { + uint Opcode = 0x4E284800; // AESE V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128<float> V0 = MakeVectorE0E1(RoundKeyL ^ ValueL, RoundKeyH ^ ValueH); + Vector128<float> V1 = MakeVectorE0E1(RoundKeyL, RoundKeyH); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, 
V1: V1); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(RoundKeyL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(RoundKeyH)); + }); + } + + [Test, Explicit, Description("AESIMC <Vd>.16B, <Vn>.16B")] + public void Aesimc_V([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(0x8DCAB9DC035006BCul)] ulong ValueH, + [Values(0x8F57161E00CAFD8Dul)] ulong ValueL, + [Values(0xD635A667928B5EAEul)] ulong ResultH, + [Values(0xEEC9CC3BC55F5777ul)] ulong ResultL) + { + uint Opcode = 0x4E287800; // AESIMC V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128<float> V = MakeVectorE0E1(ValueL, ValueH); + + AThreadState ThreadState = SingleOpcode( + Opcode, + V0: Rn == 0u ? V : default(Vector128<float>), + V1: Rn == 1u ? V : default(Vector128<float>)); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + if (Rn == 1u) + { + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(ValueL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(ValueH)); + }); + } + } + + [Test, Explicit, Description("AESMC <Vd>.16B, <Vn>.16B")] + public void Aesmc_V([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(0x627A6F6644B109C8ul)] ulong ValueH, + [Values(0x2B18330A81C3B3E5ul)] ulong ValueL, + [Values(0x7B5B546573745665ul)] ulong ResultH, + [Values(0x63746F725D53475Dul)] ulong ResultL) + { + uint Opcode = 0x4E286800; // AESMC V0.16B, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Vector128<float> V = MakeVectorE0E1(ValueL, ValueH); + + AThreadState ThreadState = SingleOpcode( + Opcode, + V0: Rn == 0u ? V : default(Vector128<float>), + V1: Rn == 1u ? 
V : default(Vector128<float>)); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(ResultL)); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(ResultH)); + }); + if (Rn == 1u) + { + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V1), Is.EqualTo(ValueL)); + Assert.That(GetVectorE1(ThreadState.V1), Is.EqualTo(ValueH)); + }); + } + } + } +} |