aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/aes128.asm129
-rw-r--r--src/aes192.asm196
-rw-r--r--src/aes256cbc.asm183
-rw-r--r--src/aes256ecb.asm181
-rw-r--r--src/common.c38
5 files changed, 727 insertions, 0 deletions
diff --git a/src/aes128.asm b/src/aes128.asm
new file mode 100644
index 0000000..dfa7a7d
--- /dev/null
+++ b/src/aes128.asm
@@ -0,0 +1,129 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h ; 16-byte alignment: both tables are accessed with movdqa and as aligned SSE memory operands
+key_schedule oword 11 dup(0) ; 11 round keys (offsets 0..0A0h), written by expand_keys_128ecb
+
+align 10h
+inverted_key_schedule oword 11 dup(0) ; round keys consumed by aesdec/aesdeclast, written by invert_key_schedule
+
+.code
+
+@aes128ecb_encrypt@32 proc ; in: xmm0 = 16-byte block, xmm1 = 128-bit key; out: xmm0 = encrypted block
+ call expand_keys_128ecb ; rebuilds both schedules from xmm1 on every call; clobbers ecx, xmm1, xmm6, xmm7
+ pxor xmm0, [key_schedule] ; initial AddRoundKey with round key 0
+ aesenc xmm0, [key_schedule + 10h] ; rounds 1-9, one round key each
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenclast xmm0, [key_schedule + 0A0h] ; round 10 uses the "last round" form of the instruction
+ ret ; result is left in xmm0
+@aes128ecb_encrypt@32 endp
+
+@aes128ecb_decrypt@32 proc ; in: xmm0 = 16-byte block, xmm1 = 128-bit key; out: xmm0 = decrypted block
+ call expand_keys_128ecb ; rebuilds both schedules from xmm1 on every call; clobbers ecx, xmm1, xmm6, xmm7
+ pxor xmm0, [inverted_key_schedule] ; entry 0 of the inverted schedule is the last encryption round key
+ aesdec xmm0, [inverted_key_schedule + 10h] ; rounds 1-9 use the aesimc-transformed keys
+ aesdec xmm0, [inverted_key_schedule + 20h]
+ aesdec xmm0, [inverted_key_schedule + 30h]
+ aesdec xmm0, [inverted_key_schedule + 40h]
+ aesdec xmm0, [inverted_key_schedule + 50h]
+ aesdec xmm0, [inverted_key_schedule + 60h]
+ aesdec xmm0, [inverted_key_schedule + 70h]
+ aesdec xmm0, [inverted_key_schedule + 80h]
+ aesdec xmm0, [inverted_key_schedule + 90h]
+ aesdeclast xmm0, [inverted_key_schedule + 0A0h] ; last entry is the original key (round key 0), untransformed
+ ret ; result is left in xmm0
+@aes128ecb_decrypt@32 endp
+
+expand_keys_128ecb proc ; in: xmm1 = 128-bit key; fills key_schedule and inverted_key_schedule; clobbers ecx, xmm1, xmm6, xmm7
+ lea ecx, [key_schedule + 10h] ; ecx = slot where gen_round_key stores the next round key
+ movdqa [key_schedule], xmm1 ; round key 0 is the key itself
+
+ aeskeygenassist xmm7, xmm1, 01h ; the immediate is the round constant (Rcon) of the key being generated
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 02h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 04h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 08h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 10h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 20h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 40h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 80h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 1Bh
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 36h
+ call gen_round_key ; tenth generated key lands at key_schedule + 0A0h
+
+ call invert_key_schedule ; derive the decryption schedule from the finished key_schedule
+ ret
+
+gen_round_key: ; in: xmm1 = previous round key, xmm7 = aeskeygenassist result, ecx = output slot; out: xmm1 = next round key, ecx += 10h
+ movdqa xmm6, xmm1 ; xmm6 = key_schedule[i]
+ ; xmm6 = x3 x2 x1 x0
+
+ pslldq xmm6, 4 ; xmm6 = x2 x1 x0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 x2) (x2 x1) (x1 x0) x0
+ pslldq xmm6, 4 ; xmm6 = x1 x0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 x2 x1) (x2 x1 x0) (x1 x0) x0
+ pslldq xmm6, 4 ; xmm6 = x0 0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 x2 x1 x0) (x2 x1 x0) (x1 x0) x0
+
+ pshufd xmm7, xmm7, 0FFh ; broadcast the top dword of the aeskeygenassist result to all four lanes
+ pxor xmm1, xmm7 ; xmm1 = next round key
+
+ movdqa [ecx], xmm1 ; append it to key_schedule
+ add ecx, 10h ; advance to the next 16-byte slot
+ ret
+
+invert_key_schedule: ; inverted[i] = key_schedule[10 - i], with aesimc applied to every entry except the first and last; clobbers xmm6, xmm7
+ movdqa xmm7, [key_schedule]
+ movdqa xmm6, [key_schedule + 0A0h]
+ movdqa [inverted_key_schedule], xmm6 ; the two outermost keys are swapped without aesimc
+ movdqa [inverted_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h] ; each following group handles one mirrored pair of round keys
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverted_key_schedule + 10h], xmm6
+ movdqa [inverted_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverted_key_schedule + 20h], xmm6
+ movdqa [inverted_key_schedule + 80h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 70h]
+ movdqa [inverted_key_schedule + 30h], xmm6
+ movdqa [inverted_key_schedule + 70h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 60h]
+ movdqa [inverted_key_schedule + 40h], xmm6
+ movdqa [inverted_key_schedule + 60h], xmm7
+
+ aesimc xmm7, [key_schedule + 50h] ; the middle key maps onto its own slot
+ movdqa [inverted_key_schedule + 50h], xmm7
+
+ ret
+expand_keys_128ecb endp
+
+end
diff --git a/src/aes192.asm b/src/aes192.asm
new file mode 100644
index 0000000..84d3a3a
--- /dev/null
+++ b/src/aes192.asm
@@ -0,0 +1,196 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h ; 16-byte alignment: both tables are accessed with movdqa and as aligned SSE memory operands
+key_schedule oword 14 dup(0) ; 13 round keys (offsets 0..0C0h) + 1 spare oword: expand_keys_192ecb stores 24 bytes per step and its last store ends at 0D7h
+
+align 10h
+inverted_key_schedule oword 13 dup(0) ; round keys consumed by aesdec/aesdeclast, written by invert_key_schedule
+
+.code
+
+@aes192ecb_encrypt@48 proc ; in: xmm0 = 16-byte block, xmm1 = key words 0-3, xmm2 = key words 4-5 (low qword); out: xmm0 = encrypted block
+ call expand_keys_192ecb ; rebuilds both schedules on every call; clobbers ecx, xmm1, xmm2, xmm6, xmm7
+ pxor xmm0, [key_schedule] ; initial AddRoundKey with round key 0
+ aesenc xmm0, [key_schedule + 10h] ; rounds 1-11, one round key each
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenc xmm0, [key_schedule + 0A0h]
+ aesenc xmm0, [key_schedule + 0B0h]
+ aesenclast xmm0, [key_schedule + 0C0h] ; round 12 uses the "last round" form of the instruction
+ ret ; result is left in xmm0
+@aes192ecb_encrypt@48 endp
+
+@aes192ecb_decrypt@48 proc ; in: xmm0 = 16-byte block, xmm1 = key words 0-3, xmm2 = key words 4-5 (low qword); out: xmm0 = decrypted block
+ call expand_keys_192ecb ; rebuilds both schedules on every call; clobbers ecx, xmm1, xmm2, xmm6, xmm7
+ pxor xmm0, [inverted_key_schedule] ; entry 0 of the inverted schedule is the last encryption round key
+ aesdec xmm0, [inverted_key_schedule + 10h] ; rounds 1-11 use the aesimc-transformed keys
+ aesdec xmm0, [inverted_key_schedule + 20h]
+ aesdec xmm0, [inverted_key_schedule + 30h]
+ aesdec xmm0, [inverted_key_schedule + 40h]
+ aesdec xmm0, [inverted_key_schedule + 50h]
+ aesdec xmm0, [inverted_key_schedule + 60h]
+ aesdec xmm0, [inverted_key_schedule + 70h]
+ aesdec xmm0, [inverted_key_schedule + 80h]
+ aesdec xmm0, [inverted_key_schedule + 90h]
+ aesdec xmm0, [inverted_key_schedule + 0A0h]
+ aesdec xmm0, [inverted_key_schedule + 0B0h]
+ aesdeclast xmm0, [inverted_key_schedule + 0C0h] ; last entry is round key 0, untransformed
+ ret ; result is left in xmm0
+@aes192ecb_decrypt@48 endp
+
+expand_keys_192ecb proc ; in: xmm1, xmm2 = 192-bit key (layout below); fills key_schedule and inverted_key_schedule; clobbers ecx, xmm1, xmm2, xmm6, xmm7
+ ; key = k0 k1 k2 k3 k4 k5 ("*" in the comments below denotes xor)
+ ; xmm1 = k0 k1 k2 k3
+ ; xmm2 = 0 0 k5 k4
+
+ ; w[0] = k0 k1 k2 k3
+ ; w[1] = k4 k5 - -
+
+ ; i = 6
+ ; while (i < 52):
+ ; temp = w[i - 1]
+ ; if (i % 6 == 0):
+ ; temp = SubWord(RotWord(w[i - 1])) * Rcon
+ ; w[i] = w[i - 6] * temp
+ ; i = i + 1
+
+ ; w[6] = SubWord(RotWord(w[5])) * Rcon * w[0]
+ ; w[7] = w[6] * w[1]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1]
+ ; w[8] = w[7] * w[2]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2]
+ ; w[9] = w[8] * w[3]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3]
+ ; w[10] = w[9] * w[4]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3] * w[4]
+ ; w[11] = w[10] * w[5]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3] * w[4] * w[5]
+
+ movdqa [key_schedule], xmm1 ; first four key words
+ movdqa [key_schedule + 10h], xmm2 ; key words 4-5 in the low qword; the high qword is overwritten by the first gen_round_key call
+
+ lea ecx, [key_schedule + 18h] ; ecx = byte offset of the 7th schedule word, the first one to generate
+ aeskeygenassist xmm7, xmm2, 1 ; the immediate is the round constant (Rcon) for this group of six words
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 2
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 4
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 8
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 10h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 20h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 40h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 80h
+ call gen_round_key ; 8th call writes offsets 0C0h..0D7h: key_schedule must hold at least 0D8h bytes although only 0D0h (13 round keys) are used
+
+ call invert_key_schedule ; derive the decryption schedule from the finished key_schedule
+ ret
+
+gen_round_key: ; generates six schedule words (24 bytes) at [ecx]; out: xmm1 = first four new words, xmm2 = last two new words (low qword), ecx += 18h
+ ; xmm1 = x3 x2 x1 x0
+ ; xmm2 = - - x5 x4
+ ; xmm7 = RotWord(SubWord(-)) xor Rcon
+ ; SubWord(-)
+ ; RotWord(SubWord(x5)) xor Rcon
+ ; SubWord(x5)
+ movdqa xmm6, xmm1
+
+ pslldq xmm6, 4 ; xmm6 = x2 x1 x0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 * x2) (x1 * x2) (x1 * x0) x0
+ pslldq xmm6, 4 ; xmm6 = x1 x0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 * x2 * x1) (x1 * x2 * x0) (x1 * x0) x0
+ pslldq xmm6, 4 ; xmm6 = x0 0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 * x2 * x1 * x0) (x1 * x2 * x0) (x1 * x0) x0
+
+ pshufd xmm7, xmm7, 55h ; xmm7 = RotWord(SubWord(x5)) * Rcon
+ ; RotWord(SubWord(x5)) * Rcon
+ ; RotWord(SubWord(x5)) * Rcon
+ ; RotWord(SubWord(x5)) * Rcon
+
+ pxor xmm1, xmm7 ; xmm1 = RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x0
+
+ movq qword ptr [ecx], xmm1 ; store the two lowest new words (8 bytes)
+ add ecx, 8
+
+ pshufd xmm7, xmm1, 0FFh ; xmm7 = -
+ ; -
+ ; RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+ pxor xmm7, xmm2 ; xmm7 = -
+ ; -
+ ; RotWord(SubWord(x5)) * Rcon * x5 * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x4 * x3 * x2 * x1 * x0
+ pslldq xmm2, 4 ; xmm2 = - x5 x4 0
+ pxor xmm7, xmm2 ; xmm7 = -
+ ; -
+ ; RotWord(SubWord(x5)) * Rcon * x5 * x4 * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x4 * x3 * x2 * x1 * x0
+
+ movq xmm2, xmm7 ; xmm2 = the two newest words in the low qword (upper qword zeroed) for the next call
+ pslldq xmm7, 8 ; move those two words into the high qword
+ movdqa xmm6, xmm1
+ psrldq xmm6, 8 ; xmm6 = the two highest words of xmm1, moved to the low qword
+ por xmm7, xmm6 ; xmm7 = remaining four new words, in schedule order
+ movdqu [ecx], xmm7 ; unaligned store: ecx is only guaranteed 8-byte aligned here
+ add ecx, 10h
+ ret
+
+invert_key_schedule: ; inverted[i] = key_schedule[12 - i], with aesimc applied to every entry except the first and last; clobbers xmm6, xmm7
+ movdqa xmm7, [key_schedule]
+ movdqa xmm6, [key_schedule + 0C0h]
+ movdqa [inverted_key_schedule], xmm6 ; the two outermost keys are swapped without aesimc
+ movdqa [inverted_key_schedule + 0C0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h] ; each following group handles one mirrored pair of round keys
+ aesimc xmm6, [key_schedule + 0B0h]
+ movdqa [inverted_key_schedule + 10h], xmm6
+ movdqa [inverted_key_schedule + 0B0h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 0A0h]
+ movdqa [inverted_key_schedule + 20h], xmm6
+ movdqa [inverted_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverted_key_schedule + 30h], xmm6
+ movdqa [inverted_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverted_key_schedule + 40h], xmm6
+ movdqa [inverted_key_schedule + 80h], xmm7
+
+ aesimc xmm7, [key_schedule + 50h]
+ aesimc xmm6, [key_schedule + 70h]
+ movdqa [inverted_key_schedule + 50h], xmm6
+ movdqa [inverted_key_schedule + 70h], xmm7
+
+ aesimc xmm7, [key_schedule + 60h] ; the middle key maps onto its own slot
+ movdqa [inverted_key_schedule + 60h], xmm7
+
+ ret
+expand_keys_192ecb endp
+
+end
diff --git a/src/aes256cbc.asm b/src/aes256cbc.asm
new file mode 100644
index 0000000..3446d31
--- /dev/null
+++ b/src/aes256cbc.asm
@@ -0,0 +1,183 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h ; 16-byte alignment: both tables are accessed with movdqa and as aligned SSE memory operands
+key_schedule oword 15 dup(0) ; 15 round keys (offsets 0..0E0h), written by expand_keys_256cbc
+
+align 10h
+inverse_key_schedule oword 15 dup(0) ; round keys consumed by aesdec/aesdeclast, written by invert_key_schedule
+
+.code
+
+@aes256cbc_encrypt@52 proc ; in: xmm0 = 16-byte block, xmm1/xmm2 = low/high 128 bits of the key, ecx = pointer to the 16 bytes xor-ed in before encryption; out: xmm0
+ call expand_keys_256cbc ; rebuilds both schedules on every call; uses edx as its cursor, so ecx survives; clobbers edx, xmm1, xmm2, xmm6, xmm7
+ pxor xmm0, [ecx] ; CBC chaining xor (presumably the IV or previous ciphertext block - confirm with the C prototype); [ecx] must be 16-byte aligned
+ pxor xmm0, [key_schedule] ; initial AddRoundKey with round key 0
+ aesenc xmm0, [key_schedule + 10h] ; rounds 1-13, one round key each
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenc xmm0, [key_schedule + 0A0h]
+ aesenc xmm0, [key_schedule + 0B0h]
+ aesenc xmm0, [key_schedule + 0C0h]
+ aesenc xmm0, [key_schedule + 0D0h]
+ aesenclast xmm0, [key_schedule + 0E0h] ; round 14 uses the "last round" form of the instruction
+ ret ; result is left in xmm0; the memory at [ecx] is not modified
+@aes256cbc_encrypt@52 endp
+
+expand_keys_256cbc proc ; in: xmm1 = low 128 key bits, xmm2 = high 128 key bits; fills key_schedule and inverse_key_schedule; clobbers edx, xmm1, xmm2, xmm6, xmm7
+ lea edx, [key_schedule + 20h] ; edx = slot where gen_round_key stores the next round key
+ movdqa [key_schedule], xmm1 ; round keys 0 and 1 are the two key halves themselves
+ movdqa [key_schedule + 10h], xmm2
+
+ aeskeygenassist xmm7, xmm2, 1h ; even-numbered round keys: non-zero Rcon immediate ...
+ pshufd xmm7, xmm7, 0FFh ; ... and the top dword of the result is broadcast
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0 ; odd-numbered round keys: immediate 0 ...
+ pshufd xmm7, xmm7, 0AAh ; ... and dword 2 of the result is broadcast instead
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 2h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 4h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 8h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 10h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 20h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 40h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key ; 13th generated key lands at key_schedule + 0E0h
+
+ call invert_key_schedule ; derive the decryption schedule from the finished key_schedule
+ ret
+
+gen_round_key: ; in: xmm1 = round key i-2, xmm2 = round key i-1, xmm7 = broadcast assist word; stores round key i at [edx]; out: xmm1 = key i-1, xmm2 = key i
+ movdqa xmm6, xmm1
+
+ pslldq xmm6, 4 ; three shift-and-xor steps leave in each dword of xmm1 the xor of itself and all lower dwords
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+
+ pxor xmm1, xmm7 ; xmm1 = new round key
+
+ movdqa [edx], xmm1 ; append it to key_schedule
+ add edx, 10h
+
+ movdqa xmm7, xmm1 ; rotate the pair so the next call again sees (older, newest) in (xmm1, xmm2)
+ movdqa xmm1, xmm2
+ movdqa xmm2, xmm7
+ ret
+
+invert_key_schedule: ; inverse[i] = key_schedule[14 - i], with aesimc applied to every entry except the first and last; clobbers xmm6, xmm7
+ movdqa xmm7, [key_schedule]
+ movdqa xmm6, [key_schedule + 0E0h]
+ movdqa [inverse_key_schedule], xmm6 ; the two outermost keys are swapped without aesimc
+ movdqa [inverse_key_schedule + 0E0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h] ; each following group handles one mirrored pair of round keys
+ aesimc xmm6, [key_schedule + 0D0h]
+ movdqa [inverse_key_schedule + 10h], xmm6
+ movdqa [inverse_key_schedule + 0D0h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 0C0h]
+ movdqa [inverse_key_schedule + 20h], xmm6
+ movdqa [inverse_key_schedule + 0C0h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 0B0h]
+ movdqa [inverse_key_schedule + 30h], xmm6
+ movdqa [inverse_key_schedule + 0B0h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 0A0h]
+ movdqa [inverse_key_schedule + 40h], xmm6
+ movdqa [inverse_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 50h]
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverse_key_schedule + 50h], xmm6
+ movdqa [inverse_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 60h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverse_key_schedule + 60h], xmm6
+ movdqa [inverse_key_schedule + 80h], xmm7
+
+ aesimc xmm7, [key_schedule + 70h] ; the middle key maps onto its own slot
+ movdqa [inverse_key_schedule + 70h], xmm7
+
+ ret
+expand_keys_256cbc endp
+
+@aes256cbc_decrypt@52 proc ; in: xmm0 = 16-byte block, xmm1/xmm2 = low/high 128 bits of the key, ecx = pointer to the 16 bytes xor-ed in after decryption; out: xmm0
+ call expand_keys_256cbc ; rebuilds both schedules on every call; uses edx as its cursor, so ecx survives; clobbers edx, xmm1, xmm2, xmm6, xmm7
+ pxor xmm0, [inverse_key_schedule] ; entry 0 of the inverse schedule is the last encryption round key
+ aesdec xmm0, [inverse_key_schedule + 10h] ; rounds 1-13 use the aesimc-transformed keys
+ aesdec xmm0, [inverse_key_schedule + 20h]
+ aesdec xmm0, [inverse_key_schedule + 30h]
+ aesdec xmm0, [inverse_key_schedule + 40h]
+ aesdec xmm0, [inverse_key_schedule + 50h]
+ aesdec xmm0, [inverse_key_schedule + 60h]
+ aesdec xmm0, [inverse_key_schedule + 70h]
+ aesdec xmm0, [inverse_key_schedule + 80h]
+ aesdec xmm0, [inverse_key_schedule + 90h]
+ aesdec xmm0, [inverse_key_schedule + 0A0h]
+ aesdec xmm0, [inverse_key_schedule + 0B0h]
+ aesdec xmm0, [inverse_key_schedule + 0C0h]
+ aesdec xmm0, [inverse_key_schedule + 0D0h]
+ aesdeclast xmm0, [inverse_key_schedule + 0E0h] ; last entry is round key 0, untransformed
+ pxor xmm0, [ecx] ; CBC chaining xor (presumably the IV or previous ciphertext block - confirm with the C prototype); [ecx] must be 16-byte aligned
+ ret ; result is left in xmm0; the memory at [ecx] is not modified
+@aes256cbc_decrypt@52 endp
+
+end
diff --git a/src/aes256ecb.asm b/src/aes256ecb.asm
new file mode 100644
index 0000000..4246dee
--- /dev/null
+++ b/src/aes256ecb.asm
@@ -0,0 +1,181 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h ; 16-byte alignment: both tables are accessed with movdqa and as aligned SSE memory operands
+key_schedule oword 15 dup(0) ; 15 round keys (offsets 0..0E0h), written by expand_keys_256ecb
+
+align 10h
+inverse_key_schedule oword 15 dup(0) ; round keys consumed by aesdec/aesdeclast, written by invert_key_schedule
+
+.code
+
+@aes256ecb_encrypt@48 proc ; in: xmm0 = 16-byte block, xmm1/xmm2 = low/high 128 bits of the key; out: xmm0 = encrypted block
+ call expand_keys_256ecb ; rebuilds both schedules on every call; clobbers edx, xmm1, xmm2, xmm6, xmm7
+ pxor xmm0, [key_schedule] ; initial AddRoundKey with round key 0
+ aesenc xmm0, [key_schedule + 10h] ; rounds 1-13, one round key each
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenc xmm0, [key_schedule + 0A0h]
+ aesenc xmm0, [key_schedule + 0B0h]
+ aesenc xmm0, [key_schedule + 0C0h]
+ aesenc xmm0, [key_schedule + 0D0h]
+ aesenclast xmm0, [key_schedule + 0E0h] ; round 14 uses the "last round" form of the instruction
+ ret ; result is left in xmm0
+@aes256ecb_encrypt@48 endp
+
+expand_keys_256ecb proc ; in: xmm1 = low 128 key bits, xmm2 = high 128 key bits; fills key_schedule and inverse_key_schedule; clobbers edx, xmm1, xmm2, xmm6, xmm7
+ lea edx, [key_schedule + 20h] ; edx = slot where gen_round_key stores the next round key
+ movdqa [key_schedule], xmm1 ; round keys 0 and 1 are the two key halves themselves
+ movdqa [key_schedule + 10h], xmm2
+
+ aeskeygenassist xmm7, xmm2, 1h ; even-numbered round keys: non-zero Rcon immediate ...
+ pshufd xmm7, xmm7, 0FFh ; ... and the top dword of the result is broadcast
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0 ; odd-numbered round keys: immediate 0 ...
+ pshufd xmm7, xmm7, 0AAh ; ... and dword 2 of the result is broadcast instead
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 2h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 4h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 8h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 10h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 20h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 40h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key ; 13th generated key lands at key_schedule + 0E0h
+
+ call invert_key_schedule ; derive the decryption schedule from the finished key_schedule
+ ret
+
+gen_round_key: ; in: xmm1 = round key i-2, xmm2 = round key i-1, xmm7 = broadcast assist word; stores round key i at [edx]; out: xmm1 = key i-1, xmm2 = key i
+ movdqa xmm6, xmm1
+
+ pslldq xmm6, 4 ; three shift-and-xor steps leave in each dword of xmm1 the xor of itself and all lower dwords
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+
+ pxor xmm1, xmm7 ; xmm1 = new round key
+
+ movdqa [edx], xmm1 ; append it to key_schedule
+ add edx, 10h
+
+ movdqa xmm7, xmm1 ; rotate the pair so the next call again sees (older, newest) in (xmm1, xmm2)
+ movdqa xmm1, xmm2
+ movdqa xmm2, xmm7
+ ret
+
+invert_key_schedule: ; inverse[i] = key_schedule[14 - i], with aesimc applied to every entry except the first and last; clobbers xmm6, xmm7
+ movdqa xmm7, [key_schedule ]
+ movdqa xmm6, [key_schedule + 0E0h]
+ movdqa [inverse_key_schedule ], xmm6 ; the two outermost keys are swapped without aesimc
+ movdqa [inverse_key_schedule + 0E0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h] ; each following group handles one mirrored pair of round keys
+ aesimc xmm6, [key_schedule + 0D0h]
+ movdqa [inverse_key_schedule + 10h], xmm6
+ movdqa [inverse_key_schedule + 0D0h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 0C0h]
+ movdqa [inverse_key_schedule + 20h], xmm6
+ movdqa [inverse_key_schedule + 0C0h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 0B0h]
+ movdqa [inverse_key_schedule + 30h], xmm6
+ movdqa [inverse_key_schedule + 0B0h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 0A0h]
+ movdqa [inverse_key_schedule + 40h], xmm6
+ movdqa [inverse_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 50h]
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverse_key_schedule + 50h], xmm6
+ movdqa [inverse_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 60h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverse_key_schedule + 60h], xmm6
+ movdqa [inverse_key_schedule + 80h], xmm7
+
+ aesimc xmm7, [key_schedule + 70h] ; the middle key maps onto its own slot
+ movdqa [inverse_key_schedule + 70h], xmm7
+
+ ret
+expand_keys_256ecb endp
+
+@aes256ecb_decrypt@48 proc ; in: xmm0 = 16-byte block, xmm1/xmm2 = low/high 128 bits of the key; out: xmm0 = decrypted block
+ call expand_keys_256ecb ; rebuilds both schedules on every call; clobbers edx, xmm1, xmm2, xmm6, xmm7
+ pxor xmm0, [inverse_key_schedule] ; entry 0 of the inverse schedule is the last encryption round key
+ aesdec xmm0, [inverse_key_schedule + 10h] ; rounds 1-13 use the aesimc-transformed keys
+ aesdec xmm0, [inverse_key_schedule + 20h]
+ aesdec xmm0, [inverse_key_schedule + 30h]
+ aesdec xmm0, [inverse_key_schedule + 40h]
+ aesdec xmm0, [inverse_key_schedule + 50h]
+ aesdec xmm0, [inverse_key_schedule + 60h]
+ aesdec xmm0, [inverse_key_schedule + 70h]
+ aesdec xmm0, [inverse_key_schedule + 80h]
+ aesdec xmm0, [inverse_key_schedule + 90h]
+ aesdec xmm0, [inverse_key_schedule + 0A0h]
+ aesdec xmm0, [inverse_key_schedule + 0B0h]
+ aesdec xmm0, [inverse_key_schedule + 0C0h]
+ aesdec xmm0, [inverse_key_schedule + 0D0h]
+ aesdeclast xmm0, [inverse_key_schedule + 0E0h] ; last entry is round key 0, untransformed
+ ret ; result is left in xmm0
+@aes256ecb_decrypt@48 endp
+
+end
diff --git a/src/common.c b/src/common.c
new file mode 100644
index 0000000..92f5e24
--- /dev/null
+++ b/src/common.c
@@ -0,0 +1,38 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include "aesni/all.h"
+
+#include <intrin.h>
+
+#include <stdio.h>
+
+AesBlock make_aes_block(int highest, int high, int low, int lowest) /* Packs four 32-bit values into one 128-bit block. */
+{
+ return _mm_set_epi32(highest, high, low, lowest); /* _mm_set_epi32 takes the most significant dword first, so `lowest` ends up in bits 0-31 */
+}
+
+AesState aes_block_to_state(AesBlock block) /* Copies the 16 bytes of `block` into an AesState and returns it by value. */
+{
+ AesState state;
+ _mm_storeu_si128((__m128i*) &state.bytes, block); /* unaligned store: no alignment is assumed for state.bytes */
+ return state;
+}
+
+void print_aes_block(AesBlock block) /* Prints the block to stdout as four lines of four two-digit hex bytes. */
+{
+ int i, j;
+ AesState state = aes_block_to_state(block);
+
+ for (i = 0; i < 4; ++i) /* one output line per i */
+ {
+ for (j = 0; j < 3; ++j)
+ printf("%02x ", state.bytes[j][i]); /* indexed [j][i]: line i shows bytes[0][i]..bytes[3][i], i.e. the stored array transposed */
+ printf("%02x\n", state.bytes[3][i]); /* fourth byte of the line is followed by a newline instead of a space */
+ }
+}