aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorEgor Tensin <Egor.Tensin@gmail.com>2015-05-22 01:03:07 +0300
committerEgor Tensin <Egor.Tensin@gmail.com>2015-05-22 01:03:07 +0300
commit1463fd0559e0a664cd6a1bf6462a68dd79e58814 (patch)
treecd9ba513f473186a0780baf3f212c57d4521d91f
downloadaes-tools-1463fd0559e0a664cd6a1bf6462a68dd79e58814.tar.gz
aes-tools-1463fd0559e0a664cd6a1bf6462a68dd79e58814.zip
initial commit
Diffstat (limited to '')
-rw-r--r--CMakeLists.txt8
-rw-r--r--LICENSE.txt21
-rw-r--r--README.md14
-rw-r--r--examples/CMakeLists.txt11
-rw-r--r--examples/aes128_example.c35
-rw-r--r--examples/aes192_example.c39
-rw-r--r--examples/aes256cbc_example.c43
-rw-r--r--examples/aes256ecb_example.c39
-rw-r--r--include/aesni/all.h12
-rw-r--r--include/aesni/data.h41
-rw-r--r--include/aesni/raw.h47
-rw-r--r--src/aes128.asm129
-rw-r--r--src/aes192.asm196
-rw-r--r--src/aes256cbc.asm183
-rw-r--r--src/aes256ecb.asm181
-rw-r--r--src/common.c38
16 files changed, 1037 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..cff7e40
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Top-level build script: builds the libaesni library from the C and MASM
+# sources under src/ and then descends into the examples.
+#
+# target_include_directories() was introduced in CMake 2.8.11, so that is the
+# oldest version this script can work with. The minimum has to be declared
+# before project().
+cmake_minimum_required(VERSION 2.8.11)
+project(libaesni C ASM_MASM)
+
+# Collect the public headers and every assembly/C source file.
+file(GLOB_RECURSE ${PROJECT_NAME}_headers "include/*.h")
+file(GLOB ${PROJECT_NAME}_sources "src/*.asm" "src/*.c")
+
+add_library(${PROJECT_NAME} ${${PROJECT_NAME}_headers}
+ ${${PROJECT_NAME}_sources})
+# PUBLIC: targets linking against the library inherit the include path.
+target_include_directories(${PROJECT_NAME} PUBLIC include/)
+
+add_subdirectory(examples)
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..fbbdd68
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Egor Tensin <Egor.Tensin@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..561b27e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# aesni
+
+Simple AES encryption algorithm implementation using the AES-NI instruction set.
+
+## Building
+
+I've used the compiler and the assembler shipped with Visual Studio Express 2013 with Update 4 for Windows Desktop.
+
+You can generate the solution using CMake and build it using Visual Studio.
+
+## Licensing
+
+This project, including all of the files and their contents, is licensed under the terms of the MIT License.
+See LICENSE.txt for details.
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000..8277421
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Every example executable is linked with /SAFESEH:NO.
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SAFESEH:NO")
+
+# example(<prefix>): builds <prefix>_example from <prefix>_example.c and links
+# it against libaesni.
+macro(example prefix)
+    add_executable(${prefix}_example ${prefix}_example.c)
+    target_link_libraries(${prefix}_example libaesni)
+endmacro()
+
+foreach(algorithm aes128 aes192 aes256ecb aes256cbc)
+    example(${algorithm})
+endforeach()
diff --git a/examples/aes128_example.c b/examples/aes128_example.c
new file mode 100644
index 0000000..b4f689f
--- /dev/null
+++ b/examples/aes128_example.c
@@ -0,0 +1,35 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+/*
+ * AES-128 demo: prints a fixed plaintext block and key, then the result of
+ * encrypting the block and of decrypting that result with the same key.
+ */
+int main()
+{
+    /* Sample data; make_aes_block() takes the words most significant first. */
+    __declspec(align(16)) AesBlock plaintext =
+        make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100);
+    __declspec(align(16)) AesBlock cipher_key =
+        make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+    __declspec(align(16)) AesBlock ciphertext;
+    __declspec(align(16)) AesBlock recovered;
+
+    printf("Plain:\n");
+    print_aes_block(plaintext);
+
+    printf("\nKey:\n");
+    print_aes_block(cipher_key);
+
+    printf("\nCypher:\n");
+    ciphertext = aes128ecb_encrypt(plaintext, cipher_key);
+    print_aes_block(ciphertext);
+
+    printf("\nDecrypted:\n");
+    recovered = aes128ecb_decrypt(ciphertext, cipher_key);
+    print_aes_block(recovered);
+
+    return 0;
+}
diff --git a/examples/aes192_example.c b/examples/aes192_example.c
new file mode 100644
index 0000000..dd22d80
--- /dev/null
+++ b/examples/aes192_example.c
@@ -0,0 +1,39 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+/*
+ * AES-192 demo: prints a fixed plaintext block and both key halves, then the
+ * result of encrypting the block and of decrypting that result.
+ */
+int main()
+{
+    /* Sample data; make_aes_block() takes the words most significant first. */
+    __declspec(align(16)) AesBlock plaintext =
+        make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100);
+    /* The 192-bit key: a full low block plus 64 bits in the low half of the high block. */
+    __declspec(align(16)) AesBlock key_lower =
+        make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+    __declspec(align(16)) AesBlock key_upper =
+        make_aes_block(0, 0, 0x17161514, 0x13121110);
+    __declspec(align(16)) AesBlock ciphertext;
+    __declspec(align(16)) AesBlock recovered;
+
+    printf("Plain:\n");
+    print_aes_block(plaintext);
+
+    printf("\nKey low:\n");
+    print_aes_block(key_lower);
+    printf("\nKey high:\n");
+    print_aes_block(key_upper);
+
+    printf("\nCypher:\n");
+    ciphertext = aes192ecb_encrypt(plaintext, key_lower, key_upper);
+    print_aes_block(ciphertext);
+
+    printf("\nDecrypted:\n");
+    recovered = aes192ecb_decrypt(ciphertext, key_lower, key_upper);
+    print_aes_block(recovered);
+
+    return 0;
+}
diff --git a/examples/aes256cbc_example.c b/examples/aes256cbc_example.c
new file mode 100644
index 0000000..1fcc615
--- /dev/null
+++ b/examples/aes256cbc_example.c
@@ -0,0 +1,43 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+/*
+ * AES-256 CBC demo: prints a fixed plaintext block, both key halves and the
+ * initialization vector, then the result of encrypting the block and of
+ * decrypting that result with the same key and vector.
+ */
+int main()
+{
+    /* Sample data; make_aes_block() takes the words most significant first. */
+    __declspec(align(16)) AesBlock plaintext =
+        make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100);
+    __declspec(align(16)) AesBlock key_lower =
+        make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+    __declspec(align(16)) AesBlock key_upper =
+        make_aes_block(0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110);
+    __declspec(align(16)) AesBlock init_vector =
+        make_aes_block(0xfedcba98, 0x76543210, 0xfedcba98, 0x76543210);
+    __declspec(align(16)) AesBlock ciphertext;
+    __declspec(align(16)) AesBlock recovered;
+
+    printf("Plain:\n");
+    print_aes_block(plaintext);
+
+    printf("\nKey low:\n");
+    print_aes_block(key_lower);
+    printf("\nKey high:\n");
+    print_aes_block(key_upper);
+
+    printf("\nInitialization vector:\n");
+    print_aes_block(init_vector);
+
+    /* The vector is passed by address to both calls. */
+    printf("\nCypher:\n");
+    ciphertext = aes256cbc_encrypt(plaintext, key_lower, key_upper, &init_vector);
+    print_aes_block(ciphertext);
+
+    printf("\nDecrypted:\n");
+    recovered = aes256cbc_decrypt(ciphertext, key_lower, key_upper, &init_vector);
+    print_aes_block(recovered);
+
+    return 0;
+}
diff --git a/examples/aes256ecb_example.c b/examples/aes256ecb_example.c
new file mode 100644
index 0000000..25ec61c
--- /dev/null
+++ b/examples/aes256ecb_example.c
@@ -0,0 +1,39 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+/*
+ * AES-256 ECB demo: prints a fixed plaintext block and both key halves, then
+ * the result of encrypting the block and of decrypting that result.
+ */
+int main()
+{
+    /* Sample data; make_aes_block() takes the words most significant first. */
+    __declspec(align(16)) AesBlock plaintext =
+        make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100);
+    __declspec(align(16)) AesBlock key_lower =
+        make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+    __declspec(align(16)) AesBlock key_upper =
+        make_aes_block(0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110);
+    __declspec(align(16)) AesBlock ciphertext;
+    __declspec(align(16)) AesBlock recovered;
+
+    printf("Plain:\n");
+    print_aes_block(plaintext);
+
+    printf("\nKey low:\n");
+    print_aes_block(key_lower);
+    printf("\nKey high:\n");
+    print_aes_block(key_upper);
+
+    printf("\nCypher:\n");
+    ciphertext = aes256ecb_encrypt(plaintext, key_lower, key_upper);
+    print_aes_block(ciphertext);
+
+    printf("\nDecrypted:\n");
+    recovered = aes256ecb_decrypt(ciphertext, key_lower, key_upper);
+    print_aes_block(recovered);
+
+    return 0;
+}
diff --git a/include/aesni/all.h b/include/aesni/all.h
new file mode 100644
index 0000000..f16d2a6
--- /dev/null
+++ b/include/aesni/all.h
@@ -0,0 +1,12 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#pragma once
+
+#include "data.h"
+#include "raw.h"
diff --git a/include/aesni/data.h b/include/aesni/data.h
new file mode 100644
index 0000000..029d8c8
--- /dev/null
+++ b/include/aesni/data.h
@@ -0,0 +1,41 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#pragma once
+
+#include <emmintrin.h>
+
+/** One 128-bit AES block, held in an SSE2 integer vector. */
+typedef __m128i AesBlock;
+
+/**
+ * Builds a block from four 32-bit words.
+ *
+ * The arguments are handed to _mm_set_epi32() in the order given, so
+ * \p highest becomes the most significant word of the block and \p lowest the
+ * least significant one.
+ *
+ * \param highest Most significant 32-bit word.
+ * \param high Second most significant 32-bit word.
+ * \param low Second least significant 32-bit word.
+ * \param lowest Least significant 32-bit word.
+ * \return The assembled block.
+ */
+AesBlock make_aes_block(int highest, int high, int low, int lowest);
+
+/** A 128-bit key occupies exactly one block. */
+typedef AesBlock Aes128Key;
+
+/**
+ * Two-block container for a 192-bit key.
+ * NOTE(review): the functions declared in raw.h take the key halves as
+ * separate AesBlock arguments; nothing in this header uses this struct.
+ */
+typedef struct
+{
+    AesBlock hi;
+    AesBlock lo;
+}
+Aes192Key;
+
+/**
+ * Two-block container for a 256-bit key.
+ * NOTE(review): as with Aes192Key, not used by the functions declared in raw.h.
+ */
+typedef struct
+{
+    AesBlock hi;
+    AesBlock lo;
+}
+Aes256Key;
+
+/** The 16 bytes of a block, addressable as a 4x4 byte matrix. */
+typedef struct
+{
+    unsigned char bytes[4][4];
+}
+AesState;
+
+/**
+ * Copies the 16 bytes of a block into an AesState.
+ *
+ * \param block Block to convert.
+ * \return The bytes of \p block in memory order.
+ */
+AesState aes_block_to_state(AesBlock block);
+
+/**
+ * Prints a block to standard output as four lines of four hexadecimal bytes.
+ *
+ * \param block Block to print.
+ */
+void print_aes_block(AesBlock block);
diff --git a/include/aesni/raw.h b/include/aesni/raw.h
new file mode 100644
index 0000000..03ce217
--- /dev/null
+++ b/include/aesni/raw.h
@@ -0,0 +1,47 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#pragma once
+
+#include "data.h"
+
+/*
+ * Single-block AES primitives implemented in assembly (src/*.asm).
+ *
+ * Every function expands the key schedule afresh on each call and keeps it in
+ * a statically allocated buffer private to its assembly module.
+ */
+
+/** Encrypts one block with a 128-bit key. */
+AesBlock __fastcall aes128ecb_encrypt(AesBlock plain, AesBlock key);
+/** Decrypts one block with a 128-bit key. */
+AesBlock __fastcall aes128ecb_decrypt(AesBlock cypher, AesBlock key);
+
+/**
+ * Encrypts one block with a 192-bit key.
+ * \p key_lo holds the first 128 key bits, the low 64 bits of \p key_hi the rest.
+ */
+AesBlock __fastcall aes192ecb_encrypt(AesBlock plain, AesBlock key_lo, AesBlock key_hi);
+/** Decrypts one block with a 192-bit key; key layout as for aes192ecb_encrypt(). */
+AesBlock __fastcall aes192ecb_decrypt(AesBlock cypher, AesBlock key_lo, AesBlock key_hi);
+
+/** Encrypts one block with a 256-bit key given as two 128-bit halves. */
+AesBlock __fastcall aes256ecb_encrypt(AesBlock plain, AesBlock key_lo, AesBlock key_hi);
+/** Decrypts one block with a 256-bit key given as two 128-bit halves. */
+AesBlock __fastcall aes256ecb_decrypt(AesBlock cypher, AesBlock key_lo, AesBlock key_hi);
+
+/**
+ * XORs \p plain with \p *iv and encrypts the result with a 256-bit key.
+ * \p *iv is only read, never updated. It is used directly as an SSE memory
+ * operand and should therefore be 16-byte aligned.
+ */
+AesBlock __fastcall aes256cbc_encrypt(AesBlock plain, AesBlock key_lo, AesBlock key_hi, AesBlock *iv);
+/**
+ * Decrypts \p cypher with a 256-bit key and XORs the result with \p *iv.
+ * \p *iv is only read, never updated; same alignment note as for
+ * aes256cbc_encrypt().
+ */
+AesBlock __fastcall aes256cbc_decrypt(AesBlock cypher, AesBlock key_lo, AesBlock key_hi, AesBlock *iv);
diff --git a/src/aes128.asm b/src/aes128.asm
new file mode 100644
index 0000000..dfa7a7d
--- /dev/null
+++ b/src/aes128.asm
@@ -0,0 +1,129 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+; Encryption round keys: 11 entries of 16 bytes, rewritten by
+; expand_keys_128ecb on every call.
+align 10h
+key_schedule oword 11 dup(0)
+
+; Round keys as consumed by the aesdec sequence, rewritten by
+; invert_key_schedule on every call.
+align 10h
+inverted_key_schedule oword 11 dup(0)
+
+.code
+
+; Encrypts one block with a 128-bit key.
+; In: xmm0 = block, xmm1 = key.
+; Out: xmm0 = encrypted block.
+; The key schedule is rebuilt on every call (see expand_keys_128ecb for the
+; registers that clobbers).
+@aes128ecb_encrypt@32 proc
+ call expand_keys_128ecb
+ pxor xmm0, [key_schedule] ; initial round key addition
+ aesenc xmm0, [key_schedule + 10h]
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenclast xmm0, [key_schedule + 0A0h] ; final round
+ ret
+@aes128ecb_encrypt@32 endp
+
+; Decrypts one block with a 128-bit key.
+; In: xmm0 = block, xmm1 = key.
+; Out: xmm0 = decrypted block.
+; Both key schedules are rebuilt on every call; only the inverted one is used.
+@aes128ecb_decrypt@32 proc
+ call expand_keys_128ecb
+ pxor xmm0, [inverted_key_schedule] ; entry 0 is the last encryption round key
+ aesdec xmm0, [inverted_key_schedule + 10h]
+ aesdec xmm0, [inverted_key_schedule + 20h]
+ aesdec xmm0, [inverted_key_schedule + 30h]
+ aesdec xmm0, [inverted_key_schedule + 40h]
+ aesdec xmm0, [inverted_key_schedule + 50h]
+ aesdec xmm0, [inverted_key_schedule + 60h]
+ aesdec xmm0, [inverted_key_schedule + 70h]
+ aesdec xmm0, [inverted_key_schedule + 80h]
+ aesdec xmm0, [inverted_key_schedule + 90h]
+ aesdeclast xmm0, [inverted_key_schedule + 0A0h] ; entry 10 is the original key
+ ret
+@aes128ecb_decrypt@32 endp
+
+; Fills key_schedule and inverted_key_schedule from the 128-bit key in xmm1.
+; Clobbers ecx, xmm1, xmm6 and xmm7; xmm0 is left untouched.
+expand_keys_128ecb proc
+ lea ecx, [key_schedule + 10h] ; ecx = where the next round key is stored
+ movdqa [key_schedule], xmm1 ; round key 0 is the key itself
+
+ ; One aeskeygenassist/gen_round_key pair per round key 1..10; the
+ ; immediate is the round constant for that round.
+ aeskeygenassist xmm7, xmm1, 01h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 02h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 04h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 08h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 10h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 20h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 40h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 80h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 1Bh
+ call gen_round_key
+ aeskeygenassist xmm7, xmm1, 36h
+ call gen_round_key
+
+ call invert_key_schedule
+ ret
+
+; In: xmm1 = previous round key, xmm7 = aeskeygenassist result for it,
+; ecx = destination. Out: xmm1 = new round key (also stored at [ecx]),
+; ecx advanced by 16. Words are listed most significant first; adjacent
+; names in the comments below are XORed together.
+gen_round_key:
+ movdqa xmm6, xmm1 ; xmm6 = key_schedule[i]
+ ; xmm6 = x3 x2 x1 x0
+
+ pslldq xmm6, 4 ; xmm6 = x2 x1 x0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 x2) (x2 x1) (x1 x0) x0
+ pslldq xmm6, 4 ; xmm6 = x1 x0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 x2 x1) (x2 x1 x0) (x1 x0) x0
+ pslldq xmm6, 4 ; xmm6 = x0 0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 x2 x1 x0) (x2 x1 x0) (x1 x0) x0
+
+ pshufd xmm7, xmm7, 0FFh ; broadcast the top word of the assist result
+ pxor xmm1, xmm7
+
+ movdqa [ecx], xmm1
+ add ecx, 10h
+ ret
+
+; Builds inverted_key_schedule: entry i is key_schedule[10 - i], with aesimc
+; applied to every entry except the first and the last.
+invert_key_schedule:
+ movdqa xmm7, [key_schedule]
+ movdqa xmm6, [key_schedule + 0A0h]
+ movdqa [inverted_key_schedule], xmm6
+ movdqa [inverted_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h]
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverted_key_schedule + 10h], xmm6
+ movdqa [inverted_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverted_key_schedule + 20h], xmm6
+ movdqa [inverted_key_schedule + 80h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 70h]
+ movdqa [inverted_key_schedule + 30h], xmm6
+ movdqa [inverted_key_schedule + 70h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 60h]
+ movdqa [inverted_key_schedule + 40h], xmm6
+ movdqa [inverted_key_schedule + 60h], xmm7
+
+ ; The middle entry maps onto itself.
+ aesimc xmm7, [key_schedule + 50h]
+ movdqa [inverted_key_schedule + 50h], xmm7
+
+ ret
+expand_keys_128ecb endp
+
+end
diff --git a/src/aes192.asm b/src/aes192.asm
new file mode 100644
index 0000000..84d3a3a
--- /dev/null
+++ b/src/aes192.asm
@@ -0,0 +1,196 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+; Encryption round keys. AES-192 needs 13 round keys (52 words, 208 bytes),
+; but gen_round_key stores 24 bytes per call and is called eight times
+; starting at offset 18h, so its last store reaches offset 0D7h (216 bytes in
+; total). One extra oword is reserved so that this store stays inside the
+; array instead of running into whatever follows it.
+align 10h
+key_schedule oword 14 dup(0)
+
+; Round keys as consumed by the aesdec sequence, rewritten by
+; invert_key_schedule on every call.
+align 10h
+inverted_key_schedule oword 13 dup(0)
+
+.code
+
+; Encrypts one block with a 192-bit key.
+; In: xmm0 = block, xmm1 = first 128 key bits, xmm2 = remaining 64 key bits
+; in its low half.
+; Out: xmm0 = encrypted block.
+; The key schedule is rebuilt on every call.
+@aes192ecb_encrypt@48 proc
+ call expand_keys_192ecb
+ pxor xmm0, [key_schedule] ; initial round key addition
+ aesenc xmm0, [key_schedule + 10h]
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenc xmm0, [key_schedule + 0A0h]
+ aesenc xmm0, [key_schedule + 0B0h]
+ aesenclast xmm0, [key_schedule + 0C0h] ; final round
+ ret
+@aes192ecb_encrypt@48 endp
+
+; Decrypts one block with a 192-bit key.
+; In: xmm0 = block, xmm1 = first 128 key bits, xmm2 = remaining 64 key bits
+; in its low half.
+; Out: xmm0 = decrypted block.
+; Both key schedules are rebuilt on every call; only the inverted one is used.
+@aes192ecb_decrypt@48 proc
+ call expand_keys_192ecb
+ pxor xmm0, [inverted_key_schedule] ; entry 0 is the last encryption round key
+ aesdec xmm0, [inverted_key_schedule + 10h]
+ aesdec xmm0, [inverted_key_schedule + 20h]
+ aesdec xmm0, [inverted_key_schedule + 30h]
+ aesdec xmm0, [inverted_key_schedule + 40h]
+ aesdec xmm0, [inverted_key_schedule + 50h]
+ aesdec xmm0, [inverted_key_schedule + 60h]
+ aesdec xmm0, [inverted_key_schedule + 70h]
+ aesdec xmm0, [inverted_key_schedule + 80h]
+ aesdec xmm0, [inverted_key_schedule + 90h]
+ aesdec xmm0, [inverted_key_schedule + 0A0h]
+ aesdec xmm0, [inverted_key_schedule + 0B0h]
+ aesdeclast xmm0, [inverted_key_schedule + 0C0h] ; entry 12 is the first round key
+ ret
+@aes192ecb_decrypt@48 endp
+
+; Fills key_schedule and inverted_key_schedule from the 192-bit key held in
+; xmm1 (first four words) and the low half of xmm2 (last two words).
+; Clobbers ecx, xmm1, xmm2, xmm6 and xmm7; xmm0 is left untouched.
+; In the comments of this procedure "*" stands for XOR.
+expand_keys_192ecb proc
+ ; key = k0 k1 k2 k3 k4 k5
+ ; xmm1 = k0 k1 k2 k3
+ ; xmm2 = 0 0 k5 k4
+
+ ; w[0] = k0 k1 k2 k3
+ ; w[1] = k4 k5 - -
+
+ ; i = 6
+ ; while (i < 52):
+ ; temp = w[i - 1]
+ ; if (i % 6 == 0):
+ ; temp = SubWord(RotWord(w[i - 1])) * Rcon
+ ; w[i] = w[i - 6] * temp
+ ; i = i + 1
+
+ ; w[6] = SubWord(RotWord(w[5])) * Rcon * w[0]
+ ; w[7] = w[6] * w[1]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1]
+ ; w[8] = w[7] * w[2]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2]
+ ; w[9] = w[8] * w[3]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3]
+ ; w[10] = w[9] * w[4]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3] * w[4]
+ ; w[11] = w[10] * w[5]
+ ; = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3] * w[4] * w[5]
+
+ movdqa [key_schedule], xmm1
+ movdqa [key_schedule + 10h], xmm2
+
+ ; The first six words are the key itself, so generated words start at
+ ; byte offset 18h. Every gen_round_key call appends six words (24 bytes).
+ ; NOTE(review): eight calls store bytes up to offset 0D7h, i.e. 216 bytes
+ ; in total, 8 more than the 208 bytes (52 words) AES-192 uses; key_schedule
+ ; has to be large enough for that.
+ lea ecx, [key_schedule + 18h]
+ aeskeygenassist xmm7, xmm2, 1
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 2
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 4
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 8
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 10h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 20h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 40h
+ call gen_round_key
+ aeskeygenassist xmm7, xmm2, 80h
+ call gen_round_key
+
+ call invert_key_schedule
+ ret
+
+; In: xmm1/xmm2 = previous six key words, xmm7 = aeskeygenassist result for
+; xmm2, ecx = destination. Out: xmm1/xmm2 = next six key words (also stored at
+; [ecx]), ecx advanced by 24.
+gen_round_key:
+ ; xmm1 = x3 x2 x1 x0
+ ; xmm2 = - - x5 x4
+ ; xmm7 = RotWord(SubWord(-)) xor Rcon
+ ; SubWord(-)
+ ; RotWord(SubWord(x5)) xor Rcon
+ ; SubWord(x5)
+ movdqa xmm6, xmm1
+
+ pslldq xmm6, 4 ; xmm6 = x2 x1 x0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 * x2) (x1 * x2) (x1 * x0) x0
+ pslldq xmm6, 4 ; xmm6 = x1 x0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 * x2 * x1) (x1 * x2 * x0) (x1 * x0) x0
+ pslldq xmm6, 4 ; xmm6 = x0 0 0 0
+ pxor xmm1, xmm6 ; xmm1 = (x3 * x2 * x1 * x0) (x1 * x2 * x0) (x1 * x0) x0
+
+ pshufd xmm7, xmm7, 55h ; xmm7 = RotWord(SubWord(x5)) * Rcon
+ ; RotWord(SubWord(x5)) * Rcon
+ ; RotWord(SubWord(x5)) * Rcon
+ ; RotWord(SubWord(x5)) * Rcon
+
+ pxor xmm1, xmm7 ; xmm1 = RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x0
+
+ ; Store the two lowest new words right away; the other two are stored
+ ; together with the next two words below.
+ movq qword ptr [ecx], xmm1
+ add ecx, 8
+
+ pshufd xmm7, xmm1, 0FFh ; xmm7 = -
+ ; -
+ ; RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+ pxor xmm7, xmm2 ; xmm7 = -
+ ; -
+ ; RotWord(SubWord(x5)) * Rcon * x5 * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x4 * x3 * x2 * x1 * x0
+ pslldq xmm2, 4 ; xmm2 = - x5 x4 0
+ pxor xmm7, xmm2 ; xmm7 = -
+ ; -
+ ; RotWord(SubWord(x5)) * Rcon * x5 * x4 * x3 * x2 * x1 * x0
+ ; RotWord(SubWord(x5)) * Rcon * x4 * x3 * x2 * x1 * x0
+
+ movq xmm2, xmm7 ; xmm2 = 0 0 (new x5) (new x4) for the next call
+ pslldq xmm7, 8 ; move the two newest words to the high half
+ movdqa xmm6, xmm1
+ psrldq xmm6, 8 ; xmm6 = 0 0 (new x3) (new x2)
+ por xmm7, xmm6 ; xmm7 = (new x5) (new x4) (new x3) (new x2)
+ movdqu [ecx], xmm7 ; ecx is only 8-byte aligned here, hence movdqu
+ add ecx, 10h
+ ret
+
+; Builds inverted_key_schedule: entry i is key_schedule[12 - i], with aesimc
+; applied to every entry except the first and the last.
+invert_key_schedule:
+ movdqa xmm7, [key_schedule]
+ movdqa xmm6, [key_schedule + 0C0h]
+ movdqa [inverted_key_schedule], xmm6
+ movdqa [inverted_key_schedule + 0C0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h]
+ aesimc xmm6, [key_schedule + 0B0h]
+ movdqa [inverted_key_schedule + 10h], xmm6
+ movdqa [inverted_key_schedule + 0B0h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 0A0h]
+ movdqa [inverted_key_schedule + 20h], xmm6
+ movdqa [inverted_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverted_key_schedule + 30h], xmm6
+ movdqa [inverted_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverted_key_schedule + 40h], xmm6
+ movdqa [inverted_key_schedule + 80h], xmm7
+
+ aesimc xmm7, [key_schedule + 50h]
+ aesimc xmm6, [key_schedule + 70h]
+ movdqa [inverted_key_schedule + 50h], xmm6
+ movdqa [inverted_key_schedule + 70h], xmm7
+
+ ; The middle entry maps onto itself.
+ aesimc xmm7, [key_schedule + 60h]
+ movdqa [inverted_key_schedule + 60h], xmm7
+
+ ret
+expand_keys_192ecb endp
+
+end
diff --git a/src/aes256cbc.asm b/src/aes256cbc.asm
new file mode 100644
index 0000000..3446d31
--- /dev/null
+++ b/src/aes256cbc.asm
@@ -0,0 +1,183 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+; Encryption round keys: 15 entries of 16 bytes, rewritten by
+; expand_keys_256cbc on every call.
+align 10h
+key_schedule oword 15 dup(0)
+
+; Round keys as consumed by the aesdec sequence, rewritten by
+; invert_key_schedule on every call.
+align 10h
+inverse_key_schedule oword 15 dup(0)
+
+.code
+
+; Encrypts one block with a 256-bit key after XORing it with the block
+; ecx points at.
+; In: xmm0 = block, xmm1 = low key half, xmm2 = high key half,
+; ecx = address of the initialization vector.
+; Out: xmm0 = encrypted block. The memory at [ecx] is only read.
+@aes256cbc_encrypt@52 proc
+ call expand_keys_256cbc ; uses edx, so ecx still holds the vector address
+ pxor xmm0, [ecx] ; mix in the initialization vector
+ pxor xmm0, [key_schedule] ; initial round key addition
+ aesenc xmm0, [key_schedule + 10h]
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenc xmm0, [key_schedule + 0A0h]
+ aesenc xmm0, [key_schedule + 0B0h]
+ aesenc xmm0, [key_schedule + 0C0h]
+ aesenc xmm0, [key_schedule + 0D0h]
+ aesenclast xmm0, [key_schedule + 0E0h] ; final round
+ ret
+@aes256cbc_encrypt@52 endp
+
+; Fills key_schedule and inverse_key_schedule from the 256-bit key held in
+; xmm1 (low half) and xmm2 (high half).
+; Clobbers edx, xmm1, xmm2, xmm6 and xmm7; ecx and xmm0 are left untouched.
+expand_keys_256cbc proc
+ lea edx, [key_schedule + 20h] ; edx = where the next round key is stored
+ movdqa [key_schedule], xmm1 ; round keys 0 and 1 are the key itself
+ movdqa [key_schedule + 10h], xmm2
+
+ ; Round keys 2..14 alternate between two forms: with a round constant and
+ ; word 3 of the assist result (0FFh), and with constant 0 and word 2 of
+ ; the assist result (0AAh).
+ aeskeygenassist xmm7, xmm2, 1h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 2h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 4h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 8h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 10h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 20h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 40h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ call invert_key_schedule
+ ret
+
+; In: xmm1 = round key from two steps back, xmm2 = previous round key,
+; xmm7 = broadcast assist word, edx = destination.
+; Out: new round key stored at [edx] and left in xmm2, the former xmm2 moved
+; to xmm1, edx advanced by 16.
+gen_round_key:
+ movdqa xmm6, xmm1
+
+ ; XOR each word of xmm1 with all lower words of the original xmm1.
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+
+ pxor xmm1, xmm7
+
+ movdqa [edx], xmm1
+ add edx, 10h
+
+ ; Rotate the pair so the next call works on the other key half.
+ movdqa xmm7, xmm1
+ movdqa xmm1, xmm2
+ movdqa xmm2, xmm7
+ ret
+
+; Builds inverse_key_schedule: entry i is key_schedule[14 - i], with aesimc
+; applied to every entry except the first and the last.
+invert_key_schedule:
+ movdqa xmm7, [key_schedule]
+ movdqa xmm6, [key_schedule + 0E0h]
+ movdqa [inverse_key_schedule], xmm6
+ movdqa [inverse_key_schedule + 0E0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h]
+ aesimc xmm6, [key_schedule + 0D0h]
+ movdqa [inverse_key_schedule + 10h], xmm6
+ movdqa [inverse_key_schedule + 0D0h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 0C0h]
+ movdqa [inverse_key_schedule + 20h], xmm6
+ movdqa [inverse_key_schedule + 0C0h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 0B0h]
+ movdqa [inverse_key_schedule + 30h], xmm6
+ movdqa [inverse_key_schedule + 0B0h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 0A0h]
+ movdqa [inverse_key_schedule + 40h], xmm6
+ movdqa [inverse_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 50h]
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverse_key_schedule + 50h], xmm6
+ movdqa [inverse_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 60h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverse_key_schedule + 60h], xmm6
+ movdqa [inverse_key_schedule + 80h], xmm7
+
+ ; The middle entry maps onto itself.
+ aesimc xmm7, [key_schedule + 70h]
+ movdqa [inverse_key_schedule + 70h], xmm7
+
+ ret
+expand_keys_256cbc endp
+
+; Decrypts one block with a 256-bit key and XORs the result with the block
+; ecx points at.
+; In: xmm0 = block, xmm1 = low key half, xmm2 = high key half,
+; ecx = address of the initialization vector.
+; Out: xmm0 = decrypted block. The memory at [ecx] is only read.
+@aes256cbc_decrypt@52 proc
+ call expand_keys_256cbc ; uses edx, so ecx still holds the vector address
+ pxor xmm0, [inverse_key_schedule] ; entry 0 is the last encryption round key
+ aesdec xmm0, [inverse_key_schedule + 10h]
+ aesdec xmm0, [inverse_key_schedule + 20h]
+ aesdec xmm0, [inverse_key_schedule + 30h]
+ aesdec xmm0, [inverse_key_schedule + 40h]
+ aesdec xmm0, [inverse_key_schedule + 50h]
+ aesdec xmm0, [inverse_key_schedule + 60h]
+ aesdec xmm0, [inverse_key_schedule + 70h]
+ aesdec xmm0, [inverse_key_schedule + 80h]
+ aesdec xmm0, [inverse_key_schedule + 90h]
+ aesdec xmm0, [inverse_key_schedule + 0A0h]
+ aesdec xmm0, [inverse_key_schedule + 0B0h]
+ aesdec xmm0, [inverse_key_schedule + 0C0h]
+ aesdec xmm0, [inverse_key_schedule + 0D0h]
+ aesdeclast xmm0, [inverse_key_schedule + 0E0h]
+ pxor xmm0, [ecx] ; remove the initialization vector
+ ret
+@aes256cbc_decrypt@52 endp
+
+end
diff --git a/src/aes256ecb.asm b/src/aes256ecb.asm
new file mode 100644
index 0000000..4246dee
--- /dev/null
+++ b/src/aes256ecb.asm
@@ -0,0 +1,181 @@
+; Copyright 2015 Egor Tensin <Egor.Tensin@gmail.com>
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+; Encryption round keys: 15 entries of 16 bytes, rewritten by
+; expand_keys_256ecb on every call.
+align 10h
+key_schedule oword 15 dup(0)
+
+; Round keys as consumed by the aesdec sequence, rewritten by
+; invert_key_schedule on every call.
+align 10h
+inverse_key_schedule oword 15 dup(0)
+
+.code
+
+; Encrypts one block with a 256-bit key.
+; In: xmm0 = block, xmm1 = low key half, xmm2 = high key half.
+; Out: xmm0 = encrypted block.
+; The key schedule is rebuilt on every call.
+@aes256ecb_encrypt@48 proc
+ call expand_keys_256ecb
+ pxor xmm0, [key_schedule] ; initial round key addition
+ aesenc xmm0, [key_schedule + 10h]
+ aesenc xmm0, [key_schedule + 20h]
+ aesenc xmm0, [key_schedule + 30h]
+ aesenc xmm0, [key_schedule + 40h]
+ aesenc xmm0, [key_schedule + 50h]
+ aesenc xmm0, [key_schedule + 60h]
+ aesenc xmm0, [key_schedule + 70h]
+ aesenc xmm0, [key_schedule + 80h]
+ aesenc xmm0, [key_schedule + 90h]
+ aesenc xmm0, [key_schedule + 0A0h]
+ aesenc xmm0, [key_schedule + 0B0h]
+ aesenc xmm0, [key_schedule + 0C0h]
+ aesenc xmm0, [key_schedule + 0D0h]
+ aesenclast xmm0, [key_schedule + 0E0h] ; final round
+ ret
+@aes256ecb_encrypt@48 endp
+
+; Fills key_schedule and inverse_key_schedule from the 256-bit key held in
+; xmm1 (low half) and xmm2 (high half).
+; Clobbers edx, xmm1, xmm2, xmm6 and xmm7; xmm0 is left untouched.
+expand_keys_256ecb proc
+ lea edx, [key_schedule + 20h] ; edx = where the next round key is stored
+ movdqa [key_schedule], xmm1 ; round keys 0 and 1 are the key itself
+ movdqa [key_schedule + 10h], xmm2
+
+ ; Round keys 2..14 alternate between two forms: with a round constant and
+ ; word 3 of the assist result (0FFh), and with constant 0 and word 2 of
+ ; the assist result (0AAh).
+ aeskeygenassist xmm7, xmm2, 1h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 2h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 4h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 8h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 10h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 20h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 0
+ pshufd xmm7, xmm7, 0AAh
+ call gen_round_key
+
+ aeskeygenassist xmm7, xmm2, 40h
+ pshufd xmm7, xmm7, 0FFh
+ call gen_round_key
+
+ call invert_key_schedule
+ ret
+
+; In: xmm1 = round key from two steps back, xmm2 = previous round key,
+; xmm7 = broadcast assist word, edx = destination.
+; Out: new round key stored at [edx] and left in xmm2, the former xmm2 moved
+; to xmm1, edx advanced by 16.
+gen_round_key:
+ movdqa xmm6, xmm1
+
+ ; XOR each word of xmm1 with all lower words of the original xmm1.
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+
+ pxor xmm1, xmm7
+
+ movdqa [edx], xmm1
+ add edx, 10h
+
+ ; Rotate the pair so the next call works on the other key half.
+ movdqa xmm7, xmm1
+ movdqa xmm1, xmm2
+ movdqa xmm2, xmm7
+ ret
+
+; Builds inverse_key_schedule: entry i is key_schedule[14 - i], with aesimc
+; applied to every entry except the first and the last.
+invert_key_schedule:
+ movdqa xmm7, [key_schedule ]
+ movdqa xmm6, [key_schedule + 0E0h]
+ movdqa [inverse_key_schedule ], xmm6
+ movdqa [inverse_key_schedule + 0E0h], xmm7
+
+ aesimc xmm7, [key_schedule + 10h]
+ aesimc xmm6, [key_schedule + 0D0h]
+ movdqa [inverse_key_schedule + 10h], xmm6
+ movdqa [inverse_key_schedule + 0D0h], xmm7
+
+ aesimc xmm7, [key_schedule + 20h]
+ aesimc xmm6, [key_schedule + 0C0h]
+ movdqa [inverse_key_schedule + 20h], xmm6
+ movdqa [inverse_key_schedule + 0C0h], xmm7
+
+ aesimc xmm7, [key_schedule + 30h]
+ aesimc xmm6, [key_schedule + 0B0h]
+ movdqa [inverse_key_schedule + 30h], xmm6
+ movdqa [inverse_key_schedule + 0B0h], xmm7
+
+ aesimc xmm7, [key_schedule + 40h]
+ aesimc xmm6, [key_schedule + 0A0h]
+ movdqa [inverse_key_schedule + 40h], xmm6
+ movdqa [inverse_key_schedule + 0A0h], xmm7
+
+ aesimc xmm7, [key_schedule + 50h]
+ aesimc xmm6, [key_schedule + 90h]
+ movdqa [inverse_key_schedule + 50h], xmm6
+ movdqa [inverse_key_schedule + 90h], xmm7
+
+ aesimc xmm7, [key_schedule + 60h]
+ aesimc xmm6, [key_schedule + 80h]
+ movdqa [inverse_key_schedule + 60h], xmm6
+ movdqa [inverse_key_schedule + 80h], xmm7
+
+ ; The middle entry maps onto itself.
+ aesimc xmm7, [key_schedule + 70h]
+ movdqa [inverse_key_schedule + 70h], xmm7
+
+ ret
+expand_keys_256ecb endp
+
+; Decrypts one block with a 256-bit key.
+; In: xmm0 = block, xmm1 = low key half, xmm2 = high key half.
+; Out: xmm0 = decrypted block.
+; Both key schedules are rebuilt on every call; only the inverse one is used.
+@aes256ecb_decrypt@48 proc
+ call expand_keys_256ecb
+ pxor xmm0, [inverse_key_schedule] ; entry 0 is the last encryption round key
+ aesdec xmm0, [inverse_key_schedule + 10h]
+ aesdec xmm0, [inverse_key_schedule + 20h]
+ aesdec xmm0, [inverse_key_schedule + 30h]
+ aesdec xmm0, [inverse_key_schedule + 40h]
+ aesdec xmm0, [inverse_key_schedule + 50h]
+ aesdec xmm0, [inverse_key_schedule + 60h]
+ aesdec xmm0, [inverse_key_schedule + 70h]
+ aesdec xmm0, [inverse_key_schedule + 80h]
+ aesdec xmm0, [inverse_key_schedule + 90h]
+ aesdec xmm0, [inverse_key_schedule + 0A0h]
+ aesdec xmm0, [inverse_key_schedule + 0B0h]
+ aesdec xmm0, [inverse_key_schedule + 0C0h]
+ aesdec xmm0, [inverse_key_schedule + 0D0h]
+ aesdeclast xmm0, [inverse_key_schedule + 0E0h] ; entry 14 is the first round key
+ ret
+@aes256ecb_decrypt@48 endp
+
+end
diff --git a/src/common.c b/src/common.c
new file mode 100644
index 0000000..92f5e24
--- /dev/null
+++ b/src/common.c
@@ -0,0 +1,38 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include "aesni/all.h"
+
+#include <intrin.h>
+
+#include <stdio.h>
+
+/* Packs four 32-bit words into one block; the first argument ends up in the
+ * most significant word, the last one in the least significant word. */
+AesBlock make_aes_block(int highest, int high, int low, int lowest)
+{
+    const AesBlock packed = _mm_set_epi32(highest, high, low, lowest);
+
+    return packed;
+}
+
+/* Copies the 16 bytes of a block, in memory order, into an AesState. */
+AesState aes_block_to_state(AesBlock block)
+{
+    AesState result;
+    __m128i *const destination = (__m128i *) result.bytes;
+
+    /* Unaligned store: AesState carries no alignment requirement of its own. */
+    _mm_storeu_si128(destination, block);
+    return result;
+}
+
+/* Prints a block as four lines of four two-digit hexadecimal bytes. Output
+ * line r shows bytes[0][r] .. bytes[3][r], i.e. the state is printed
+ * transposed with respect to its memory order. */
+void print_aes_block(AesBlock block)
+{
+    const AesState state = aes_block_to_state(block);
+    int row, col;
+
+    for (row = 0; row < 4; ++row)
+    {
+        for (col = 0; col < 4; ++col)
+        {
+            /* The last byte of a line ends it; the others are space-separated. */
+            if (col < 3)
+                printf("%02x ", state.bytes[col][row]);
+            else
+                printf("%02x\n", state.bytes[col][row]);
+        }
+    }
+}