From 1463fd0559e0a664cd6a1bf6462a68dd79e58814 Mon Sep 17 00:00:00 2001 From: Egor Tensin Date: Fri, 22 May 2015 01:03:07 +0300 Subject: initial commit --- CMakeLists.txt | 8 ++ LICENSE.txt | 21 +++++ README.md | 14 ++++ examples/CMakeLists.txt | 11 +++ examples/aes128_example.c | 35 ++++++++ examples/aes192_example.c | 39 +++++++++ examples/aes256cbc_example.c | 43 ++++++++++ examples/aes256ecb_example.c | 39 +++++++++ include/aesni/all.h | 12 +++ include/aesni/data.h | 41 +++++++++ include/aesni/raw.h | 47 +++++++++++ src/aes128.asm | 129 ++++++++++++++++++++++++++++ src/aes192.asm | 196 +++++++++++++++++++++++++++++++++++++++++++ src/aes256cbc.asm | 183 ++++++++++++++++++++++++++++++++++++++++ src/aes256ecb.asm | 181 +++++++++++++++++++++++++++++++++++++++ src/common.c | 38 +++++++++ 16 files changed, 1037 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 examples/CMakeLists.txt create mode 100644 examples/aes128_example.c create mode 100644 examples/aes192_example.c create mode 100644 examples/aes256cbc_example.c create mode 100644 examples/aes256ecb_example.c create mode 100644 include/aesni/all.h create mode 100644 include/aesni/data.h create mode 100644 include/aesni/raw.h create mode 100644 src/aes128.asm create mode 100644 src/aes192.asm create mode 100644 src/aes256cbc.asm create mode 100644 src/aes256ecb.asm create mode 100644 src/common.c diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..cff7e40 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,8 @@ +project(libaesni C ASM_MASM) +file(GLOB_RECURSE ${PROJECT_NAME}_headers "include/*.h") +file(GLOB ${PROJECT_NAME}_sources "src/*.asm" "src/*.c") +add_library(${PROJECT_NAME} ${${PROJECT_NAME}_headers} + ${${PROJECT_NAME}_sources}) +target_include_directories(${PROJECT_NAME} PUBLIC include/) + +add_subdirectory(examples) diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..fbbdd68 --- 
/dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Egor Tensin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..561b27e --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# aesni + +Simple AES encryption algorithm implementation using the AES-NI instruction set. + +## Building + +I've used the compiler and the assembler shipped with Visual Studio Express 2013 with Update 4 for Windows Desktop. + +You can generate the solution using CMake and build it using Visual Studio. + +## Licensing + +This project, including all of the files and their contents, is licensed under the terms of the MIT License. +See LICENSE.txt for details. 
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000..8277421
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SAFESEH:NO")  # NOTE(review): presumably needed because the MASM objects are not SAFESEH-compatible -- confirm
+
+macro(example prefix)  # declares the executable <prefix>_example built from <prefix>_example.c
+    add_executable(${prefix}_example ${prefix}_example.c)
+    target_link_libraries(${prefix}_example libaesni)
+endmacro()
+
+example(aes128)
+example(aes192)
+example(aes256ecb)
+example(aes256cbc)
diff --git a/examples/aes128_example.c b/examples/aes128_example.c
new file mode 100644
index 0000000..b4f689f
--- /dev/null
+++ b/examples/aes128_example.c
@@ -0,0 +1,35 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+int main() /* encrypts one block with AES-128, decrypts it again and prints every value */
+{
+    __declspec(align(16)) AesBlock plain, key, cypher, decrypted;
+
+    plain = make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100); /* arguments run from the highest 32-bit lane to the lowest */
+    key   = make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+
+    printf("Plain:\n");
+    print_aes_block(plain);
+
+    printf("\nKey:\n");
+    print_aes_block(key);
+
+    printf("\nCypher:\n");
+    cypher = aes128ecb_encrypt(plain, key);
+    print_aes_block(cypher);
+
+    printf("\nDecrypted:\n");
+    decrypted = aes128ecb_decrypt(cypher, key); /* expected to print the same block as "Plain" */
+    print_aes_block(decrypted);
+
+    return 0;
+}
diff --git a/examples/aes192_example.c b/examples/aes192_example.c
new file mode 100644
index 0000000..dd22d80
--- /dev/null
+++ b/examples/aes192_example.c
@@ -0,0 +1,39 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+int main() /* encrypts one block with AES-192, decrypts it again and prints every value */
+{
+    __declspec(align(16)) AesBlock plain, cypher, decrypted;
+    __declspec(align(16)) AesBlock key_low, key_high;
+
+    plain    = make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100); /* arguments run from the highest 32-bit lane to the lowest */
+    key_low  = make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+    key_high = make_aes_block(         0,          0, 0x17161514, 0x13121110); /* only the two low lanes carry key material (192 = 128 + 64 bits) */
+
+    printf("Plain:\n");
+    print_aes_block(plain);
+
+    printf("\nKey low:\n");
+    print_aes_block(key_low);
+    printf("\nKey high:\n");
+    print_aes_block(key_high);
+
+    printf("\nCypher:\n");
+    cypher = aes192ecb_encrypt(plain, key_low, key_high);
+    print_aes_block(cypher);
+
+    printf("\nDecrypted:\n");
+    decrypted = aes192ecb_decrypt(cypher, key_low, key_high); /* expected to print the same block as "Plain" */
+    print_aes_block(decrypted);
+
+    return 0;
+}
diff --git a/examples/aes256cbc_example.c b/examples/aes256cbc_example.c
new file mode 100644
index 0000000..1fcc615
--- /dev/null
+++ b/examples/aes256cbc_example.c
@@ -0,0 +1,43 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+int main() /* encrypts one block with AES-256 in CBC mode, decrypts it again and prints every value */
+{
+    __declspec(align(16)) AesBlock plain, cypher, decrypted;
+    __declspec(align(16)) AesBlock key_low, key_high, iv;
+
+    plain    = make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100); /* arguments run from the highest 32-bit lane to the lowest */
+    key_low  = make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+    key_high = make_aes_block(0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110);
+    iv       = make_aes_block(0xfedcba98, 0x76543210, 0xfedcba98, 0x76543210);
+
+    printf("Plain:\n");
+    print_aes_block(plain);
+
+    printf("\nKey low:\n");
+    print_aes_block(key_low);
+    printf("\nKey high:\n");
+    print_aes_block(key_high);
+
+    printf("\nInitialization vector:\n");
+    print_aes_block(iv);
+
+    printf("\nCypher:\n");
+    cypher = aes256cbc_encrypt(plain, key_low, key_high, &iv); /* iv is the only argument passed by address */
+    print_aes_block(cypher);
+
+    printf("\nDecrypted:\n");
+    decrypted = aes256cbc_decrypt(cypher, key_low, key_high, &iv); /* same iv as for encryption; expected to print the same block as "Plain" */
+    print_aes_block(decrypted);
+
+    return 0;
+}
diff --git a/examples/aes256ecb_example.c b/examples/aes256ecb_example.c
new file mode 100644
index 0000000..25ec61c
--- /dev/null
+++ b/examples/aes256ecb_example.c
@@ -0,0 +1,39 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <stdio.h>
+
+int main() /* encrypts one block with AES-256 in ECB mode, decrypts it again and prints every value */
+{
+    __declspec(align(16)) AesBlock plain, cypher, decrypted;
+    __declspec(align(16)) AesBlock key_low, key_high;
+
+    plain    = make_aes_block(0xffeeddcc, 0xbbaa9988, 0x77665544, 0x33221100); /* arguments run from the highest 32-bit lane to the lowest */
+    key_low  = make_aes_block(0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100);
+    key_high = make_aes_block(0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110);
+
+    printf("Plain:\n");
+    print_aes_block(plain);
+
+    printf("\nKey low:\n");
+    print_aes_block(key_low);
+    printf("\nKey high:\n");
+    print_aes_block(key_high);
+
+    printf("\nCypher:\n");
+    cypher = aes256ecb_encrypt(plain, key_low, key_high);
+    print_aes_block(cypher);
+
+    printf("\nDecrypted:\n");
+    decrypted = aes256ecb_decrypt(cypher, key_low, key_high); /* expected to print the same block as "Plain" */
+    print_aes_block(decrypted);
+
+    return 0;
+}
diff --git a/include/aesni/all.h b/include/aesni/all.h
new file mode 100644
index 0000000..f16d2a6
--- /dev/null
+++ b/include/aesni/all.h
@@ -0,0 +1,12 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#pragma once
+
+#include "data.h" /* block/key/state types and the print helpers */
+#include "raw.h"  /* prototypes of the assembly routines */
diff --git a/include/aesni/data.h b/include/aesni/data.h
new file mode 100644
index 0000000..029d8c8
--- /dev/null
+++ b/include/aesni/data.h
@@ -0,0 +1,41 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#pragma once
+
+#include <emmintrin.h>
+
+typedef __m128i AesBlock; /* one 128-bit value: a data block, a key half or an IV */
+
+AesBlock make_aes_block(int highest, int high, int low, int lowest); /* packs four 32-bit lanes, most significant first */
+
+typedef AesBlock Aes128Key;
+
+typedef struct /* NOTE(review): not referenced by the prototypes in raw.h, which take key_lo/key_hi separately */
+{
+    AesBlock hi;
+    AesBlock lo;
+}
+Aes192Key;
+
+typedef struct /* NOTE(review): not referenced by the prototypes in raw.h, which take key_lo/key_hi separately */
+{
+    AesBlock hi;
+    AesBlock lo;
+}
+Aes256Key;
+
+typedef struct /* the 16 bytes of a block in memory order, as a 4x4 array */
+{
+    unsigned char bytes[4][4];
+}
+AesState;
+
+AesState aes_block_to_state(AesBlock); /* copies the 16 bytes of the block into an AesState */
+
+void print_aes_block(AesBlock); /* prints the block to stdout as four rows of four hex bytes */
diff --git a/include/aesni/raw.h b/include/aesni/raw.h
new file mode 100644
index 0000000..03ce217
--- /dev/null
+++ b/include/aesni/raw.h
@@ -0,0 +1,47 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#pragma once
+
+#include "data.h"
+
+AesBlock __fastcall aes128ecb_encrypt( /* src/aes128.asm: @aes128ecb_encrypt@32 */
+    AesBlock plain,
+    AesBlock key);
+AesBlock __fastcall aes128ecb_decrypt( /* src/aes128.asm: @aes128ecb_decrypt@32 */
+    AesBlock cypher,
+    AesBlock key);
+
+AesBlock __fastcall aes192ecb_encrypt( /* src/aes192.asm: @aes192ecb_encrypt@48 */
+    AesBlock plain,
+    AesBlock key_lo,
+    AesBlock key_hi); /* the assembly comments describe key_hi as "0 0 k5 k4" */
+AesBlock __fastcall aes192ecb_decrypt( /* src/aes192.asm: @aes192ecb_decrypt@48 */
+    AesBlock cypher,
+    AesBlock key_lo,
+    AesBlock key_hi);
+
+AesBlock __fastcall aes256ecb_encrypt( /* src/aes256ecb.asm: @aes256ecb_encrypt@48 */
+    AesBlock plain,
+    AesBlock key_lo,
+    AesBlock key_hi);
+AesBlock __fastcall aes256ecb_decrypt( /* src/aes256ecb.asm: @aes256ecb_decrypt@48 */
+    AesBlock cypher,
+    AesBlock key_lo,
+    AesBlock key_hi);
+
+AesBlock __fastcall aes256cbc_encrypt( /* src/aes256cbc.asm: @aes256cbc_encrypt@52 */
+    AesBlock plain,
+    AesBlock key_lo,
+    AesBlock key_hi,
+    AesBlock *iv); /* read with an aligned-operand pxor, never written by the assembly */
+AesBlock __fastcall aes256cbc_decrypt( /* src/aes256cbc.asm: @aes256cbc_decrypt@52 */
+    AesBlock cypher,
+    AesBlock key_lo,
+    AesBlock key_hi,
+    AesBlock *iv); /* read with an aligned-operand pxor, never written by the assembly */
diff --git a/src/aes128.asm b/src/aes128.asm
new file mode 100644
index 0000000..dfa7a7d
--- /dev/null
+++ b/src/aes128.asm
@@ -0,0 +1,129 @@
+; Copyright 2015 Egor Tensin
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h
+key_schedule oword 11 dup(0)            ; 11 round keys, filled by expand_keys_128ecb on every call
+
+align 10h
+inverted_key_schedule oword 11 dup(0)   ; round keys in the order/form used by the aesdec sequence, filled by invert_key_schedule
+
+.code
+
+@aes128ecb_encrypt@32 proc              ; in: block in xmm0, key in xmm1; out: encrypted block left in xmm0
+    call expand_keys_128ecb             ; rebuilds both schedules from xmm1 (clobbers ecx, xmm1, xmm6, xmm7)
+    pxor xmm0, [key_schedule]           ; initial AddRoundKey
+    aesenc xmm0, [key_schedule + 10h]
+    aesenc xmm0, [key_schedule + 20h]
+    aesenc xmm0, [key_schedule + 30h]
+    aesenc xmm0, [key_schedule + 40h]
+    aesenc xmm0, [key_schedule + 50h]
+    aesenc xmm0, [key_schedule + 60h]
+    aesenc xmm0, [key_schedule + 70h]
+    aesenc xmm0, [key_schedule + 80h]
+    aesenc xmm0, [key_schedule + 90h]
+    aesenclast xmm0, [key_schedule + 0A0h]
+    ret
+@aes128ecb_encrypt@32 endp
+
+@aes128ecb_decrypt@32 proc              ; in: block in xmm0, key in xmm1; out: decrypted block left in xmm0
+    call expand_keys_128ecb             ; rebuilds both schedules from xmm1 (clobbers ecx, xmm1, xmm6, xmm7)
+    pxor xmm0, [inverted_key_schedule]
+    aesdec xmm0, [inverted_key_schedule + 10h]
+    aesdec xmm0, [inverted_key_schedule + 20h]
+    aesdec xmm0, [inverted_key_schedule + 30h]
+    aesdec xmm0, [inverted_key_schedule + 40h]
+    aesdec xmm0, [inverted_key_schedule + 50h]
+    aesdec xmm0, [inverted_key_schedule + 60h]
+    aesdec xmm0, [inverted_key_schedule + 70h]
+    aesdec xmm0, [inverted_key_schedule + 80h]
+    aesdec xmm0, [inverted_key_schedule + 90h]
+    aesdeclast xmm0, [inverted_key_schedule + 0A0h]
+    ret
+@aes128ecb_decrypt@32 endp
+
+expand_keys_128ecb proc                 ; in: key in xmm1; fills key_schedule and inverted_key_schedule
+    lea ecx, [key_schedule + 10h]       ; ecx = where gen_round_key stores the next round key
+    movdqa [key_schedule], xmm1         ; round key 0 is the key itself
+
+    aeskeygenassist xmm7, xmm1, 01h     ; the immediate is the round constant; it changes on every call below
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 02h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 04h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 08h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 10h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 20h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 40h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 80h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 1Bh
+    call gen_round_key
+    aeskeygenassist xmm7, xmm1, 36h
+    call gen_round_key
+
+    call invert_key_schedule
+    ret
+
+gen_round_key:
+    movdqa xmm6, xmm1       ; xmm6 = key_schedule[i]
+                            ; xmm6 = x3 x2 x1 x0 (juxtaposed words below are xor-ed together)
+
+    pslldq xmm6, 4          ; xmm6 = x2 x1 x0 0
+    pxor xmm1, xmm6         ; xmm1 = (x3 x2) (x2 x1) (x1 x0) x0
+    pslldq xmm6, 4          ; xmm6 = x1 x0 0 0
+    pxor xmm1, xmm6         ; xmm1 = (x3 x2 x1) (x2 x1 x0) (x1 x0) x0
+    pslldq xmm6, 4          ; xmm6 = x0 0 0 0
+    pxor xmm1, xmm6         ; xmm1 = (x3 x2 x1 x0) (x2 x1 x0) (x1 x0) x0
+
+    pshufd xmm7, xmm7, 0FFh ; broadcast the highest dword of the aeskeygenassist result to all four lanes
+    pxor xmm1, xmm7         ; xmm1 = key_schedule[i + 1]
+
+    movdqa [ecx], xmm1      ; store it and advance the cursor by one round key
+    add ecx, 10h
+    ret
+
+invert_key_schedule:
+    movdqa xmm7, [key_schedule]                 ; the first and the last round keys are swapped unchanged
+    movdqa xmm6, [key_schedule + 0A0h]
+    movdqa [inverted_key_schedule], xmm6
+    movdqa [inverted_key_schedule + 0A0h], xmm7
+
+    aesimc xmm7, [key_schedule + 10h]           ; the inner round keys are reversed in order and passed through aesimc
+    aesimc xmm6, [key_schedule + 90h]
+    movdqa [inverted_key_schedule + 10h], xmm6
+    movdqa [inverted_key_schedule + 90h], xmm7
+
+    aesimc xmm7, [key_schedule + 20h]
+    aesimc xmm6, [key_schedule + 80h]
+    movdqa [inverted_key_schedule + 20h], xmm6
+    movdqa [inverted_key_schedule + 80h], xmm7
+
+    aesimc xmm7, [key_schedule + 30h]
+    aesimc xmm6, [key_schedule + 70h]
+    movdqa [inverted_key_schedule + 30h], xmm6
+    movdqa [inverted_key_schedule + 70h], xmm7
+
+    aesimc xmm7, [key_schedule + 40h]
+    aesimc xmm6, [key_schedule + 60h]
+    movdqa [inverted_key_schedule + 40h], xmm6
+    movdqa [inverted_key_schedule + 60h], xmm7
+
+    aesimc xmm7, [key_schedule + 50h]           ; the middle round key maps onto itself
+    movdqa [inverted_key_schedule + 50h], xmm7
+
+    ret
+expand_keys_128ecb endp
+
+end
diff --git a/src/aes192.asm b/src/aes192.asm
new file mode 100644
index 0000000..84d3a3a
--- /dev/null
+++ b/src/aes192.asm
@@ -0,0 +1,196 @@
+; Copyright 2015 Egor Tensin
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h
+key_schedule oword 14 dup(0)            ; 13 round keys + 1 spare oword: the last gen_round_key call stores 8 bytes past the 13th key
+
+align 10h
+inverted_key_schedule oword 13 dup(0)   ; round keys in the order/form used by the aesdec sequence, filled by invert_key_schedule
+
+.code
+
+@aes192ecb_encrypt@48 proc              ; in: block in xmm0, key in xmm1 (low) and xmm2 (high); out: encrypted block left in xmm0
+    call expand_keys_192ecb             ; rebuilds both schedules (clobbers ecx, xmm1, xmm2, xmm6, xmm7)
+    pxor xmm0, [key_schedule]           ; initial AddRoundKey
+    aesenc xmm0, [key_schedule + 10h]
+    aesenc xmm0, [key_schedule + 20h]
+    aesenc xmm0, [key_schedule + 30h]
+    aesenc xmm0, [key_schedule + 40h]
+    aesenc xmm0, [key_schedule + 50h]
+    aesenc xmm0, [key_schedule + 60h]
+    aesenc xmm0, [key_schedule + 70h]
+    aesenc xmm0, [key_schedule + 80h]
+    aesenc xmm0, [key_schedule + 90h]
+    aesenc xmm0, [key_schedule + 0A0h]
+    aesenc xmm0, [key_schedule + 0B0h]
+    aesenclast xmm0, [key_schedule + 0C0h]
+    ret
+@aes192ecb_encrypt@48 endp
+
+@aes192ecb_decrypt@48 proc              ; in: block in xmm0, key in xmm1 (low) and xmm2 (high); out: decrypted block left in xmm0
+    call expand_keys_192ecb             ; rebuilds both schedules (clobbers ecx, xmm1, xmm2, xmm6, xmm7)
+    pxor xmm0, [inverted_key_schedule]
+    aesdec xmm0, [inverted_key_schedule + 10h]
+    aesdec xmm0, [inverted_key_schedule + 20h]
+    aesdec xmm0, [inverted_key_schedule + 30h]
+    aesdec xmm0, [inverted_key_schedule + 40h]
+    aesdec xmm0, [inverted_key_schedule + 50h]
+    aesdec xmm0, [inverted_key_schedule + 60h]
+    aesdec xmm0, [inverted_key_schedule + 70h]
+    aesdec xmm0, [inverted_key_schedule + 80h]
+    aesdec xmm0, [inverted_key_schedule + 90h]
+    aesdec xmm0, [inverted_key_schedule + 0A0h]
+    aesdec xmm0, [inverted_key_schedule + 0B0h]
+    aesdeclast xmm0, [inverted_key_schedule + 0C0h]
+    ret
+@aes192ecb_decrypt@48 endp
+
+expand_keys_192ecb proc
+    ; key = k0 k1 k2 k3 k4 k5 (32-bit words; '*' denotes xor in the comments below)
+    ; xmm1 = k3 k2 k1 k0 (registers are written highest word first)
+    ; xmm2 = 0 0 k5 k4
+
+    ; w[0] = k0 k1 k2 k3 (first 16 bytes of key_schedule, in memory order)
+    ; w[1] = k4 k5 - -
+
+    ; i = 6
+    ; while (i < 52):
+    ;     temp = w[i - 1]
+    ;     if (i % 6 == 0):
+    ;         temp = SubWord(RotWord(w[i - 1])) * Rcon
+    ;     w[i] = w[i - 6] * temp
+    ;     i = i + 1
+
+    ; w[6] = SubWord(RotWord(w[5])) * Rcon * w[0]
+    ; w[7] = w[6] * w[1]
+    ;      = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1]
+    ; w[8] = w[7] * w[2]
+    ;      = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2]
+    ; w[9] = w[8] * w[3]
+    ;      = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3]
+    ; w[10] = w[9] * w[4]
+    ;       = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3] * w[4]
+    ; w[11] = w[10] * w[5]
+    ;       = SubWord(RotWord(w[5])) * Rcon * w[0] * w[1] * w[2] * w[3] * w[4] * w[5]
+
+    movdqa [key_schedule], xmm1
+    movdqa [key_schedule + 10h], xmm2   ; the upper 8 bytes stored here are overwritten by the first gen_round_key call
+
+    lea ecx, [key_schedule + 18h]       ; ecx = write cursor, starting right after the 24 key bytes
+    aeskeygenassist xmm7, xmm2, 1       ; the immediate is the round constant; it changes on every call below
+    call gen_round_key
+    aeskeygenassist xmm7, xmm2, 2
+    call gen_round_key
+    aeskeygenassist xmm7, xmm2, 4
+    call gen_round_key
+    aeskeygenassist xmm7, xmm2, 8
+    call gen_round_key
+    aeskeygenassist xmm7, xmm2, 10h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm2, 20h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm2, 40h
+    call gen_round_key
+    aeskeygenassist xmm7, xmm2, 80h     ; 8th call: its stores end at key_schedule + 0D8h, hence the spare oword
+    call gen_round_key
+
+    call invert_key_schedule
+    ret
+
+gen_round_key:
+    ; xmm1 = x3 x2 x1 x0
+    ; xmm2 = - - x5 x4
+    ; xmm7 = RotWord(SubWord(-)) xor Rcon
+    ;        SubWord(-)
+    ;        RotWord(SubWord(x5)) xor Rcon
+    ;        SubWord(x5)
+    movdqa xmm6, xmm1
+
+    pslldq xmm6, 4              ; xmm6 = x2 x1 x0 0
+    pxor xmm1, xmm6             ; xmm1 = (x3 * x2) (x1 * x2) (x1 * x0) x0
+    pslldq xmm6, 4              ; xmm6 = x1 x0 0 0
+    pxor xmm1, xmm6             ; xmm1 = (x3 * x2 * x1) (x1 * x2 * x0) (x1 * x0) x0
+    pslldq xmm6, 4              ; xmm6 = x0 0 0 0
+    pxor xmm1, xmm6             ; xmm1 = (x3 * x2 * x1 * x0) (x1 * x2 * x0) (x1 * x0) x0
+
+    pshufd xmm7, xmm7, 55h      ; xmm7 = RotWord(SubWord(x5)) * Rcon
+                                ;        RotWord(SubWord(x5)) * Rcon
+                                ;        RotWord(SubWord(x5)) * Rcon
+                                ;        RotWord(SubWord(x5)) * Rcon
+
+    pxor xmm1, xmm7             ; xmm1 = RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+                                ;        RotWord(SubWord(x5)) * Rcon * x2 * x1 * x0
+                                ;        RotWord(SubWord(x5)) * Rcon * x1 * x0
+                                ;        RotWord(SubWord(x5)) * Rcon * x0
+
+    movq qword ptr [ecx], xmm1  ; store the two low new words (8 bytes)
+    add ecx, 8
+
+    pshufd xmm7, xmm1, 0FFh     ; xmm7 = -
+                                ;        -
+                                ;        RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+                                ;        RotWord(SubWord(x5)) * Rcon * x3 * x2 * x1 * x0
+    pxor xmm7, xmm2             ; xmm7 = -
+                                ;        -
+                                ;        RotWord(SubWord(x5)) * Rcon * x5 * x3 * x2 * x1 * x0
+                                ;        RotWord(SubWord(x5)) * Rcon * x4 * x3 * x2 * x1 * x0
+    pslldq xmm2, 4              ; xmm2 = - x5 x4 0
+    pxor xmm7, xmm2             ; xmm7 = -
+                                ;        -
+                                ;        RotWord(SubWord(x5)) * Rcon * x5 * x4 * x3 * x2 * x1 * x0
+                                ;        RotWord(SubWord(x5)) * Rcon * x4 * x3 * x2 * x1 * x0
+
+    movq xmm2, xmm7             ; xmm2 = 0 0 (new x5) (new x4), the input of the next aeskeygenassist
+    pslldq xmm7, 8              ; move the new x5 x4 into the two high lanes
+    movdqa xmm6, xmm1
+    psrldq xmm6, 8              ; the two high words of xmm1 drop into the low lanes
+    por xmm7, xmm6              ; xmm7 = (new x5) (new x4) (new x3) (new x2)
+    movdqu [ecx], xmm7          ; unaligned store: ecx is only 8-byte aligned at this point
+    add ecx, 10h
+    ret
+
+invert_key_schedule:
+    movdqa xmm7, [key_schedule]                 ; the first and the last round keys are swapped unchanged
+    movdqa xmm6, [key_schedule + 0C0h]
+    movdqa [inverted_key_schedule], xmm6
+    movdqa [inverted_key_schedule + 0C0h], xmm7
+
+    aesimc xmm7, [key_schedule + 10h]           ; the inner round keys are reversed in order and passed through aesimc
+    aesimc xmm6, [key_schedule + 0B0h]
+    movdqa [inverted_key_schedule + 10h], xmm6
+    movdqa [inverted_key_schedule + 0B0h], xmm7
+
+    aesimc xmm7, [key_schedule + 20h]
+    aesimc xmm6, [key_schedule + 0A0h]
+    movdqa [inverted_key_schedule + 20h], xmm6
+    movdqa [inverted_key_schedule + 0A0h], xmm7
+
+    aesimc xmm7, [key_schedule + 30h]
+    aesimc xmm6, [key_schedule + 90h]
+    movdqa [inverted_key_schedule + 30h], xmm6
+    movdqa [inverted_key_schedule + 90h], xmm7
+
+    aesimc xmm7, [key_schedule + 40h]
+    aesimc xmm6, [key_schedule + 80h]
+    movdqa [inverted_key_schedule + 40h], xmm6
+    movdqa [inverted_key_schedule + 80h], xmm7
+
+    aesimc xmm7, [key_schedule + 50h]
+    aesimc xmm6, [key_schedule + 70h]
+    movdqa [inverted_key_schedule + 50h], xmm6
+    movdqa [inverted_key_schedule + 70h], xmm7
+
+    aesimc xmm7, [key_schedule + 60h]           ; the middle round key maps onto itself
+    movdqa [inverted_key_schedule + 60h], xmm7
+
+    ret
+expand_keys_192ecb endp
+
+end
diff --git a/src/aes256cbc.asm b/src/aes256cbc.asm
new file mode 100644
index 0000000..3446d31
--- /dev/null
+++ b/src/aes256cbc.asm
@@ -0,0 +1,183 @@
+; Copyright 2015 Egor Tensin
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h
+key_schedule oword 15 dup(0)            ; 15 round keys, filled by expand_keys_256cbc on every call
+
+align 10h
+inverse_key_schedule oword 15 dup(0)    ; round keys in the order/form used by the aesdec sequence, filled by invert_key_schedule
+
+.code
+
+@aes256cbc_encrypt@52 proc              ; in: block in xmm0, key in xmm1 (low) and xmm2 (high), ecx -> iv; out: xmm0
+    call expand_keys_256cbc             ; uses edx as its cursor, so ecx is still intact afterwards
+    pxor xmm0, [ecx]                    ; CBC: xor the input with the block ecx points to (presumably the iv argument -- confirm against __fastcall)
+    pxor xmm0, [key_schedule]           ; initial AddRoundKey
+    aesenc xmm0, [key_schedule + 10h]
+    aesenc xmm0, [key_schedule + 20h]
+    aesenc xmm0, [key_schedule + 30h]
+    aesenc xmm0, [key_schedule + 40h]
+    aesenc xmm0, [key_schedule + 50h]
+    aesenc xmm0, [key_schedule + 60h]
+    aesenc xmm0, [key_schedule + 70h]
+    aesenc xmm0, [key_schedule + 80h]
+    aesenc xmm0, [key_schedule + 90h]
+    aesenc xmm0, [key_schedule + 0A0h]
+    aesenc xmm0, [key_schedule + 0B0h]
+    aesenc xmm0, [key_schedule + 0C0h]
+    aesenc xmm0, [key_schedule + 0D0h]
+    aesenclast xmm0, [key_schedule + 0E0h]
+    ret                                 ; NOTE(review): nothing is stored through ecx, so the caller's iv is not advanced
+@aes256cbc_encrypt@52 endp
+
+expand_keys_256cbc proc                 ; in: key in xmm1 (low) and xmm2 (high); fills key_schedule and inverse_key_schedule
+    lea edx, [key_schedule + 20h]       ; edx = where gen_round_key stores the next round key
+    movdqa [key_schedule], xmm1         ; round keys 0 and 1 are the two key halves
+    movdqa [key_schedule + 10h], xmm2
+
+    aeskeygenassist xmm7, xmm2, 1h      ; odd steps: a round constant is applied ...
+    pshufd xmm7, xmm7, 0FFh             ; ... and the highest dword of the result is broadcast
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0       ; even steps: no round constant ...
+    pshufd xmm7, xmm7, 0AAh             ; ... and dword 2 of the result is broadcast
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 2h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 4h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 8h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 10h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 20h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 40h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    call invert_key_schedule
+    ret
+
+gen_round_key:                          ; in: xmm1 = round key i-2, xmm2 = round key i-1, xmm7 = broadcast assist word
+    movdqa xmm6, xmm1
+
+    pslldq xmm6, 4                      ; three shift/xor steps: each lane of xmm1 becomes the xor of itself and all lower lanes
+    pxor xmm1, xmm6
+    pslldq xmm6, 4
+    pxor xmm1, xmm6
+    pslldq xmm6, 4
+    pxor xmm1, xmm6
+
+    pxor xmm1, xmm7                     ; xmm1 = round key i
+
+    movdqa [edx], xmm1                  ; store it and advance the cursor by one round key
+    add edx, 10h
+
+    movdqa xmm7, xmm1                   ; rotate the window: xmm1 = round key i-1, xmm2 = round key i
+    movdqa xmm1, xmm2
+    movdqa xmm2, xmm7
+    ret
+
+invert_key_schedule:
+    movdqa xmm7, [key_schedule]                 ; the first and the last round keys are swapped unchanged
+    movdqa xmm6, [key_schedule + 0E0h]
+    movdqa [inverse_key_schedule], xmm6
+    movdqa [inverse_key_schedule + 0E0h], xmm7
+
+    aesimc xmm7, [key_schedule + 10h]           ; the inner round keys are reversed in order and passed through aesimc
+    aesimc xmm6, [key_schedule + 0D0h]
+    movdqa [inverse_key_schedule + 10h], xmm6
+    movdqa [inverse_key_schedule + 0D0h], xmm7
+
+    aesimc xmm7, [key_schedule + 20h]
+    aesimc xmm6, [key_schedule + 0C0h]
+    movdqa [inverse_key_schedule + 20h], xmm6
+    movdqa [inverse_key_schedule + 0C0h], xmm7
+
+    aesimc xmm7, [key_schedule + 30h]
+    aesimc xmm6, [key_schedule + 0B0h]
+    movdqa [inverse_key_schedule + 30h], xmm6
+    movdqa [inverse_key_schedule + 0B0h], xmm7
+
+    aesimc xmm7, [key_schedule + 40h]
+    aesimc xmm6, [key_schedule + 0A0h]
+    movdqa [inverse_key_schedule + 40h], xmm6
+    movdqa [inverse_key_schedule + 0A0h], xmm7
+
+    aesimc xmm7, [key_schedule + 50h]
+    aesimc xmm6, [key_schedule + 90h]
+    movdqa [inverse_key_schedule + 50h], xmm6
+    movdqa [inverse_key_schedule + 90h], xmm7
+
+    aesimc xmm7, [key_schedule + 60h]
+    aesimc xmm6, [key_schedule + 80h]
+    movdqa [inverse_key_schedule + 60h], xmm6
+    movdqa [inverse_key_schedule + 80h], xmm7
+
+    aesimc xmm7, [key_schedule + 70h]           ; the middle round key maps onto itself
+    movdqa [inverse_key_schedule + 70h], xmm7
+
+    ret
+expand_keys_256cbc endp
+
+@aes256cbc_decrypt@52 proc              ; in: block in xmm0, key in xmm1 (low) and xmm2 (high), ecx -> iv; out: xmm0
+    call expand_keys_256cbc             ; uses edx as its cursor, so ecx is still intact afterwards
+    pxor xmm0, [inverse_key_schedule]
+    aesdec xmm0, [inverse_key_schedule + 10h]
+    aesdec xmm0, [inverse_key_schedule + 20h]
+    aesdec xmm0, [inverse_key_schedule + 30h]
+    aesdec xmm0, [inverse_key_schedule + 40h]
+    aesdec xmm0, [inverse_key_schedule + 50h]
+    aesdec xmm0, [inverse_key_schedule + 60h]
+    aesdec xmm0, [inverse_key_schedule + 70h]
+    aesdec xmm0, [inverse_key_schedule + 80h]
+    aesdec xmm0, [inverse_key_schedule + 90h]
+    aesdec xmm0, [inverse_key_schedule + 0A0h]
+    aesdec xmm0, [inverse_key_schedule + 0B0h]
+    aesdec xmm0, [inverse_key_schedule + 0C0h]
+    aesdec xmm0, [inverse_key_schedule + 0D0h]
+    aesdeclast xmm0, [inverse_key_schedule + 0E0h]
+    pxor xmm0, [ecx]                    ; CBC: xor the decrypted block with the block ecx points to
+    ret
+@aes256cbc_decrypt@52 endp
+
+end
diff --git a/src/aes256ecb.asm b/src/aes256ecb.asm
new file mode 100644
index 0000000..4246dee
--- /dev/null
+++ b/src/aes256ecb.asm
@@ -0,0 +1,181 @@
+; Copyright 2015 Egor Tensin
+; This file is licensed under the terms of the MIT License.
+; See LICENSE.txt for details.
+
+.586
+.xmm
+.model flat
+
+.data
+
+align 10h
+key_schedule oword 15 dup(0)            ; 15 round keys, filled by expand_keys_256ecb on every call
+
+align 10h
+inverse_key_schedule oword 15 dup(0)    ; round keys in the order/form used by the aesdec sequence, filled by invert_key_schedule
+
+.code
+
+@aes256ecb_encrypt@48 proc              ; in: block in xmm0, key in xmm1 (low) and xmm2 (high); out: encrypted block left in xmm0
+    call expand_keys_256ecb             ; rebuilds both schedules (clobbers edx, xmm1, xmm2, xmm6, xmm7)
+    pxor xmm0, [key_schedule]           ; initial AddRoundKey
+    aesenc xmm0, [key_schedule + 10h]
+    aesenc xmm0, [key_schedule + 20h]
+    aesenc xmm0, [key_schedule + 30h]
+    aesenc xmm0, [key_schedule + 40h]
+    aesenc xmm0, [key_schedule + 50h]
+    aesenc xmm0, [key_schedule + 60h]
+    aesenc xmm0, [key_schedule + 70h]
+    aesenc xmm0, [key_schedule + 80h]
+    aesenc xmm0, [key_schedule + 90h]
+    aesenc xmm0, [key_schedule + 0A0h]
+    aesenc xmm0, [key_schedule + 0B0h]
+    aesenc xmm0, [key_schedule + 0C0h]
+    aesenc xmm0, [key_schedule + 0D0h]
+    aesenclast xmm0, [key_schedule + 0E0h]
+    ret
+@aes256ecb_encrypt@48 endp
+
+expand_keys_256ecb proc                 ; in: key in xmm1 (low) and xmm2 (high); fills key_schedule and inverse_key_schedule
+    lea edx, [key_schedule + 20h]       ; edx = where gen_round_key stores the next round key
+    movdqa [key_schedule], xmm1         ; round keys 0 and 1 are the two key halves
+    movdqa [key_schedule + 10h], xmm2
+
+    aeskeygenassist xmm7, xmm2, 1h      ; odd steps: a round constant is applied ...
+    pshufd xmm7, xmm7, 0FFh             ; ... and the highest dword of the result is broadcast
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0       ; even steps: no round constant ...
+    pshufd xmm7, xmm7, 0AAh             ; ... and dword 2 of the result is broadcast
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 2h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 4h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 8h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 10h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 20h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 0
+    pshufd xmm7, xmm7, 0AAh
+    call gen_round_key
+
+    aeskeygenassist xmm7, xmm2, 40h
+    pshufd xmm7, xmm7, 0FFh
+    call gen_round_key
+
+    call invert_key_schedule
+    ret
+
+gen_round_key:                          ; in: xmm1 = round key i-2, xmm2 = round key i-1, xmm7 = broadcast assist word
+    movdqa xmm6, xmm1
+
+    pslldq xmm6, 4                      ; three shift/xor steps: each lane of xmm1 becomes the xor of itself and all lower lanes
+    pxor xmm1, xmm6
+    pslldq xmm6, 4
+    pxor xmm1, xmm6
+    pslldq xmm6, 4
+    pxor xmm1, xmm6
+
+    pxor xmm1, xmm7                     ; xmm1 = round key i
+
+    movdqa [edx], xmm1                  ; store it and advance the cursor by one round key
+    add edx, 10h
+
+    movdqa xmm7, xmm1                   ; rotate the window: xmm1 = round key i-1, xmm2 = round key i
+    movdqa xmm1, xmm2
+    movdqa xmm2, xmm7
+    ret
+
+invert_key_schedule:
+    movdqa xmm7, [key_schedule       ]          ; the first and the last round keys are swapped unchanged
+    movdqa xmm6, [key_schedule + 0E0h]
+    movdqa [inverse_key_schedule       ], xmm6
+    movdqa [inverse_key_schedule + 0E0h], xmm7
+
+    aesimc xmm7, [key_schedule + 10h]           ; the inner round keys are reversed in order and passed through aesimc
+    aesimc xmm6, [key_schedule + 0D0h]
+    movdqa [inverse_key_schedule + 10h], xmm6
+    movdqa [inverse_key_schedule + 0D0h], xmm7
+
+    aesimc xmm7, [key_schedule + 20h]
+    aesimc xmm6, [key_schedule + 0C0h]
+    movdqa [inverse_key_schedule + 20h], xmm6
+    movdqa [inverse_key_schedule + 0C0h], xmm7
+
+    aesimc xmm7, [key_schedule + 30h]
+    aesimc xmm6, [key_schedule + 0B0h]
+    movdqa [inverse_key_schedule + 30h], xmm6
+    movdqa [inverse_key_schedule + 0B0h], xmm7
+
+    aesimc xmm7, [key_schedule + 40h]
+    aesimc xmm6, [key_schedule + 0A0h]
+    movdqa [inverse_key_schedule + 40h], xmm6
+    movdqa [inverse_key_schedule + 0A0h], xmm7
+
+    aesimc xmm7, [key_schedule + 50h]
+    aesimc xmm6, [key_schedule + 90h]
+    movdqa [inverse_key_schedule + 50h], xmm6
+    movdqa [inverse_key_schedule + 90h], xmm7
+
+    aesimc xmm7, [key_schedule + 60h]
+    aesimc xmm6, [key_schedule + 80h]
+    movdqa [inverse_key_schedule + 60h], xmm6
+    movdqa [inverse_key_schedule + 80h], xmm7
+
+    aesimc xmm7, [key_schedule + 70h]           ; the middle round key maps onto itself
+    movdqa [inverse_key_schedule + 70h], xmm7
+
+    ret
+expand_keys_256ecb endp
+
+@aes256ecb_decrypt@48 proc              ; in: block in xmm0, key in xmm1 (low) and xmm2 (high); out: decrypted block left in xmm0
+    call expand_keys_256ecb             ; rebuilds both schedules (clobbers edx, xmm1, xmm2, xmm6, xmm7)
+    pxor xmm0, [inverse_key_schedule]
+    aesdec xmm0, [inverse_key_schedule + 10h]
+    aesdec xmm0, [inverse_key_schedule + 20h]
+    aesdec xmm0, [inverse_key_schedule + 30h]
+    aesdec xmm0, [inverse_key_schedule + 40h]
+    aesdec xmm0, [inverse_key_schedule + 50h]
+    aesdec xmm0, [inverse_key_schedule + 60h]
+    aesdec xmm0, [inverse_key_schedule + 70h]
+    aesdec xmm0, [inverse_key_schedule + 80h]
+    aesdec xmm0, [inverse_key_schedule + 90h]
+    aesdec xmm0, [inverse_key_schedule + 0A0h]
+    aesdec xmm0, [inverse_key_schedule + 0B0h]
+    aesdec xmm0, [inverse_key_schedule + 0C0h]
+    aesdec xmm0, [inverse_key_schedule + 0D0h]
+    aesdeclast xmm0, [inverse_key_schedule + 0E0h]
+    ret
+@aes256ecb_decrypt@48 endp
+
+end
diff --git a/src/common.c b/src/common.c
new file mode 100644
index 0000000..92f5e24
--- /dev/null
+++ b/src/common.c
@@ -0,0 +1,38 @@
+/**
+ * \file
+ * \author Egor Tensin
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ *            See LICENSE.txt for details.
+ */
+
+#include "aesni/all.h"
+
+#include <emmintrin.h>
+
+#include <stdio.h>
+
+AesBlock make_aes_block(int highest, int high, int low, int lowest) /* packs four 32-bit lanes, most significant first */
+{
+    return _mm_set_epi32(highest, high, low, lowest);
+}
+
+AesState aes_block_to_state(AesBlock block) /* copies the 16 bytes of the block, in memory order, into a 4x4 byte array */
+{
+    AesState state;
+    _mm_storeu_si128((__m128i*) &state.bytes, block); /* unaligned store: state carries no alignment requirement */
+    return state;
+}
+
+void print_aes_block(AesBlock block) /* prints the block to stdout as four lines of four two-digit hex bytes */
+{
+    int i, j;
+    AesState state = aes_block_to_state(block);
+
+    for (i = 0; i < 4; ++i) /* output line i shows bytes[0][i] .. bytes[3][i], i.e. the array is printed transposed */
+    {
+        for (j = 0; j < 3; ++j)
+            printf("%02x ", state.bytes[j][i]);
+        printf("%02x\n", state.bytes[3][i]); /* last byte of the line: newline instead of a trailing space */
+    }
+}
-- 
cgit v1.2.3