diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/aes128.c | 99 | ||||
-rw-r--r-- | src/aes192.c | 134 | ||||
-rw-r--r-- | src/aes256.c | 162 |
4 files changed, 396 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5be65df..61ca42c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 project(libaesni C ASM_MASM)
 file(GLOB_RECURSE ${PROJECT_NAME}_headers "include/*.h")
-file(GLOB ${PROJECT_NAME}_sources "src/*.asm" "src/*.c")
+file(GLOB ${PROJECT_NAME}_sources "src/*.c")
 add_library(${PROJECT_NAME} ${${PROJECT_NAME}_headers} ${${PROJECT_NAME}_sources})
 target_include_directories(${PROJECT_NAME} PUBLIC include/)
diff --git a/src/aes128.c b/src/aes128.c
new file mode 100644
index 0000000..66c2b2e
--- /dev/null
+++ b/src/aes128.c
@@ -0,0 +1,99 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <emmintrin.h>
+#include <wmmintrin.h>
+/** Encrypts one block: XOR with keys[0], nine AESENC rounds with keys[1]..keys[9], then AESENCLAST with keys[10]. */
+AesBlock128 __fastcall raw_aes128ecb_encrypt(
+    AesBlock128 plain,
+    Aes128KeySchedule* key_schedule)
+{
+    plain = _mm_xor_si128(plain, key_schedule->keys[0]); /* initial AddRoundKey */
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[1]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[2]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[3]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[4]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[5]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[6]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[7]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[8]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[9]);
+    return _mm_aesenclast_si128(plain, key_schedule->keys[10]); /* final round has no MixColumns */
+}
+/** Decrypts one block: XOR with keys[0], nine AESDEC rounds, then AESDECLAST with keys[10]. NOTE(review): presumably expects the output of raw_aes128_invert_key_schedule() - confirm against callers. */
+AesBlock128 __fastcall raw_aes128ecb_decrypt(
+    AesBlock128 cipher,
+    Aes128KeySchedule* inverted_schedule)
+{
+    cipher = _mm_xor_si128(cipher, inverted_schedule->keys[0]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[1]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[2]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[3]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[4]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[5]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[6]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[7]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[8]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[9]);
+    return _mm_aesdeclast_si128(cipher, inverted_schedule->keys[10]);
+}
+/** Returns the next round key: \p prev XORed with itself shifted left by 4, 8 and 12 bytes, then with dword 3 of \p hwgen broadcast to every lane. */
+static AesBlock128 __fastcall aes128_keygen_assist(
+    AesBlock128 prev,
+    AesBlock128 hwgen)
+{
+    AesBlock128 tmp = prev;
+    /* Running XOR of the four dwords: tmp is shifted by one more dword (4 bytes) before each fold. */
+    tmp = _mm_slli_si128(tmp, 4);
+    prev = _mm_xor_si128(prev, tmp);
+    tmp = _mm_slli_si128(tmp, 4);
+    prev = _mm_xor_si128(prev, tmp);
+    tmp = _mm_slli_si128(tmp, 4);
+    prev = _mm_xor_si128(prev, tmp);
+    /* Per the AESKEYGENASSIST definition, dword 3 is RotWord(SubWord(X3)) XOR rcon; 0xff copies it to all lanes. */
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    prev = _mm_xor_si128(prev, hwgen);
+
+    return prev;
+}
+/** Fills keys[0]..keys[10]: keys[0] is \p key, each later key is aes128_keygen_assist() of the previous key and its AESKEYGENASSIST output. */
+void __fastcall raw_aes128_expand_key_schedule(
+    AesBlock128 key,
+    Aes128KeySchedule* key_schedule)
+{
+    AesBlock128 prev = key_schedule->keys[0] = key;
+    prev = key_schedule->keys[1] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x01)); /* second argument: round constant, must be a compile-time immediate */
+    prev = key_schedule->keys[2] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x02));
+    prev = key_schedule->keys[3] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x04));
+    prev = key_schedule->keys[4] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x08));
+    prev = key_schedule->keys[5] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x10));
+    prev = key_schedule->keys[6] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x20));
+    prev = key_schedule->keys[7] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x40));
+    prev = key_schedule->keys[8] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x80));
+    prev = key_schedule->keys[9] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x1b));
+    prev = key_schedule->keys[10] = aes128_keygen_assist(prev, _mm_aeskeygenassist_si128(prev, 0x36)); /* prev is not read after this store */
+}
+/** Writes the keys of \p key_schedule into \p inverted_schedule in reverse order, applying AESIMC to source keys[1]..keys[9] and copying keys[0] and keys[10] unchanged. */
+void __fastcall raw_aes128_invert_key_schedule(
+    Aes128KeySchedule* key_schedule,
+    Aes128KeySchedule* inverted_schedule)
+{
+    inverted_schedule->keys[0] = key_schedule->keys[10];
+    inverted_schedule->keys[1] = _mm_aesimc_si128(key_schedule->keys[9]);
+    inverted_schedule->keys[2] = _mm_aesimc_si128(key_schedule->keys[8]);
+    inverted_schedule->keys[3] = _mm_aesimc_si128(key_schedule->keys[7]);
+    inverted_schedule->keys[4] = _mm_aesimc_si128(key_schedule->keys[6]);
+    inverted_schedule->keys[5] = _mm_aesimc_si128(key_schedule->keys[5]);
+    inverted_schedule->keys[6] = _mm_aesimc_si128(key_schedule->keys[4]);
+    inverted_schedule->keys[7] = _mm_aesimc_si128(key_schedule->keys[3]);
+    inverted_schedule->keys[8] = _mm_aesimc_si128(key_schedule->keys[2]);
+    inverted_schedule->keys[9] = _mm_aesimc_si128(key_schedule->keys[1]);
+    inverted_schedule->keys[10] = key_schedule->keys[0];
+}
diff --git a/src/aes192.c b/src/aes192.c
new file mode 100644
index 0000000..2d1d6e5
--- /dev/null
+++ b/src/aes192.c
@@ -0,0 +1,134 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <emmintrin.h>
+#include <wmmintrin.h>
+/** Encrypts one block: XOR with keys[0], eleven AESENC rounds with keys[1]..keys[11], then AESENCLAST with keys[12]. */
+AesBlock128 __fastcall raw_aes192ecb_encrypt(
+    AesBlock128 plain,
+    Aes192KeySchedule* key_schedule)
+{
+    plain = _mm_xor_si128(plain, key_schedule->keys[0]); /* initial AddRoundKey */
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[1]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[2]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[3]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[4]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[5]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[6]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[7]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[8]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[9]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[10]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[11]);
+    return _mm_aesenclast_si128(plain, key_schedule->keys[12]);
+}
+/** Decrypts one block: XOR with keys[0], eleven AESDEC rounds, then AESDECLAST with keys[12]. NOTE(review): presumably expects the output of raw_aes192_invert_key_schedule() - confirm against callers. */
+AesBlock128 __fastcall raw_aes192ecb_decrypt(
+    AesBlock128 cipher,
+    Aes192KeySchedule* inverted_schedule)
+{
+    cipher = _mm_xor_si128(cipher, inverted_schedule->keys[0]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[1]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[2]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[3]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[4]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[5]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[6]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[7]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[8]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[9]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[10]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[11]);
+    return _mm_aesdeclast_si128(cipher, inverted_schedule->keys[12]);
+}
+/** Advances the key state in place: updates \p prev_lo from its running dword XOR and \p hwgen, then folds the result into \p prev_hi. */
+static void __fastcall aes192_keygen_assist(
+    AesBlock128* prev_lo,
+    AesBlock128* prev_hi,
+    AesBlock128 hwgen)
+{
+    AesBlock128 tmp = *prev_lo;
+    /* Running XOR of the four dwords of *prev_lo: tmp is shifted by one more dword before each fold. */
+    tmp = _mm_slli_si128(tmp, 4);
+    *prev_lo = _mm_xor_si128(*prev_lo, tmp);
+    tmp = _mm_slli_si128(tmp, 4);
+    *prev_lo = _mm_xor_si128(*prev_lo, tmp);
+    tmp = _mm_slli_si128(tmp, 4);
+    *prev_lo = _mm_xor_si128(*prev_lo, tmp);
+    /* Per the AESKEYGENASSIST definition, dword 1 is RotWord(SubWord(X1)) XOR rcon; 0x55 copies it to all lanes. */
+    hwgen = _mm_shuffle_epi32(hwgen, 0x55);
+    *prev_lo = _mm_xor_si128(*prev_lo, hwgen);
+    /* 0xf3 selects source dwords 3,0,3,3 for lanes 0..3. NOTE(review): dword 3 is folded into lane 0, so the upper half of *prev_hi is presumably zero on entry - confirm. */
+    tmp = _mm_shuffle_epi32(*prev_hi, 0xf3);
+    *prev_hi = _mm_xor_si128(*prev_hi, tmp);
+    /* Broadcast dword 3 of the new *prev_lo, keep it only in the low 8 bytes, and XOR it into *prev_hi. */
+    tmp = _mm_shuffle_epi32(*prev_lo, 0xff);
+    tmp = _mm_srli_si128(tmp, 8);
+    *prev_hi = _mm_xor_si128(*prev_hi, tmp);
+}
+/** Fills keys[0]..keys[12] from \p key_lo and \p key_hi using eight aes192_keygen_assist() steps (round constants 0x01..0x80). */
+void __fastcall raw_aes192_expand_key_schedule(
+    AesBlock128 key_lo,
+    AesBlock128 key_hi,
+    Aes192KeySchedule* key_schedule)
+{
+    key_schedule->keys[0] = key_lo;
+    key_schedule->keys[1] = key_hi; /* provisional: the upper half is replaced two statements below */
+    /* _mm_shuffle_pd(a, b, 0) yields {low half of a, low half of b}; with immediate 1 it yields {high half of a, low half of b}. */
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x01));
+    key_schedule->keys[1] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_schedule->keys[1]), _mm_castsi128_pd(key_lo), 0));
+    key_schedule->keys[2] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_lo), _mm_castsi128_pd(key_hi), 1));
+    /* Steps alternate: one stores key_lo and key_hi directly, the next repacks 64-bit halves across the key boundary. */
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x02));
+    key_schedule->keys[3] = key_lo;
+    key_schedule->keys[4] = key_hi;
+
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x04));
+    key_schedule->keys[4] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_schedule->keys[4]), _mm_castsi128_pd(key_lo), 0));
+    key_schedule->keys[5] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_lo), _mm_castsi128_pd(key_hi), 1));
+
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x08));
+    key_schedule->keys[6] = key_lo;
+    key_schedule->keys[7] = key_hi;
+
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x10));
+    key_schedule->keys[7] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_schedule->keys[7]), _mm_castsi128_pd(key_lo), 0));
+    key_schedule->keys[8] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_lo), _mm_castsi128_pd(key_hi), 1));
+
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x20));
+    key_schedule->keys[9] = key_lo;
+    key_schedule->keys[10] = key_hi;
+
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x40));
+    key_schedule->keys[10] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_schedule->keys[10]), _mm_castsi128_pd(key_lo), 0));
+    key_schedule->keys[11] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(key_lo), _mm_castsi128_pd(key_hi), 1));
+    /* Last step: only key_lo is stored; key_hi is not read again. */
+    aes192_keygen_assist(&key_lo, &key_hi, _mm_aeskeygenassist_si128(key_hi, 0x80));
+    key_schedule->keys[12] = key_lo;
+}
+/** Writes the keys of \p key_schedule into \p inverted_schedule in reverse order, applying AESIMC to source keys[1]..keys[11] and copying keys[0] and keys[12] unchanged. */
+void __fastcall raw_aes192_invert_key_schedule(
+    Aes192KeySchedule* key_schedule,
+    Aes192KeySchedule* inverted_schedule)
+{
+    inverted_schedule->keys[0] = key_schedule->keys[12];
+    inverted_schedule->keys[1] = _mm_aesimc_si128(key_schedule->keys[11]);
+    inverted_schedule->keys[2] = _mm_aesimc_si128(key_schedule->keys[10]);
+    inverted_schedule->keys[3] = _mm_aesimc_si128(key_schedule->keys[9]);
+    inverted_schedule->keys[4] = _mm_aesimc_si128(key_schedule->keys[8]);
+    inverted_schedule->keys[5] = _mm_aesimc_si128(key_schedule->keys[7]);
+    inverted_schedule->keys[6] = _mm_aesimc_si128(key_schedule->keys[6]);
+    inverted_schedule->keys[7] = _mm_aesimc_si128(key_schedule->keys[5]);
+    inverted_schedule->keys[8] = _mm_aesimc_si128(key_schedule->keys[4]);
+    inverted_schedule->keys[9] = _mm_aesimc_si128(key_schedule->keys[3]);
+    inverted_schedule->keys[10] = _mm_aesimc_si128(key_schedule->keys[2]);
+    inverted_schedule->keys[11] = _mm_aesimc_si128(key_schedule->keys[1]);
+    inverted_schedule->keys[12] = key_schedule->keys[0];
+}
diff --git a/src/aes256.c b/src/aes256.c
new file mode 100644
index 0000000..1faa092
--- /dev/null
+++ b/src/aes256.c
@@ -0,0 +1,162 @@
+/**
+ * \file
+ * \author Egor Tensin <Egor.Tensin@gmail.com>
+ * \date 2015
+ * \copyright This file is licensed under the
terms of the MIT License.
+ * See LICENSE.txt for details.
+ */
+
+#include <aesni/all.h>
+
+#include <emmintrin.h>
+#include <wmmintrin.h>
+/** Encrypts one block: XOR with keys[0], thirteen AESENC rounds with keys[1]..keys[13], then AESENCLAST with keys[14]. */
+AesBlock128 __fastcall raw_aes256ecb_encrypt(
+    AesBlock128 plain,
+    Aes256KeySchedule* key_schedule)
+{
+    plain = _mm_xor_si128(plain, key_schedule->keys[0]); /* initial AddRoundKey */
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[1]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[2]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[3]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[4]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[5]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[6]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[7]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[8]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[9]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[10]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[11]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[12]);
+    plain = _mm_aesenc_si128(plain, key_schedule->keys[13]);
+    return _mm_aesenclast_si128(plain, key_schedule->keys[14]);
+}
+/** Decrypts one block: XOR with keys[0], thirteen AESDEC rounds, then AESDECLAST with keys[14]. NOTE(review): presumably expects the output of raw_aes256_invert_key_schedule() - confirm against callers. */
+AesBlock128 __fastcall raw_aes256ecb_decrypt(
+    AesBlock128 cipher,
+    Aes256KeySchedule* inverted_schedule)
+{
+    cipher = _mm_xor_si128(cipher, inverted_schedule->keys[0]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[1]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[2]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[3]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[4]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[5]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[6]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[7]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[8]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[9]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[10]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[11]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[12]);
+    cipher = _mm_aesdec_si128(cipher, inverted_schedule->keys[13]);
+    return _mm_aesdeclast_si128(cipher, inverted_schedule->keys[14]);
+}
+/** Computes the next round key from \p prev_lo and \p hwgen, leaves it in \p prev_hi with the old *prev_hi moved to \p prev_lo, and returns the new key. */
+static AesBlock128 __fastcall aes256_keygen_assist(
+    AesBlock128* prev_lo,
+    AesBlock128* prev_hi,
+    AesBlock128 hwgen)
+{
+    AesBlock128 tmp = *prev_lo;
+    /* Running XOR of the four dwords of *prev_lo: tmp is shifted by one more dword before each fold. */
+    tmp = _mm_slli_si128(tmp, 4);
+    *prev_lo = _mm_xor_si128(*prev_lo, tmp);
+    tmp = _mm_slli_si128(tmp, 4);
+    *prev_lo = _mm_xor_si128(*prev_lo, tmp);
+    tmp = _mm_slli_si128(tmp, 4);
+    *prev_lo = _mm_xor_si128(*prev_lo, tmp);
+    /* hwgen arrives already broadcast by the caller, so it is XORed in as is. */
+    *prev_lo = _mm_xor_si128(*prev_lo, hwgen);
+    /* Three XORs swap *prev_lo and *prev_hi without a temporary; the two pointers must not alias. */
+    *prev_hi = _mm_xor_si128(*prev_hi, *prev_lo);
+    *prev_lo = _mm_xor_si128(*prev_lo, *prev_hi);
+    *prev_hi = _mm_xor_si128(*prev_hi, *prev_lo);
+
+    return *prev_hi;
+}
+/** Fills keys[0]..keys[14]: keys[0] and keys[1] are \p key_lo and \p key_hi, keys[2]..keys[14] come from thirteen aes256_keygen_assist() steps. */
+void __fastcall raw_aes256_expand_key_schedule(
+    AesBlock128 key_lo,
+    AesBlock128 key_hi,
+    Aes256KeySchedule* key_schedule)
+{
+    AesBlock128 prev_lo, prev_hi;
+    AesBlock128 hwgen;
+
+    prev_lo = key_schedule->keys[0] = key_lo;
+    prev_hi = key_schedule->keys[1] = key_hi;
+    /* Even keys: round constant 0x01..0x40, and 0xff broadcasts dword 3, which is RotWord(SubWord(X3)) XOR rcon per AESKEYGENASSIST. */
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0x01);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    key_schedule->keys[2] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+    /* Odd keys: round constant 0, and 0xaa broadcasts dword 2, which is SubWord(X3) without rotation per AESKEYGENASSIST. */
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xaa);
+    key_schedule->keys[3] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0x02);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    key_schedule->keys[4] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xaa);
+    key_schedule->keys[5] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0x04);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    key_schedule->keys[6] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xaa);
+    key_schedule->keys[7] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0x08);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    key_schedule->keys[8] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xaa);
+    key_schedule->keys[9] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0x10);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    key_schedule->keys[10] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xaa);
+    key_schedule->keys[11] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0x20);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    key_schedule->keys[12] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xaa);
+    key_schedule->keys[13] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+
+    hwgen = _mm_aeskeygenassist_si128(prev_hi, 0x40);
+    hwgen = _mm_shuffle_epi32(hwgen, 0xff);
+    key_schedule->keys[14] = aes256_keygen_assist(&prev_lo, &prev_hi, hwgen);
+}
+/** Writes the keys of \p key_schedule into \p inverted_schedule in reverse order, applying AESIMC to source keys[1]..keys[13] and copying keys[0] and keys[14] unchanged. */
+void __fastcall raw_aes256_invert_key_schedule(
+    Aes256KeySchedule* key_schedule,
+    Aes256KeySchedule* inverted_schedule)
+{
+    inverted_schedule->keys[0] = key_schedule->keys[14];
+    inverted_schedule->keys[1] = _mm_aesimc_si128(key_schedule->keys[13]);
+    inverted_schedule->keys[2] = _mm_aesimc_si128(key_schedule->keys[12]);
+    inverted_schedule->keys[3] = _mm_aesimc_si128(key_schedule->keys[11]);
+    inverted_schedule->keys[4] = _mm_aesimc_si128(key_schedule->keys[10]);
+    inverted_schedule->keys[5] = _mm_aesimc_si128(key_schedule->keys[9]);
+    inverted_schedule->keys[6] = _mm_aesimc_si128(key_schedule->keys[8]);
+    inverted_schedule->keys[7] = _mm_aesimc_si128(key_schedule->keys[7]);
+    inverted_schedule->keys[8] = _mm_aesimc_si128(key_schedule->keys[6]);
+    inverted_schedule->keys[9] = _mm_aesimc_si128(key_schedule->keys[5]);
+    inverted_schedule->keys[10] = _mm_aesimc_si128(key_schedule->keys[4]);
+    inverted_schedule->keys[11] = _mm_aesimc_si128(key_schedule->keys[3]);
+    inverted_schedule->keys[12] = _mm_aesimc_si128(key_schedule->keys[2]);
+    inverted_schedule->keys[13] = _mm_aesimc_si128(key_schedule->keys[1]);
+    inverted_schedule->keys[14] = key_schedule->keys[0];
+}
|