diff options
author | Egor Tensin <Egor.Tensin@gmail.com> | 2020-10-04 10:50:19 +0300 |
---|---|---|
committer | Egor Tensin <Egor.Tensin@gmail.com> | 2020-10-04 10:50:19 +0300 |
commit | e3fc06afd790b0fab3d31e6a81d75ccc0d7bb1e9 (patch) | |
tree | 4189a9241f7463a2bd51c16071a878f5319a18e3 | |
parent | initial commit (diff) | |
download | winapi-utf8-e3fc06afd790b0fab3d31e6a81d75ccc0d7bb1e9.tar.gz winapi-utf8-e3fc06afd790b0fab3d31e6a81d75ccc0d7bb1e9.zip |
add string conversion functions + some tests
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | CMakeLists.txt | 17 | ||||
m--------- | cmake | 0 | ||||
-rw-r--r-- | include/winapi/utf8.hpp | 22 | ||||
-rw-r--r-- | src/convert.cpp | 101 | ||||
-rw-r--r-- | test/CMakeLists.txt | 10 | ||||
-rw-r--r-- | test/convert.cpp | 81 | ||||
-rw-r--r-- | test/main.cpp | 7 | ||||
-rw-r--r-- | test/string.cpp | 150 |
9 files changed, 391 insertions, 0 deletions
diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..d65ecb9 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "cmake"] + path = cmake + url = https://github.com/egor-tensin/cmake-common.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..d43a1aa --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.5) # for Boost::* imported targets + +project(winapi_utf8 CXX) + +include(cmake/common.cmake) + +file(GLOB_RECURSE winapi_utf8_include "include/*.hpp") +file(GLOB winapi_utf8_src "src/*.cpp") +add_library(winapi_utf8 ${winapi_utf8_include} ${winapi_utf8_src}) +target_include_directories(winapi_utf8 PUBLIC include/) +install(TARGETS winapi_utf8 LIBRARY DESTINATION lib) + +if(WINAPI_UTF8_ENABLE_TESTS) + add_subdirectory(test) +endif() + +install(FILES LICENSE.txt DESTINATION share) diff --git a/cmake b/cmake new file mode 160000 +Subproject 6333ff77ab603d86146394fd4a7a6fdc09e6b8e diff --git a/include/winapi/utf8.hpp b/include/winapi/utf8.hpp new file mode 100644 index 0000000..1eb6963 --- /dev/null +++ b/include/winapi/utf8.hpp @@ -0,0 +1,22 @@ +// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com> +// This file is part of the "winapi-utf8" project. +// For details, see https://github.com/egor-tensin/winapi-utf8. +// Distributed under the MIT License. + +#pragma once + +#include <cstddef> +#include <string> +#include <vector> + +namespace winapi { + +std::wstring widen(const std::string&); +std::wstring widen(const std::vector<unsigned char>&); +std::wstring widen(const void*, std::size_t nb); + +std::string narrow(const std::wstring&); +std::string narrow(const std::vector<unsigned char>&); +std::string narrow(const void*, std::size_t nb); + +} diff --git a/src/convert.cpp b/src/convert.cpp new file mode 100644 index 0000000..6f4f400 --- /dev/null +++ b/src/convert.cpp @@ -0,0 +1,101 @@ +// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com> +// This file is part of the "winapi-utf8" project. +// For details, see https://github.com/egor-tensin/winapi-utf8. +// Distributed under the MIT License. + +#include <winapi/utf8.hpp> + +#include <windows.h> + +#include <cstddef> +#include <sstream> +#include <stdexcept> +#include <string> +#include <vector> + +namespace winapi { +namespace { + +std::runtime_error error(const char* function, DWORD code) { + std::ostringstream oss; + oss << function << " failed with error code " << code; + return std::runtime_error{oss.str()}; +} + +} + +std::wstring widen(const std::string& src) { + return widen(src.c_str(), src.size()); +} + +std::wstring widen(const std::vector<unsigned char>& src) { + return widen(src.data(), src.size()); +} + +std::wstring widen(const void* src, std::size_t in_nb) { + const DWORD flags = MB_ERR_INVALID_CHARS + | MB_PRECOMPOSED; + + const char* in_data = reinterpret_cast<const char*>(src); + + auto out_nch = ::MultiByteToWideChar(CP_UTF8, flags, in_data, in_nb, NULL, 0); + + if (out_nch == 0) { + throw error("MultiByteToWideChar", GetLastError()); + } + + static_assert(sizeof(wchar_t) == sizeof(WCHAR), "wchar_t != WCHAR"); + std::vector<wchar_t> out; + out.resize(out_nch); + + out_nch = ::MultiByteToWideChar(CP_UTF8, flags, in_data, in_nb, out.data(), out.size()); + + if (out_nch == 0) { + throw error("MultiByteToWideChar", GetLastError()); + } + + return {out.data(), out.size()}; +} + +std::string narrow(const std::wstring& src) { + static_assert(sizeof(std::wstring::value_type) == sizeof(WCHAR), "wchar_t != WCHAR"); + return narrow(src.c_str(), src.size() * sizeof(std::wstring::value_type)); +} + +std::string narrow(const std::vector<unsigned char>& src) { + return narrow(src.data(), src.size()); +} + +std::string narrow(const void* src, std::size_t in_nb) { + if (in_nb % sizeof(WCHAR) != 0) { + std::ostringstream err_msg; + err_msg << "narrow: invalid buffer size: " << in_nb; + throw std::runtime_error{err_msg.str()}; + } + + const std::size_t in_nch = in_nb / sizeof(WCHAR); + + const DWORD flags = WC_ERR_INVALID_CHARS + | WC_NO_BEST_FIT_CHARS; + + const wchar_t* in_data = reinterpret_cast<const wchar_t*>(src); + + auto out_nb = ::WideCharToMultiByte(CP_UTF8, flags, in_data, in_nch, NULL, 0, NULL, NULL); + + if (out_nb == 0) { + throw error("WideCharToMultiByte", GetLastError()); + } + + std::vector<char> out; + out.resize(out_nb); + + out_nb = ::WideCharToMultiByte(CP_UTF8, flags, in_data, in_nch, out.data(), out.size(), NULL, NULL); + + if (out_nb == 0) { + throw error("WideCharToMultiByte", GetLastError()); + } + + return {out.data(), out.size()}; +} + +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000..8fc1dee --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,10 @@ +find_package(Boost REQUIRED COMPONENTS unit_test_framework) + +add_executable(unit_tests main.cpp convert.cpp string.cpp) +target_link_libraries(unit_tests PRIVATE winapi_utf8) +target_link_libraries(unit_tests PRIVATE Boost::disable_autolinking Boost::unit_test_framework) + +install(TARGETS unit_tests RUNTIME DESTINATION bin/test) +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + install(FILES "$<TARGET_PDB_FILE:unit_tests>" DESTINATION bin/test OPTIONAL) +endif() diff --git a/test/convert.cpp b/test/convert.cpp new file mode 100644 index 0000000..abd7173 --- /dev/null +++ b/test/convert.cpp @@ -0,0 +1,81 @@ +// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com> +// This file is part of the "winapi-utf8" project. +// For details, see https://github.com/egor-tensin/winapi-utf8. +// Distributed under the MIT License. + +#include <winapi/utf8.hpp> + +#include <boost/format.hpp> +#include <boost/test/data/monomorphic.hpp> +#include <boost/test/data/test_case.hpp> +#include <boost/test/unit_test.hpp> + +#include <ostream> +#include <string> +#include <vector> + +namespace std { + +ostream& operator<<(ostream& os, unsigned char c) { + return os << boost::format("%|1$02x|") % static_cast<unsigned int>(c); +} + +ostream& operator<<(ostream& os, const vector<unsigned char>& cs) { + for (auto c : cs) { + os << c; + } + return os; +} + +} + +namespace { + +std::vector<unsigned char> from(std::initializer_list<unsigned char> xs) { + return {xs}; +} + +template <typename CharT> +std::vector<unsigned char> from(const std::basic_string<CharT>& s) { + const auto buf = reinterpret_cast<const unsigned char*>(s.c_str()); + const auto size = s.size() * sizeof(CharT); + return {buf, buf + size}; +} + +std::vector<std::vector<unsigned char>> utf16 = { + // Hello + from({0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00}), + // Привет + from({0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, 0x42, 0x04}), +}; + +std::vector<std::vector<unsigned char>> utf8 = { + // Hello + from({0x48, 0x65, 0x6c, 0x6c, 0x6f}), + // Привет + from({0xd0, 0x9f, 0xd1, 0x80, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82}), +}; + +} + +BOOST_TEST_SPECIALIZED_COLLECTION_COMPARE(std::vector<unsigned char>); + +BOOST_AUTO_TEST_SUITE(convert_tests) + +BOOST_DATA_TEST_CASE(test_narrow, + boost::unit_test::data::make(utf16) ^ utf8, + input, + expected) { + auto actual = from(winapi::narrow(input)); + BOOST_TEST(actual == expected, "Expected: " << expected << ", actual: " << actual); +} + +BOOST_DATA_TEST_CASE(test_widen, + boost::unit_test::data::make(utf8) ^ utf16, + input, + expected) { + auto actual = from(winapi::widen(input)); + BOOST_TEST(actual == expected, "Expected: " << expected << ", actual: " << actual); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/main.cpp b/test/main.cpp new file mode 100644 index 0000000..d32f6f4 --- /dev/null +++ b/test/main.cpp @@ -0,0 +1,7 @@ +// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com> +// This file is part of the "winapi-utf8" project. +// For details, see https://github.com/egor-tensin/winapi-utf8. +// Distributed under the MIT License. + +#define BOOST_TEST_MODULE winapi_utf8 tests +#include <boost/test/unit_test.hpp> diff --git a/test/string.cpp b/test/string.cpp new file mode 100644 index 0000000..d5e7b3e --- /dev/null +++ b/test/string.cpp @@ -0,0 +1,150 @@ +// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com> +// This file is part of the "winapi-utf8" project. +// For details, see https://github.com/egor-tensin/winapi-utf8. +// Distributed under the MIT License. + +#include <boost/test/unit_test.hpp> + +#include <cstddef> +#include <cstring> +#include <string> +#include <vector> + +namespace { + +std::string convert(const char* src, std::size_t nch) { + return std::string{src, nch}; +} + +std::string convert(const char* src) { + return std::string{src}; +} + +std::wstring convert(const wchar_t* src, std::size_t nch) { + return std::wstring{src, nch}; +} + +std::wstring convert(const wchar_t* src) { + return std::wstring{src}; +} + +} + +BOOST_AUTO_TEST_SUITE(string_tests) + +BOOST_AUTO_TEST_CASE(length) { + { + std::string empty; + BOOST_TEST(empty.size() == 0); + BOOST_TEST(empty.length() == 0); + } + { + std::wstring empty; + BOOST_TEST(empty.size() == 0); + BOOST_TEST(empty.length() == 0); + } + { + std::string s = "asdf"; + BOOST_TEST(s.size() == 4); + BOOST_TEST(s.length() == 4); + } + { + std::wstring s = L"asdf"; + BOOST_TEST(s.size() == 4); + BOOST_TEST(s.length() == 4); + } + + { + std::string s = "as\0df"; + BOOST_TEST(s.size() == 2); + BOOST_TEST(s.length() == 2); + } + { + std::string s = "\0asdf"; + BOOST_TEST(s.size() == 0); + BOOST_TEST(s.length() == 0); + } +} + +BOOST_AUTO_TEST_CASE(null_terminated_narrow) { + const std::vector<char> src{'a', 'b', 'c', '\0', '1', '2', '3'}; + BOOST_TEST(src.size() == 7); + + { + const auto converted = convert(src.data(), 7); + BOOST_TEST(converted.size() == 7); + BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 7) == 0); + BOOST_TEST(converted.c_str()[3] == '\0'); + BOOST_TEST(converted.c_str()[7] == '\0'); + } + { + const auto converted = convert(src.data(), 4); + BOOST_TEST(converted.size() == 4); + BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 4) == 0); + BOOST_TEST(converted.c_str()[3] == '\0'); + BOOST_TEST(converted.c_str()[4] == '\0'); + } + { + const auto converted = convert(src.data(), 3); + BOOST_TEST(converted.size() == 3); + BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 3) == 0); + BOOST_TEST(converted.c_str()[3] == '\0'); + } + { + const auto converted = convert(src.data()); + BOOST_TEST(converted.size() == 3); + BOOST_TEST(converted == "abc"); + } + { + const auto converted = convert(src.data() + 2); + BOOST_TEST(converted.size() == 1); + BOOST_TEST(converted == "c"); + } +} + +BOOST_AUTO_TEST_CASE(null_terminated_wide) { + const std::vector<wchar_t> src{L'\0', L'a', L'b', L'c', L'\0', L'1', L'2', L'3'}; + BOOST_TEST(src.size() == 8); + + { + const auto converted = convert(src.data(), 8); + BOOST_TEST(converted.size() == 8); + BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 8 * sizeof(wchar_t)) == 0); + BOOST_TEST(converted.c_str()[0] == L'\0'); + BOOST_TEST(converted.c_str()[4] == L'\0'); + BOOST_TEST(converted.c_str()[8] == L'\0'); + } + { + const auto converted = convert(src.data(), 5); + BOOST_TEST(converted.size() == 5); + BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 5 * sizeof(wchar_t)) == 0); + BOOST_TEST(converted.c_str()[0] == L'\0'); + BOOST_TEST(converted.c_str()[4] == L'\0'); + BOOST_TEST(converted.c_str()[5] == L'\0'); + } + { + const auto converted = convert(src.data(), 4); + BOOST_TEST(converted.size() == 4); + BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 4 * sizeof(wchar_t)) == 0); + BOOST_TEST(converted.c_str()[0] == L'\0'); + BOOST_TEST(converted.c_str()[4] == L'\0'); + } + { + const auto converted = convert(src.data()); + BOOST_TEST(converted.size() == 0); + } + { + const auto converted = convert(src.data() + 1); + BOOST_TEST(converted.size() == 3); + BOOST_TEST(converted[0] == L'a'); + BOOST_TEST(converted[1] == L'b'); + BOOST_TEST(converted[2] == L'c'); + } + { + const auto converted = convert(src.data() + 3); + BOOST_TEST(converted.size() == 1); + BOOST_TEST(converted[0] == L'c'); + } +} + +BOOST_AUTO_TEST_SUITE_END() |