about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Egor Tensin <Egor.Tensin@gmail.com>, 2020-10-04 10:50:19 +0300
committer: Egor Tensin <Egor.Tensin@gmail.com>, 2020-10-04 10:50:19 +0300
commit: e3fc06afd790b0fab3d31e6a81d75ccc0d7bb1e9 (patch)
tree: 4189a9241f7463a2bd51c16071a878f5319a18e3
parent: initial commit (diff)
download: winapi-utf8-e3fc06afd790b0fab3d31e6a81d75ccc0d7bb1e9.tar.gz
          winapi-utf8-e3fc06afd790b0fab3d31e6a81d75ccc0d7bb1e9.zip
add string conversion functions + some tests
-rw-r--r--  .gitmodules              3
-rw-r--r--  CMakeLists.txt          17
m---------  cmake                    0
-rw-r--r--  include/winapi/utf8.hpp 22
-rw-r--r--  src/convert.cpp        101
-rw-r--r--  test/CMakeLists.txt     10
-rw-r--r--  test/convert.cpp        81
-rw-r--r--  test/main.cpp            7
-rw-r--r--  test/string.cpp        150
9 files changed, 391 insertions, 0 deletions
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..d65ecb9
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "cmake"]
+ path = cmake
+ url = https://github.com/egor-tensin/cmake-common.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..d43a1aa
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.5) # for Boost::* imported targets
+
+project(winapi_utf8 CXX)
+
+# Shared build settings come from the "cmake" submodule.
+include(cmake/common.cmake)
+
+# The public headers are passed to add_library() alongside the sources.
+file(GLOB_RECURSE winapi_utf8_include "include/*.hpp")
+file(GLOB winapi_utf8_src "src/*.cpp")
+add_library(winapi_utf8 ${winapi_utf8_include} ${winapi_utf8_src})
+target_include_directories(winapi_utf8 PUBLIC include/)
+
+# add_library() without STATIC/SHARED builds a static library unless
+# BUILD_SHARED_LIBS is set. Static and import libraries are ARCHIVE artifacts,
+# DLLs are RUNTIME artifacts, so a LIBRARY destination alone does not cover
+# them (and CMake older than 3.14 refuses to install a static library without
+# an ARCHIVE destination).
+install(TARGETS winapi_utf8
+    ARCHIVE DESTINATION lib
+    LIBRARY DESTINATION lib
+    RUNTIME DESTINATION bin)
+
+# The unit tests are opt-in: configure with -D WINAPI_UTF8_ENABLE_TESTS=ON.
+if(WINAPI_UTF8_ENABLE_TESTS)
+    add_subdirectory(test)
+endif()
+
+install(FILES LICENSE.txt DESTINATION share)
diff --git a/cmake b/cmake
new file mode 160000
+Subproject 6333ff77ab603d86146394fd4a7a6fdc09e6b8e
diff --git a/include/winapi/utf8.hpp b/include/winapi/utf8.hpp
new file mode 100644
index 0000000..1eb6963
--- /dev/null
+++ b/include/winapi/utf8.hpp
@@ -0,0 +1,22 @@
+// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
+// This file is part of the "winapi-utf8" project.
+// For details, see https://github.com/egor-tensin/winapi-utf8.
+// Distributed under the MIT License.
+
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace winapi {
+
+// Convert UTF-8 text to a UTF-16 std::wstring.
+// The input is a std::string, a vector of bytes, or a raw buffer that is `nb`
+// bytes long. Throws std::runtime_error if the conversion fails (e.g. the
+// input is not valid UTF-8).
+std::wstring widen(const std::string&);
+std::wstring widen(const std::vector<unsigned char>&);
+std::wstring widen(const void*, std::size_t nb);
+
+// Convert UTF-16 text to a UTF-8 std::string.
+// For the vector and raw buffer overloads, the size is given in bytes (not in
+// characters) and must be a multiple of the wide character size.
+// Throws std::runtime_error if the size is invalid or the conversion fails.
+std::string narrow(const std::wstring&);
+std::string narrow(const std::vector<unsigned char>&);
+std::string narrow(const void*, std::size_t nb);
+
+}
diff --git a/src/convert.cpp b/src/convert.cpp
new file mode 100644
index 0000000..6f4f400
--- /dev/null
+++ b/src/convert.cpp
@@ -0,0 +1,101 @@
+// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
+// This file is part of the "winapi-utf8" project.
+// For details, see https://github.com/egor-tensin/winapi-utf8.
+// Distributed under the MIT License.
+
+#include <winapi/utf8.hpp>
+
+#include <windows.h>
+
+#include <cstddef>
+#include <limits>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace winapi {
+namespace {
+
+// Builds the exception thrown when a WinAPI call fails. The message names the
+// failing function and includes the error code (callers pass GetLastError()).
+std::runtime_error error(const char* function, DWORD code) {
+    std::ostringstream oss;
+    oss << function << " failed with error code " << code;
+    return std::runtime_error{oss.str()};
+}
+
+// The WinAPI conversion functions take buffer sizes as `int`. Reject anything
+// larger instead of letting an implicit conversion truncate the size.
+int to_winapi_size(const char* function, std::size_t size) {
+    // Parenthesized so that the max() macro from <windows.h> is not expanded.
+    const auto limit = static_cast<std::size_t>((std::numeric_limits<int>::max)());
+    if (size > limit) {
+        std::ostringstream oss;
+        oss << function << ": buffer is too large: " << size;
+        throw std::runtime_error{oss.str()};
+    }
+    return static_cast<int>(size);
+}
+
+}
+
+// UTF-8 string -> UTF-16 string.
+std::wstring widen(const std::string& src) {
+    return widen(src.c_str(), src.size());
+}
+
+// UTF-8 bytes -> UTF-16 string.
+std::wstring widen(const std::vector<unsigned char>& src) {
+    return widen(src.data(), src.size());
+}
+
+// Converts `in_nb` bytes of UTF-8 text starting at `src` to UTF-16.
+// An empty buffer gives an empty string. Throws std::runtime_error if the
+// buffer is too large for the WinAPI or if MultiByteToWideChar fails (e.g.
+// on invalid UTF-8 input).
+std::wstring widen(const void* src, std::size_t in_nb) {
+    // MultiByteToWideChar treats a zero-length input as an error, so the
+    // empty string has to be handled separately.
+    if (in_nb == 0) {
+        return {};
+    }
+
+    const int in_size = to_winapi_size("widen", in_nb);
+
+    // For CP_UTF8, MB_ERR_INVALID_CHARS is the only flag that is documented
+    // as allowed; anything else may fail with ERROR_INVALID_FLAGS.
+    const DWORD flags = MB_ERR_INVALID_CHARS;
+
+    const char* in_data = static_cast<const char*>(src);
+
+    // First pass: ask for the required output size (in wide characters).
+    const int out_nch = ::MultiByteToWideChar(CP_UTF8, flags, in_data, in_size, nullptr, 0);
+
+    if (out_nch == 0) {
+        throw error("MultiByteToWideChar", GetLastError());
+    }
+
+    static_assert(sizeof(wchar_t) == sizeof(WCHAR), "wchar_t != WCHAR");
+    std::wstring out(static_cast<std::size_t>(out_nch), L'\0');
+
+    // Second pass: convert directly into the result's storage.
+    const int written = ::MultiByteToWideChar(CP_UTF8, flags, in_data, in_size, &out[0], out_nch);
+
+    if (written == 0) {
+        throw error("MultiByteToWideChar", GetLastError());
+    }
+
+    out.resize(static_cast<std::size_t>(written));
+    return out;
+}
+
+// UTF-16 string -> UTF-8 string.
+std::string narrow(const std::wstring& src) {
+    static_assert(sizeof(std::wstring::value_type) == sizeof(WCHAR), "wchar_t != WCHAR");
+    return narrow(src.c_str(), src.size() * sizeof(std::wstring::value_type));
+}
+
+// UTF-16 bytes -> UTF-8 string.
+std::string narrow(const std::vector<unsigned char>& src) {
+    return narrow(src.data(), src.size());
+}
+
+// Converts `in_nb` bytes of UTF-16 text starting at `src` to UTF-8.
+// The buffer is read as an array of WCHAR, so `in_nb` must be a multiple of
+// sizeof(WCHAR). An empty buffer gives an empty string. Throws
+// std::runtime_error if the size is invalid, the buffer is too large for the
+// WinAPI, or WideCharToMultiByte fails (e.g. on invalid UTF-16 input).
+std::string narrow(const void* src, std::size_t in_nb) {
+    if (in_nb % sizeof(WCHAR) != 0) {
+        std::ostringstream err_msg;
+        err_msg << "narrow: invalid buffer size: " << in_nb;
+        throw std::runtime_error{err_msg.str()};
+    }
+
+    // WideCharToMultiByte treats a zero-length input as an error, so the
+    // empty string has to be handled separately.
+    if (in_nb == 0) {
+        return {};
+    }
+
+    const int in_nch = to_winapi_size("narrow", in_nb / sizeof(WCHAR));
+
+    // For CP_UTF8, WC_ERR_INVALID_CHARS is the only flag that is documented
+    // as allowed; anything else may fail with ERROR_INVALID_FLAGS.
+    const DWORD flags = WC_ERR_INVALID_CHARS;
+
+    const wchar_t* in_data = static_cast<const wchar_t*>(src);
+
+    // First pass: ask for the required output size (in bytes).
+    const int out_nb = ::WideCharToMultiByte(CP_UTF8, flags, in_data, in_nch, nullptr, 0, nullptr, nullptr);
+
+    if (out_nb == 0) {
+        throw error("WideCharToMultiByte", GetLastError());
+    }
+
+    std::string out(static_cast<std::size_t>(out_nb), '\0');
+
+    // Second pass: convert directly into the result's storage.
+    const int written = ::WideCharToMultiByte(CP_UTF8, flags, in_data, in_nch, &out[0], out_nb, nullptr, nullptr);
+
+    if (written == 0) {
+        throw error("WideCharToMultiByte", GetLastError());
+    }
+
+    out.resize(static_cast<std::size_t>(written));
+    return out;
+}
+
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..8fc1dee
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,10 @@
+# The unit tests are written with Boost.Test.
+find_package(Boost REQUIRED COMPONENTS unit_test_framework)
+
+add_executable(unit_tests main.cpp convert.cpp string.cpp)
+target_link_libraries(unit_tests PRIVATE
+    winapi_utf8
+    Boost::disable_autolinking
+    Boost::unit_test_framework)
+
+install(TARGETS unit_tests RUNTIME DESTINATION bin/test)
+
+# With MSVC, install the .pdb file next to the executable if there is one.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    install(FILES "$<TARGET_PDB_FILE:unit_tests>" DESTINATION bin/test OPTIONAL)
+endif()
diff --git a/test/convert.cpp b/test/convert.cpp
new file mode 100644
index 0000000..abd7173
--- /dev/null
+++ b/test/convert.cpp
@@ -0,0 +1,81 @@
+// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
+// This file is part of the "winapi-utf8" project.
+// For details, see https://github.com/egor-tensin/winapi-utf8.
+// Distributed under the MIT License.
+
+#include <winapi/utf8.hpp>
+
+#include <boost/format.hpp>
+#include <boost/test/data/monomorphic.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+// Stream output for bytes and byte vectors, used in the failure messages of
+// the tests below.
+// NOTE(review): the standard does not allow adding overloads to namespace std.
+// Presumably these live here so that argument-dependent lookup can find them
+// for std::vector<unsigned char> from inside Boost.Test - confirm before
+// moving them elsewhere.
+namespace std {
+
+// Prints a byte as two zero-padded hexadecimal digits instead of a character.
+ostream& operator<<(ostream& os, unsigned char c) {
+ return os << boost::format("%|1$02x|") % static_cast<unsigned int>(c);
+}
+
+// Prints every byte of the vector in hexadecimal, without separators.
+ostream& operator<<(ostream& os, const vector<unsigned char>& cs) {
+ for (auto c : cs) {
+ os << c;
+ }
+ return os;
+}
+
+}
+
+namespace {
+
+// Makes a byte vector out of a list of byte literals.
+std::vector<unsigned char> from(std::initializer_list<unsigned char> xs) {
+    return std::vector<unsigned char>(xs);
+}
+
+// Copies the raw bytes of a string's characters (without the terminating
+// null character) to a byte vector.
+template <typename CharT>
+std::vector<unsigned char> from(const std::basic_string<CharT>& s) {
+    const auto first = reinterpret_cast<const unsigned char*>(s.data());
+    const auto last = first + s.size() * sizeof(CharT);
+    return std::vector<unsigned char>(first, last);
+}
+
+// Sample strings as little-endian UTF-16 bytes. The strings appear in the
+// same order as in `utf8` below.
+std::vector<std::vector<unsigned char>> utf16 = {
+    // Hello
+    from({0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00}),
+    // Привет
+    from({0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, 0x42, 0x04}),
+};
+
+// The same sample strings as UTF-8 bytes.
+std::vector<std::vector<unsigned char>> utf8 = {
+    // Hello
+    from({0x48, 0x65, 0x6c, 0x6c, 0x6f}),
+    // Привет
+    from({0xd0, 0x9f, 0xd1, 0x80, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x82}),
+};
+
+}
+
+// Make Boost.Test compare byte vectors as collections.
+BOOST_TEST_SPECIALIZED_COLLECTION_COMPARE(std::vector<unsigned char>);
+
+BOOST_AUTO_TEST_SUITE(convert_tests)
+
+// UTF-16 -> UTF-8: the `utf16` and `utf8` datasets are zipped with ^, so each
+// input is checked against the expected bytes at the same index.
+BOOST_DATA_TEST_CASE(test_narrow,
+ boost::unit_test::data::make(utf16) ^ utf8,
+ input,
+ expected) {
+ auto actual = from(winapi::narrow(input));
+ BOOST_TEST(actual == expected, "Expected: " << expected << ", actual: " << actual);
+}
+
+// UTF-8 -> UTF-16: the same datasets, zipped the other way around.
+BOOST_DATA_TEST_CASE(test_widen,
+ boost::unit_test::data::make(utf8) ^ utf16,
+ input,
+ expected) {
+ auto actual = from(winapi::widen(input));
+ BOOST_TEST(actual == expected, "Expected: " << expected << ", actual: " << actual);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/main.cpp b/test/main.cpp
new file mode 100644
index 0000000..d32f6f4
--- /dev/null
+++ b/test/main.cpp
@@ -0,0 +1,7 @@
+// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
+// This file is part of the "winapi-utf8" project.
+// For details, see https://github.com/egor-tensin/winapi-utf8.
+// Distributed under the MIT License.
+
+#define BOOST_TEST_MODULE winapi_utf8 tests
+#include <boost/test/unit_test.hpp>
diff --git a/test/string.cpp b/test/string.cpp
new file mode 100644
index 0000000..d5e7b3e
--- /dev/null
+++ b/test/string.cpp
@@ -0,0 +1,150 @@
+// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
+// This file is part of the "winapi-utf8" project.
+// For details, see https://github.com/egor-tensin/winapi-utf8.
+// Distributed under the MIT License.
+
+#include <boost/test/unit_test.hpp>
+
+#include <cstddef>
+#include <cstring>
+#include <string>
+#include <vector>
+
+namespace {
+
+// Builds a narrow string out of exactly `nch` characters; embedded null
+// characters are kept.
+std::string convert(const char* src, std::size_t nch) {
+    std::string result(src, nch);
+    return result;
+}
+
+// Builds a narrow string out of a null-terminated buffer.
+std::string convert(const char* src) {
+    std::string result(src);
+    return result;
+}
+
+// Builds a wide string out of exactly `nch` characters; embedded null
+// characters are kept.
+std::wstring convert(const wchar_t* src, std::size_t nch) {
+    std::wstring result(src, nch);
+    return result;
+}
+
+// Builds a wide string out of a null-terminated buffer.
+std::wstring convert(const wchar_t* src) {
+    std::wstring result(src);
+    return result;
+}
+
+}
+
+BOOST_AUTO_TEST_SUITE(string_tests)
+
+// Checks what size()/length() report for strings built in different ways.
+BOOST_AUTO_TEST_CASE(length) {
+ // Default-constructed strings are empty.
+ {
+ std::string empty;
+ BOOST_TEST(empty.size() == 0);
+ BOOST_TEST(empty.length() == 0);
+ }
+ {
+ std::wstring empty;
+ BOOST_TEST(empty.size() == 0);
+ BOOST_TEST(empty.length() == 0);
+ }
+ // The terminating null character of a literal is not counted.
+ {
+ std::string s = "asdf";
+ BOOST_TEST(s.size() == 4);
+ BOOST_TEST(s.length() == 4);
+ }
+ {
+ std::wstring s = L"asdf";
+ BOOST_TEST(s.size() == 4);
+ BOOST_TEST(s.length() == 4);
+ }
+
+ // Initializing from a literal stops at the first null character, so
+ // everything after an embedded '\0' is dropped.
+ {
+ std::string s = "as\0df";
+ BOOST_TEST(s.size() == 2);
+ BOOST_TEST(s.length() == 2);
+ }
+ {
+ std::string s = "\0asdf";
+ BOOST_TEST(s.size() == 0);
+ BOOST_TEST(s.length() == 0);
+ }
+}
+
+// Checks how narrow strings built from a buffer with an embedded null
+// character behave, with and without an explicit length.
+BOOST_AUTO_TEST_CASE(null_terminated_narrow) {
+ const std::vector<char> src{'a', 'b', 'c', '\0', '1', '2', '3'};
+ BOOST_TEST(src.size() == 7);
+
+ // Explicit length, whole buffer: the embedded null character is kept, and
+ // c_str() adds a terminator after the last character.
+ {
+ const auto converted = convert(src.data(), 7);
+ BOOST_TEST(converted.size() == 7);
+ BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 7) == 0);
+ BOOST_TEST(converted.c_str()[3] == '\0');
+ BOOST_TEST(converted.c_str()[7] == '\0');
+ }
+ // Explicit length, up to and including the embedded null character.
+ {
+ const auto converted = convert(src.data(), 4);
+ BOOST_TEST(converted.size() == 4);
+ BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 4) == 0);
+ BOOST_TEST(converted.c_str()[3] == '\0');
+ BOOST_TEST(converted.c_str()[4] == '\0');
+ }
+ // Explicit length, stopping right before the embedded null character.
+ {
+ const auto converted = convert(src.data(), 3);
+ BOOST_TEST(converted.size() == 3);
+ BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 3) == 0);
+ BOOST_TEST(converted.c_str()[3] == '\0');
+ }
+ // No length: the string ends at the first null character.
+ {
+ const auto converted = convert(src.data());
+ BOOST_TEST(converted.size() == 3);
+ BOOST_TEST(converted == "abc");
+ }
+ // No length, starting in the middle of the buffer.
+ {
+ const auto converted = convert(src.data() + 2);
+ BOOST_TEST(converted.size() == 1);
+ BOOST_TEST(converted == "c");
+ }
+}
+
+// Same checks for wide strings; this buffer has a null character both at the
+// very start and in the middle.
+BOOST_AUTO_TEST_CASE(null_terminated_wide) {
+ const std::vector<wchar_t> src{L'\0', L'a', L'b', L'c', L'\0', L'1', L'2', L'3'};
+ BOOST_TEST(src.size() == 8);
+
+ // Explicit length, whole buffer: both null characters are kept, and c_str()
+ // adds a terminator after the last character.
+ {
+ const auto converted = convert(src.data(), 8);
+ BOOST_TEST(converted.size() == 8);
+ BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 8 * sizeof(wchar_t)) == 0);
+ BOOST_TEST(converted.c_str()[0] == L'\0');
+ BOOST_TEST(converted.c_str()[4] == L'\0');
+ BOOST_TEST(converted.c_str()[8] == L'\0');
+ }
+ // Explicit length, up to and including the second null character.
+ {
+ const auto converted = convert(src.data(), 5);
+ BOOST_TEST(converted.size() == 5);
+ BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 5 * sizeof(wchar_t)) == 0);
+ BOOST_TEST(converted.c_str()[0] == L'\0');
+ BOOST_TEST(converted.c_str()[4] == L'\0');
+ BOOST_TEST(converted.c_str()[5] == L'\0');
+ }
+ // Explicit length, stopping right before the second null character.
+ {
+ const auto converted = convert(src.data(), 4);
+ BOOST_TEST(converted.size() == 4);
+ BOOST_TEST(std::memcmp(converted.c_str(), src.data(), 4 * sizeof(wchar_t)) == 0);
+ BOOST_TEST(converted.c_str()[0] == L'\0');
+ BOOST_TEST(converted.c_str()[4] == L'\0');
+ }
+ // No length: the buffer starts with a null character, so the result is empty.
+ {
+ const auto converted = convert(src.data());
+ BOOST_TEST(converted.size() == 0);
+ }
+ // No length, skipping the leading null character: the string ends at the
+ // next one.
+ {
+ const auto converted = convert(src.data() + 1);
+ BOOST_TEST(converted.size() == 3);
+ BOOST_TEST(converted[0] == L'a');
+ BOOST_TEST(converted[1] == L'b');
+ BOOST_TEST(converted[2] == L'c');
+ }
+ // No length, starting at the last character before the second null character.
+ {
+ const auto converted = convert(src.data() + 3);
+ BOOST_TEST(converted.size() == 1);
+ BOOST_TEST(converted[0] == L'c');
+ }
+}
+
+BOOST_AUTO_TEST_SUITE_END()