diff options
31 files changed, 1087 insertions, 565 deletions
diff --git a/server/CMakeLists.txt b/server/CMakeLists.txt index 7bf356a..4339da2 100644 --- a/server/CMakeLists.txt +++ b/server/CMakeLists.txt @@ -1,11 +1,4 @@ -find_package(Boost REQUIRED COMPONENTS filesystem program_options) - -option(DEBUG_ASIO "enable debug output for Boost.Asio" OFF) - -add_executable(server main.cpp server.cpp session.cpp session_manager.cpp) -target_include_directories(server SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) -target_link_libraries(server PRIVATE ${Boost_LIBRARIES}) - -if(DEBUG_ASIO) - target_compile_definitions(server PRIVATE BOOST_ASIO_ENABLE_HANDLER_TRACKING) -endif() +add_subdirectory(common) +add_subdirectory(lexer) +add_subdirectory(parser) +add_subdirectory(main) diff --git a/server/common/CMakeLists.txt b/server/common/CMakeLists.txt new file mode 100644 index 0000000..5655861 --- /dev/null +++ b/server/common/CMakeLists.txt @@ -0,0 +1,6 @@ +find_package(Boost REQUIRED) + +add_library(common INTERFACE) + +target_include_directories(common SYSTEM INTERFACE ${Boost_INCLUDE_DIRS}) +target_link_libraries(common INTERFACE ${Boost_LIBRARIES}) diff --git a/server/error.hpp b/server/common/error.hpp index cbfbb1e..cbfbb1e 100644 --- a/server/error.hpp +++ b/server/common/error.hpp diff --git a/server/log.hpp b/server/common/log.hpp index ca0fafd..ca0fafd 100644 --- a/server/log.hpp +++ b/server/common/log.hpp diff --git a/server/lexer.hpp b/server/lexer.hpp deleted file mode 100644 index 8afe15c..0000000 --- a/server/lexer.hpp +++ /dev/null @@ -1,261 +0,0 @@ -#pragma once - -#include "error.hpp" - -#include <cmath> - -#include <exception> -#include <functional> -#include <limits> -#include <optional> -#include <regex> -#include <string> -#include <string_view> -#include <unordered_map> -#include <vector> - -namespace math::server { -namespace lexer { - -class Error : public server::Error { -public: - explicit Error(const std::string &what) - : server::Error{"lexer error: " + what} - { } -}; - -class Token { -public: - enum class Type { - LEFT_PAREN, - RIGHT_PAREN, - PLUS, - MINUS, - ASTERISK, - SLASH, - NUMBER, - }; - - explicit Token(Type type) - : m_type{type}, m_number_value{nan()} - { } - - explicit Token(double number_value) - : m_type{Type::NUMBER}, m_number_value{number_value} - { } - - bool operator==(const Token& other) const { - return m_type == other.m_type - && ((is_nan(m_number_value) && is_nan(other.m_number_value)) - || m_number_value == other.m_number_value); - } - - bool operator!=(const Token& other) const { return !(*this == other); } - - Type get_type() const { return m_type; } - - double get_number_value() const { - if (get_type() != Type::NUMBER) { - throw Error{"token must be a number to query its value"}; - } - return m_number_value; - } - -private: - static constexpr double nan() { return std::numeric_limits<double>::quiet_NaN(); } - - static bool is_nan(double x) { return std::isnan(x); } - - Type m_type; - double m_number_value; -}; - -namespace details { - -inline std::string_view match_number(const std::string_view& input) { - static constexpr std::regex::flag_type flags = - std::regex_constants::ECMAScript | - std::regex_constants::icase; - // This is a hacky attempt to describe a C-like grammar for floating-point - // numbers using a regex (the tests seem to pass though). - // A proper NFA would be better, I guess. - static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags}; - - std::cmatch match; - if (!std::regex_search(input.cbegin(), input.cend(), match, number_regex)) { - return {}; - } - // If we have the numeric part of a number followed by 'e' and no digits, - // 1) that 'e' definitely belongs to this number token, - // 2) the user forgot to type in the required digits. - const auto& exponent = match[1]; - const auto& abs_power = match[2]; - if (exponent.matched && abs_power.matched && abs_power.length() == 0) { - throw lexer::Error{"exponent has no digits: " + match[0].str()}; - } - return {match[0].first, match[0].length()}; -} - -inline std::optional<double> parse_number(const std::string_view& input, std::string_view& token) { - const auto view = match_number(input); - if (!view.data()) { - return {}; - } - try { - const auto result = std::stod(std::string{view}); - token = view; - return result; - } catch (const std::exception& e) { - throw lexer::Error{"couldn't parse number from: " + std::string{view}}; - } - return {}; -} - -inline std::optional<double> parse_number(const std::string_view& input) { - std::string_view token; - return parse_number(input, token); -} - -inline bool starts_with(const std::string_view& a, const std::string_view& b) noexcept { - return a.length() >= b.length() - && a.compare(0, b.length(), b) == 0; -} - -inline std::optional<Token::Type> parse_const_token(const std::string_view& input, std::string_view& token) { - // FIXME: Potentially error-prone if there's const token A which is a - // prefix of token B (if the map is not sorted, we'd parse token A, when it - // could've been token B). - // Can be solved by sorting the keys accordingly. - - static const std::unordered_map<std::string_view, Token::Type> const_tokens{ - {"(", Token::Type::LEFT_PAREN}, - {")", Token::Type::RIGHT_PAREN}, - {"+", Token::Type::PLUS}, - {"-", Token::Type::MINUS}, - {"*", Token::Type::ASTERISK}, - {"/", Token::Type::SLASH}, - }; - - for (const auto& it : const_tokens) { - const auto& str = it.first; - const auto& type = it.second; - - if (starts_with(input, str)) { - token = input.substr(0, str.length()); - return type; - } - } - - return {}; -} - -inline std::optional<Token::Type> parse_const_token(const std::string_view& input) { - std::string_view token; - return parse_const_token(input, token); -} - -inline std::string_view parse_whitespace(const std::string_view& input) { - static const std::regex ws_regex{R"(\s*)"}; - - std::cmatch match; - if (std::regex_search(input.cbegin(), input.cend(), match, ws_regex)) { - return {match[0].first, match[0].length()}; - } - return {}; -} - -} - -} - -class Lexer { -public: - explicit Lexer(const std::string_view& input) - : m_input{input} { - } - - using TokenProcessor = std::function<bool (const lexer::Token&)>; - - bool for_each_token(const TokenProcessor& process) { - parse_token(); - for (auto token = peek_token(); token.has_value(); drop_token(), token = peek_token()) { - if (!process(*token)) { - return false; - } - } - return true; - } - - std::vector<lexer::Token> get_tokens() { - std::vector<lexer::Token> tokens; - for_each_token([&tokens] (const lexer::Token& token) { - tokens.emplace_back(token); - return true; - }); - return tokens; - } - - void parse_token() { - if (m_input.length() == 0) { - return; - } - std::string_view token_view; - m_token_buffer = parse_token(token_view); - if (m_token_buffer.has_value()) { - m_input.remove_prefix(token_view.length()); - } - } - - bool has_token() const { - return peek_token().has_value(); - } - - std::optional<lexer::Token> peek_token() const { - return m_token_buffer; - } - - void drop_token() { - if (!has_token()) { - throw lexer::Error{"internal: no tokens to drop"}; - } - m_token_buffer = {}; - parse_token(); - } - - std::optional<lexer::Token> drop_token_if(lexer::Token::Type type) { - if (!has_token()) { - throw lexer::Error{"internal: no tokens to drop"}; - } - if (m_token_buffer.value().get_type() != type) { - return {}; - } - const auto result = m_token_buffer; - drop_token(); - return result; - } - -private: - void consume_whitespace() { - const auto ws = lexer::details::parse_whitespace(m_input); - m_input.remove_prefix(ws.length()); - } - - std::optional<lexer::Token> parse_token(std::string_view& token_view) { - consume_whitespace(); - if (m_input.length() == 0) { - return {}; - } - if (const auto const_token = lexer::details::parse_const_token(m_input, token_view); const_token.has_value()) { - return lexer::Token{*const_token}; - } - if (const auto number = lexer::details::parse_number(m_input, token_view); number.has_value()) { - return lexer::Token{*number}; - } - throw lexer::Error{"invalid input at: " + std::string{m_input}}; - } - - std::string_view m_input; - std::optional<lexer::Token> m_token_buffer; -}; - -} diff --git a/server/lexer/CMakeLists.txt b/server/lexer/CMakeLists.txt new file mode 100644 index 0000000..65523a7 --- /dev/null +++ b/server/lexer/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(lexer lexer.cpp token.cpp token_type.cpp) +target_link_libraries(lexer PUBLIC common) diff --git a/server/lexer/error.hpp b/server/lexer/error.hpp new file mode 100644 index 0000000..99944c7 --- /dev/null +++ b/server/lexer/error.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "../common/error.hpp" + +#include <string> + +namespace math::server { + +class LexerError : public Error { +public: + explicit LexerError(const std::string &what) + : Error{"lexer error: " + what} + { } +}; + +} diff --git a/server/lexer/input.hpp b/server/lexer/input.hpp new file mode 100644 index 0000000..1104a4b --- /dev/null +++ b/server/lexer/input.hpp @@ -0,0 +1,42 @@ +#pragma once + +#include "error.hpp" + +#include <cstddef> + +#include <string_view> + +namespace math::server::lexer { + +class Input { +public: + explicit Input(const std::string_view& input) + : m_pos{0}, m_input{input} + { } + + const std::string_view& get_input() const { return m_input; } + + std::size_t get_pos() const { return m_pos; } + + std::size_t get_length() const { return m_input.length(); } + + bool empty() const { return m_input.empty(); } + + void consume(std::size_t len) { + if (m_input.length() < len) { + throw LexerError{"internal: not enough input to consume"}; + } + m_pos += len; + m_input.remove_prefix(len); + } + + void consume(const std::string_view& sub) { + consume(sub.length()); + } + +private: + std::size_t m_pos; + std::string_view m_input; +}; + +} diff --git a/server/lexer/lexer.cpp b/server/lexer/lexer.cpp new file mode 100644 index 0000000..c7eea6d --- /dev/null +++ b/server/lexer/lexer.cpp @@ -0,0 +1,206 @@ +#include "error.hpp" +#include "lexer.hpp" +#include "token.hpp" +#include "token_type.hpp" + +#include <exception> +#include <optional> +#include <regex> +#include <string_view> +#include <string> +#include <vector> + +namespace math::server { +namespace lexer { +namespace { + +std::string_view match_number(const std::string_view& input) { + static constexpr std::regex::flag_type flags = + std::regex_constants::ECMAScript | + std::regex_constants::icase; + // This is a hacky attempt to describe a C-like grammar for floating-point + // numbers using a regex (the tests seem to pass though). + // A proper NFA would be better, I guess. + static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags}; + + std::cmatch match; + if (!std::regex_search(input.cbegin(), input.cend(), match, number_regex)) { + return {}; + } + { + // If we have the numeric part of a number followed by 'e' and no digits, + // 1) that 'e' definitely belongs to this number token, + // 2) the user forgot to type in the required digits. + const auto& exponent = match[1]; + const auto& abs_power = match[2]; + if (exponent.matched && abs_power.matched && abs_power.length() == 0) { + throw LexerError{"exponent has no digits: " + match[0].str()}; + } + } + return {match[0].first, match[0].length()}; +} + +std::optional<double> parse_number(const std::string_view& input, std::string_view& token) { + const auto view = match_number(input); + if (!view.data()) { + return {}; + } + try { + const auto result = std::stod(std::string{view}); + token = view; + return result; + } catch (const std::exception& e) { + throw LexerError{"internal: couldn't parse number from: " + std::string{view}}; + } + return {}; +} + +bool starts_with(const std::string_view& a, const std::string_view& b) noexcept { + return a.length() >= b.length() + && a.compare(0, b.length(), b) == 0; +} + +std::optional<token::Type> parse_const_token(const std::string_view& input, std::string_view& token) { + for (const auto type : token::const_tokens()) { + const auto str = token::type_to_string(type); + if (starts_with(input, str)) { + token = {input.cbegin(), str.length()}; + return {type}; + } + } + return {}; +} + +} + +namespace details { + +std::optional<double> parse_number(const std::string_view& input) { + std::string_view token; + return lexer::parse_number(input, token); +} + +std::optional<token::Type> parse_const_token(const std::string_view& input) { + std::string_view token; + return lexer::parse_const_token(input, token); +} + +std::string_view parse_whitespace(const std::string_view& input) { + static const std::regex ws_regex{R"(^\s+)"}; + + std::cmatch match; + if (std::regex_search(input.cbegin(), input.cend(), match, ws_regex)) { + return {match[0].first, match[0].length()}; + } + return {}; +} + +} +} + +Lexer::Lexer(const std::string_view& input) + : Lexer{lexer::Input{input}} { +} + +Lexer::Lexer(const lexer::Input& input) + : m_input{input} { + + consume_token(); +} + +bool Lexer::for_each_token(const TokenProcessor& process) { + for (auto token = peek_token(); token.has_value(); drop_token(), token = peek_token()) { + if (!process(*token)) { + return false; + } + } + return true; +} + +std::vector<Lexer::ParsedToken> Lexer::get_tokens() { + std::vector<ParsedToken> tokens; + for_each_token([&tokens] (const ParsedToken& token) { + tokens.emplace_back(token); + return true; + }); + return tokens; +} + +void Lexer::drop_token() { + if (!has_token()) { + throw LexerError{"internal: no tokens to drop"}; + } + m_token_buffer = {}; + consume_token(); +} + +std::optional<Lexer::ParsedToken> Lexer::drop_token_of_type(Type type) { + if (!has_token()) { + throw LexerError{"internal: no tokens to drop"}; + } + if (m_token_buffer.value().get_type() != type) { + return {}; + } + const auto result = m_token_buffer; + drop_token(); + return result; +} + +void Lexer::consume_whitespace() { + const auto ws = parse_whitespace(); + if (!ws.has_value()) { + return; + } + m_input.consume(ws->get_length()); +} + +void Lexer::consume_token() { + if (m_input.empty()) { + return; + } + consume_whitespace(); + if (m_input.empty()) { + return; + } + auto token{parse_token()}; + m_input.consume(token.get_length()); + m_token_buffer = std::move(token); +} + +std::optional<Lexer::ParsedToken> Lexer::parse_whitespace() const { + const auto token_view = lexer::details::parse_whitespace(m_input.get_input()); + if (token_view.empty()) { + return {}; + } + return ParsedToken{Token{Token::Type::WHITESPACE}, m_input.get_pos(), token_view}; +} + +std::optional<Lexer::ParsedToken> Lexer::parse_const_token() const { + std::string_view token_view; + const auto type = lexer::parse_const_token(m_input.get_input(), token_view); + if (!type.has_value()) { + return {}; + } + return ParsedToken{Token{*type}, m_input.get_pos(), token_view}; +} + +std::optional<Lexer::ParsedToken> Lexer::parse_number() const { + std::string_view token_view; + const auto number = lexer::parse_number(m_input.get_input(), token_view); + if (!number.has_value()) { + return {}; + } + return ParsedToken{Token{*number}, m_input.get_pos(), token_view}; +} + +Lexer::ParsedToken Lexer::parse_token() const { + if (const auto const_token = parse_const_token(); const_token.has_value()) { + return *const_token; + } + if (const auto number = parse_number(); number.has_value()) { + return *number; + } + throw LexerError{"invalid input at: " + std::string{m_input.get_input()}}; +} + +} diff --git a/server/lexer/lexer.hpp b/server/lexer/lexer.hpp new file mode 100644 index 0000000..d08a2df --- /dev/null +++ b/server/lexer/lexer.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include "input.hpp" +#include "token.hpp" +#include "token_type.hpp" + +#include <functional> +#include <optional> +#include <string_view> +#include <vector> + +namespace math::server { +namespace lexer::details { + +// Exposed for testing: +std::string_view parse_whitespace(const std::string_view&); +std::optional<double> parse_number(const std::string_view&); +std::optional<token::Type> parse_const_token(const std::string_view&); + +} + +class Lexer { +public: + explicit Lexer(const std::string_view& input); + explicit Lexer(const lexer::Input& input); + + using Token = lexer::Token; + using ParsedToken = lexer::ParsedToken; + using Type = Token::Type; + using TokenProcessor = std::function<bool (const ParsedToken&)>; + + bool for_each_token(const TokenProcessor& process); + + std::vector<ParsedToken> get_tokens(); + + bool has_token() const { + return peek_token().has_value(); + } + + std::optional<ParsedToken> peek_token() const { + return m_token_buffer; + } + + void drop_token(); + std::optional<ParsedToken> drop_token_of_type(Type type); + +private: + std::optional<ParsedToken> parse_whitespace() const; + std::optional<ParsedToken> parse_const_token() const; + std::optional<ParsedToken> parse_number() const; + + ParsedToken parse_token() const; + + void consume_whitespace(); + void consume_token(); + + lexer::Input m_input; + std::optional<ParsedToken> m_token_buffer; +}; + +} diff --git a/server/lexer/token.cpp b/server/lexer/token.cpp new file mode 100644 index 0000000..6ffb721 --- /dev/null +++ b/server/lexer/token.cpp @@ -0,0 +1,71 @@ +#include "error.hpp" +#include "token.hpp" +#include "token_type.hpp" + +#include <cmath> + +#include <limits> +#include <variant> + +namespace math::server::lexer { +namespace { + +static constexpr double nan() { return std::numeric_limits<double>::quiet_NaN(); } + +static bool is_nan(double x) { return std::isnan(x); } + +static bool numbers_equal(double x, double y) { + if (is_nan(x) && is_nan(y)) { + return true; + } + return x == y; +} + +} + +Token::Token(Type type) + : m_type{type} { + + if (token::token_has_value(type)) { + throw LexerError{"internal: must have a value: " + token::type_to_int_string(type)}; + } +} + +Token::Token(double value) + : m_type{Type::NUMBER}, m_value{value} +{ } + +bool Token::operator==(const Token& other) const { + if (m_type != other.m_type) { + return false; + } + if (token::is_const_token(m_type)) { + return true; + } + if (m_type == Type::NUMBER) { + return numbers_equal(as_number(), other.as_number()); + } + throw LexerError{"internal: can't compare tokens of type: " + token::type_to_int_string(m_type)}; +} + +double Token::as_number() const { + const auto type = get_type(); + if (type != Type::NUMBER) { + throw LexerError{"internal: not a number: " + token::type_to_int_string(type)}; + } + return std::get<double>(m_value); +} + +std::ostream& operator<<(std::ostream& os, const Token& token) { + switch (token.m_type) { + case token::Type::NUMBER: + os << token.as_number(); + break; + default: + os << token::type_to_string(token.m_type); + break; + } + return os; +} + +} diff --git a/server/lexer/token.hpp b/server/lexer/token.hpp new file mode 100644 index 0000000..6f98383 --- /dev/null +++ b/server/lexer/token.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include "token_type.hpp" + +#include <cstddef> + +#include <string_view> +#include <utility> +#include <variant> + +namespace math::server::lexer { + +class Token { +public: + using Type = token::Type; + + explicit Token(token::Type type); + explicit Token(double value); + + bool operator==(const Token& other) const; + bool operator!=(const Token& other) const { return !(*this == other); } + + Type get_type() const { return m_type; } + + double as_number() const; + +private: + token::Type m_type; + std::variant<double> m_value; + + friend std::ostream& operator<<(std::ostream&, const Token&); +}; + +class ParsedToken : public Token { +public: + ParsedToken(Token&& token, std::size_t pos, const std::string_view& view) + : Token{std::move(token)}, m_pos{pos}, m_view{view} { + } + + std::size_t get_pos() const { return m_pos; } + + std::size_t get_length() const { return m_view.length(); } + +private: + std::size_t m_pos; + std::string_view m_view; +}; + +} diff --git a/server/lexer/token_type.cpp b/server/lexer/token_type.cpp new file mode 100644 index 0000000..9a69ba1 --- /dev/null +++ b/server/lexer/token_type.cpp @@ -0,0 +1,164 @@ +#include "error.hpp" +#include "token_type.hpp" + +#include <functional> +#include <map> +#include <ostream> +#include <stdexcept> +#include <string> +#include <unordered_map> + +namespace math::server::lexer::token { +namespace { + +using ToStringMap = std::unordered_map<Type, std::string>; +using FromStringMap = std::map<std::string, Type, std::greater<std::string>>; + +class ToStringConverter { +public: + ToStringConverter() : m_map{to_string_map()} { + validate(); + } + + const ToStringMap& map() const { return m_map; } + +private: + static const ToStringMap& to_string_map() { + static const ToStringMap map{ + {Type::WHITESPACE, "whitespace"}, + {Type::PLUS, "+"}, + {Type::MINUS, "-"}, + {Type::ASTERISK, "*"}, + {Type::SLASH, "/"}, + {Type::LEFT_PAREN, "("}, + {Type::RIGHT_PAREN, ")"}, + {Type::NUMBER, "number"}, + }; + return map; + } + + void validate() const { + check_for_duplicates(); + } + + void check_for_duplicates() const { + std::unordered_set<std::string> strings; + for (const auto& [type, str] : m_map) { + const auto [_, inserted] = strings.emplace(str); + if (!inserted) { + throw std::logic_error{"multiple tokens have the same string representation: " + str}; + } + } + } + + const ToStringMap& m_map; +}; + +const ToStringMap& to_string_map() { + static const ToStringConverter converter; + return converter.map(); +} + +class FromStringConverter { +public: + FromStringConverter(const ToStringMap& to_string) + : m_map{build_map(to_string)} { + } + + const FromStringMap& map() const { return m_map; } + +private: + static FromStringMap build_map(const ToStringMap& to_string) { + FromStringMap from_string; + for (const auto& [type, str] : to_string) { + const auto [_, inserted] = from_string.emplace(str, type); + if (!inserted) { + throw std::logic_error{"multiple tokens have the same string representation: " + str}; + } + } + return from_string; + } + + FromStringMap m_map; +}; + +const FromStringMap& from_string_map() { + static const FromStringConverter converter{to_string_map()}; + return converter.map(); +} + +class ConstTokens { +public: + ConstTokens() { + const auto& map = to_string_map(); + for (const auto& [type, _] : map) { + if (is_const_token(type)) { + m_set.emplace(type); + } + } + } + + const TypeSet& set() const { return m_set; } + +private: + TypeSet m_set; +}; + +} + +TypeInt type_to_int(Type type) { + return static_cast<TypeInt>(type); +} + +std::string type_to_int_string(Type type) { + return std::to_string(type_to_int(type)); +} + +bool is_const_token(Type type) { + switch (type) { + case Type::WHITESPACE: + case Type::NUMBER: + return false; + default: + return true; + } +} + +const TypeSet& const_tokens() { + static const ConstTokens tokens; + return tokens.set(); +} + +bool token_has_value(Type type) { + switch (type) { + case Type::NUMBER: + return true; + default: + return false; + } +} + +std::string type_to_string(Type type) { + const auto& map = to_string_map(); + const auto it = map.find(type); + if (it == map.cend()) { + throw LexerError{"type_to_string: unsupported token type: " + type_to_int_string(type)}; + } + return it->second; +} + +Type type_from_string(const std::string& src) { + const auto& map = from_string_map(); + const auto it = map.find(src); + if (it == map.cend()) { + throw LexerError{"type_from_string: unsupported token: " + std::string{src}}; + } + return it->second; +} + +std::ostream& operator<<(std::ostream& os, const Type& type) { + os << type_to_int(type); + return os; +} + +} diff --git a/server/lexer/token_type.hpp b/server/lexer/token_type.hpp new file mode 100644 index 0000000..9489915 --- /dev/null +++ b/server/lexer/token_type.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include <ostream> +#include <string> +#include <type_traits> +#include <unordered_set> + +namespace math::server::lexer::token { + +enum class Type { + WHITESPACE, + PLUS, + MINUS, + ASTERISK, + SLASH, + LEFT_PAREN, + RIGHT_PAREN, + NUMBER, +}; + +using TypeInt = std::underlying_type<Type>::type; +using TypeSet = std::unordered_set<Type>; + +TypeInt type_to_int(Type); +std::string type_to_int_string(Type); + +bool is_const_token(Type); +const TypeSet& const_tokens(); + +bool token_has_value(Type); + +std::string type_to_string(Type); +Type type_from_string(const std::string&); + +std::ostream& operator<<(std::ostream&, const Type&); + +} diff --git a/server/main/CMakeLists.txt b/server/main/CMakeLists.txt new file mode 100644 index 0000000..b322390 --- /dev/null +++ b/server/main/CMakeLists.txt @@ -0,0 +1,13 @@ +find_package(Boost REQUIRED COMPONENTS filesystem program_options) + +option(DEBUG_ASIO "enable debug output for Boost.Asio" OFF) + +add_executable(server main.cpp server.cpp session.cpp session_manager.cpp) +target_link_libraries(server PRIVATE common parser) + +target_include_directories(server SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) +target_link_libraries(server PRIVATE ${Boost_LIBRARIES}) + +if(DEBUG_ASIO) + target_compile_definitions(server PRIVATE BOOST_ASIO_ENABLE_HANDLER_TRACKING) +endif() diff --git a/server/main.cpp b/server/main/main.cpp index 2cf6d35..2cf6d35 100644 --- a/server/main.cpp +++ b/server/main/main.cpp diff --git a/server/server.cpp b/server/main/server.cpp index 4dc672c..3800144 100644 --- a/server/server.cpp +++ b/server/main/server.cpp @@ -1,10 +1,11 @@ -#include "error.hpp" -#include "log.hpp" #include "server.hpp" #include "session.hpp" #include "session_manager.hpp" #include "settings.hpp" +#include "../common/error.hpp" +#include "../common/log.hpp" + #include <boost/asio.hpp> #include <boost/system/error_code.hpp> #include <boost/system/system_error.hpp> diff --git a/server/server.hpp b/server/main/server.hpp index 5524f88..5524f88 100644 --- a/server/server.hpp +++ b/server/main/server.hpp diff --git a/server/session.cpp b/server/main/session.cpp index 409ca5a..0ee7f75 100644 --- a/server/session.cpp +++ b/server/main/session.cpp @@ -1,9 +1,10 @@ -#include "error.hpp" -#include "log.hpp" -#include "parser.hpp" #include "session.hpp" #include "session_manager.hpp" +#include "../common/error.hpp" +#include "../common/log.hpp" +#include "../parser/parser.hpp" + #include <boost/asio.hpp> #include <boost/lexical_cast.hpp> #include <boost/system/error_code.hpp> diff --git a/server/session.hpp b/server/main/session.hpp index ace3755..ace3755 100644 --- a/server/session.hpp +++ b/server/main/session.hpp diff --git a/server/session_manager.cpp b/server/main/session_manager.cpp index a42fca4..c2aef6d 100644 --- a/server/session_manager.cpp +++ b/server/main/session_manager.cpp @@ -1,7 +1,8 @@ -#include "log.hpp" #include "session.hpp" #include "session_manager.hpp" +#include "../common/log.hpp" + #include <memory> #include <mutex> diff --git a/server/session_manager.hpp b/server/main/session_manager.hpp index f0bec0b..f0bec0b 100644 --- a/server/session_manager.hpp +++ b/server/main/session_manager.hpp diff --git a/server/settings.hpp b/server/main/settings.hpp index 310163f..310163f 100644 --- a/server/settings.hpp +++ b/server/main/settings.hpp diff --git a/server/parser.hpp b/server/parser.hpp deleted file mode 100644 index a9e5f54..0000000 --- a/server/parser.hpp +++ /dev/null @@ -1,168 +0,0 @@ -#pragma once - -#include "error.hpp" -#include "lexer.hpp" - -#include <optional> -#include <string> -#include <string_view> - -namespace math::server { -namespace parser { - -class Error : public server::Error { -public: - explicit Error(const std::string& what) - : server::Error{"parser error: " + what} - { } -}; - -class BinaryOp { -public: - static bool is(const lexer::Token& token) { - using Type = lexer::Token::Type; - switch (token.get_type()) { - case Type::PLUS: - case Type::MINUS: - case Type::ASTERISK: - case Type::SLASH: - return true; - - default: - return false; - } - } - - static BinaryOp from_token(const lexer::Token& token) { - if (!is(token)) { - throw Error{"internal: token is not a binary operator"}; - } - return BinaryOp{token}; - } - - static constexpr unsigned min_precedence() { return 0; } - - unsigned get_precedence() const { - using Type = lexer::Token::Type; - switch (m_type) { - case Type::PLUS: - case Type::MINUS: - return min_precedence(); - - case Type::ASTERISK: - case Type::SLASH: - return min_precedence() + 1; - - default: - throw Error{"internal: undefined operator precedence"}; - } - } - - double exec(double lhs, double rhs) const { - using Type = lexer::Token::Type; - switch (m_type) { - case Type::PLUS: - return lhs + rhs; - case Type::MINUS: - return lhs - rhs; - case Type::ASTERISK: - return lhs * rhs; - case Type::SLASH: - // Trapping the CPU would be better? - if (rhs == 0.) { - throw Error{"division by zero"}; - } - return lhs / rhs; - default: - throw Error{"internal: unsupported operator"}; - } - } - -private: - explicit BinaryOp(const lexer::Token& token) - : m_type{token.get_type()} - { } - - lexer::Token::Type m_type; -}; - -} - -class Parser { -public: - // I did simple recursive descent parsing a long time ago (see - // https://github.com/egor-tensin/simple-interpreter), this appears to be - // a finer algorithm for parsing arithmetic expressions. - // Reference: https://en.wikipedia.org/wiki/Operator-precedence_parser - - explicit Parser(const std::string_view& input) - : m_lexer{input} - { } - - double exec() { - m_lexer.parse_token(); - const auto result = exec_expr(); - if (m_lexer.has_token()) { - throw parser::Error{"expected a binary operator"}; - } - return result; - } - -private: - double exec_expr() { - return exec_expr(exec_primary(), parser::BinaryOp::min_precedence()); - } - - double exec_expr(double lhs, unsigned min_prec) { - for (auto op = peek_operator(); op.has_value() && op->get_precedence() >= min_prec;) { - const auto lhs_op = *op; - m_lexer.drop_token(); - auto rhs = exec_primary(); - - for (op = peek_operator(); op.has_value() && op->get_precedence() > lhs_op.get_precedence(); op = peek_operator()) { - rhs = exec_expr(rhs, op->get_precedence()); - } - - lhs = lhs_op.exec(lhs, rhs); - } - return lhs; - } - - std::optional<parser::BinaryOp> peek_operator() { - const auto token = m_lexer.peek_token(); - if (!token.has_value() || !parser::BinaryOp::is(*token)) { - return {}; - } - return parser::BinaryOp::from_token(*token); - } - - double exec_primary() { - if (!m_lexer.has_token()) { - throw parser::Error{"expected '-', '(' or a number"}; - } - - using Type = lexer::Token::Type; - - if (m_lexer.drop_token_if(Type::MINUS).has_value()) { - return -exec_primary(); - } - - if (m_lexer.drop_token_if(Type::LEFT_PAREN).has_value()) { - const auto inner = exec_expr(); - if (!m_lexer.has_token() || !m_lexer.drop_token_if(Type::RIGHT_PAREN).has_value()) { - throw parser::Error{"missing closing ')'"}; - } - return inner; - } - - if (const auto token = m_lexer.drop_token_if(Type::NUMBER); token.has_value()) { - return token.value().get_number_value(); - } - - throw parser::Error{"expected '-', '(' or a number"}; - } - - Lexer m_lexer; -}; - -} diff --git a/server/parser/CMakeLists.txt b/server/parser/CMakeLists.txt new file mode 100644 index 0000000..2490e57 --- /dev/null +++ b/server/parser/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(parser INTERFACE) +target_link_libraries(parser INTERFACE common lexer) diff --git a/server/parser/error.hpp b/server/parser/error.hpp new file mode 100644 index 0000000..1ba29ed --- /dev/null +++ b/server/parser/error.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "../common/error.hpp" + +#include <string> + +namespace math::server { + +class ParserError : public Error { +public: + explicit ParserError(const std::string& what) + : Error{"parser error: " + what} + { } +}; + +} diff --git a/server/parser/operator.hpp b/server/parser/operator.hpp new file mode 100644 index 0000000..4226078 --- /dev/null +++ b/server/parser/operator.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include "error.hpp" + +#include "../lexer/token.hpp" +#include "../lexer/token_type.hpp" + +namespace math::server::parser { + +class BinaryOp { +public: + using Token = lexer::Token; + using Type = Token::Type; + + static bool is(const Token& token) { + switch (token.get_type()) { + case Type::PLUS: + case Type::MINUS: + case Type::ASTERISK: + case Type::SLASH: + return true; + + default: + return false; + } + } + + static BinaryOp from_token(const Token& token) { + if (!is(token)) { + throw ParserError{"internal: token is not a binary operator"}; + } + return BinaryOp{token}; + } + + static constexpr unsigned min_precedence() { return 0; } + + unsigned get_precedence() const { + switch (m_type) { + case Type::PLUS: + case Type::MINUS: + return min_precedence(); + + case Type::ASTERISK: + case Type::SLASH: + return min_precedence() + 1; + + default: + throw ParserError{"internal: undefined operator precedence"}; + } + } + + double exec(double lhs, double rhs) const { + switch (m_type) { + case Type::PLUS: + return lhs + rhs; + case Type::MINUS: + return lhs - rhs; + case Type::ASTERISK: + return lhs * rhs; + case Type::SLASH: + // Trapping the CPU would be better? + if (rhs == 0.) { + throw ParserError{"division by zero"}; + } + return lhs / rhs; + default: + throw ParserError{"internal: unsupported operator"}; + } + } + +private: + explicit BinaryOp(const Token& token) + : m_type{token.get_type()} + { } + + Type m_type; +}; + +} diff --git a/server/parser/parser.hpp b/server/parser/parser.hpp new file mode 100644 index 0000000..1197c31 --- /dev/null +++ b/server/parser/parser.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include "error.hpp" +#include "operator.hpp" + +#include "../lexer/lexer.hpp" + +#include <optional> +#include <string_view> + +namespace math::server { + +class Parser { +public: + // I did simple recursive descent parsing a long time ago (see + // https://github.com/egor-tensin/simple-interpreter), this appears to be + // a finer algorithm for parsing arithmetic expressions. + // Reference: https://en.wikipedia.org/wiki/Operator-precedence_parser + + explicit Parser(const std::string_view& input) + : m_lexer{input} + { } + + double exec() { + const auto result = exec_expr(); + if (m_lexer.has_token()) { + throw ParserError{"expected a binary operator"}; + } + return result; + } + +private: + double exec_expr() { + return exec_expr(exec_primary(), parser::BinaryOp::min_precedence()); + } + + double exec_expr(double lhs, unsigned min_prec) { + for (auto op = peek_operator(); op.has_value() && op->get_precedence() >= min_prec;) { + const auto lhs_op = *op; + m_lexer.drop_token(); + auto rhs = exec_primary(); + + for (op = peek_operator(); op.has_value() && op->get_precedence() > lhs_op.get_precedence(); op = peek_operator()) { + rhs = exec_expr(rhs, op->get_precedence()); + } + + lhs = lhs_op.exec(lhs, rhs); + } + return lhs; + } + + std::optional<parser::BinaryOp> peek_operator() { + const auto token = m_lexer.peek_token(); + if (!token.has_value() || !parser::BinaryOp::is(*token)) { + return {}; + } + return parser::BinaryOp::from_token(*token); + } + + double exec_primary() { + if (!m_lexer.has_token()) { + throw ParserError{"expected '-', '(' or a number"}; + } + + using Type = lexer::Token::Type; + + if (m_lexer.drop_token_of_type(Type::MINUS).has_value()) { + return -exec_primary(); + } + + if (m_lexer.drop_token_of_type(Type::LEFT_PAREN).has_value()) { + const auto inner = exec_expr(); + if (!m_lexer.has_token() || !m_lexer.drop_token_of_type(Type::RIGHT_PAREN).has_value()) { + throw ParserError{"missing closing ')'"}; + } + return inner; + } + + if (const auto token = m_lexer.drop_token_of_type(Type::NUMBER); token.has_value()) { + return token.value().as_number(); + } + + throw ParserError{"expected '-', '(' or a number"}; + } + + Lexer m_lexer; +}; + +} diff --git a/test/unit_tests/CMakeLists.txt b/test/unit_tests/CMakeLists.txt index d320974..ebdfbf1 100644 --- a/test/unit_tests/CMakeLists.txt +++ b/test/unit_tests/CMakeLists.txt @@ -1,6 +1,8 @@ find_package(Boost REQUIRED) add_executable(unit_tests main.cpp lexer.cpp parser.cpp) -target_include_directories(unit_tests SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) - +target_link_libraries(unit_tests PRIVATE lexer parser) target_include_directories(unit_tests PRIVATE ../..) + +target_include_directories(unit_tests SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) +target_link_libraries(unit_tests PRIVATE ${Boost_LIBRARIES}) diff --git a/test/unit_tests/lexer.cpp b/test/unit_tests/lexer.cpp index 7b513e8..fdc93e1 100644 --- a/test/unit_tests/lexer.cpp +++ b/test/unit_tests/lexer.cpp @@ -1,12 +1,27 @@ -#include <server/lexer.hpp> +#include <server/lexer/error.hpp> +#include <server/lexer/lexer.hpp> +#include <server/lexer/token.hpp> +#include <server/lexer/token_type.hpp> +#include <boost/test/data/test_case.hpp> +#include <boost/test/data/monomorphic.hpp> #include <boost/test/unit_test.hpp> +#include <ostream> +#include <string> +#include <string_view> #include <vector> -BOOST_AUTO_TEST_CASE(test_lexer_parse_number) { - using namespace math::server::lexer; +BOOST_AUTO_TEST_SUITE(lexer_tests) +namespace bdata = boost::unit_test::data; +using math::server::Lexer; +using math::server::LexerError; +using math::server::lexer::Token; +using math::server::lexer::token::Type; +namespace details = math::server::lexer::details; + +BOOST_AUTO_TEST_CASE(test_parse_number) { // These are valid numbers: BOOST_TEST(details::parse_number("0").value() == 0); BOOST_TEST(details::parse_number("1.").value() == 1.); @@ -25,85 +40,129 @@ BOOST_AUTO_TEST_CASE(test_lexer_parse_number) { BOOST_TEST(!details::parse_number("e12").has_value()); // This is definitely a number, but a malformed one (an exponent must be // followed by some digits). - BOOST_CHECK_THROW(details::parse_number("12e"), Error); + BOOST_CHECK_THROW(details::parse_number("12e"), LexerError); } -BOOST_AUTO_TEST_CASE(test_lexer_parse_const_token) { - using namespace math::server::lexer; - - // TODO: No time to implement the required string conversions, hence the - // extra parentheses. - BOOST_TEST((details::parse_const_token("+").value() == Token::Type::PLUS)); +BOOST_AUTO_TEST_CASE(test_parse_const_token) { + BOOST_TEST(details::parse_const_token("+").value() == Type::PLUS); // parse_* functions only consume a single token: - BOOST_TEST((details::parse_const_token("+++").value() == Token::Type::PLUS)); - BOOST_TEST((details::parse_const_token("-").value() == Token::Type::MINUS)); + BOOST_TEST(details::parse_const_token("+/*").value() == Type::PLUS); + BOOST_TEST(details::parse_const_token("-").value() == Type::MINUS); BOOST_TEST(!details::parse_const_token("&+").has_value()); } -BOOST_AUTO_TEST_CASE(test_lexer_get_tokens) { - using namespace math::server; - using namespace math::server::lexer; +namespace { +namespace get_tokens::valid { - // TODO: No time to implement the required string conversions, hence the - // extra parentheses. - { - Lexer lexer{""}; - BOOST_TEST((lexer.get_tokens() == std::vector<Token>{})); - } - { - Lexer lexer{" + - "}; - BOOST_TEST((lexer.get_tokens() == std::vector<Token>{ - Token{Token::Type::PLUS}, - Token{Token::Type::MINUS}, - })); - } - { - Lexer lexer{"&"}; - BOOST_CHECK_THROW((lexer.get_tokens()), lexer::Error); - } - { - Lexer lexer{" 1 + 123 & 456"}; - BOOST_CHECK_THROW((lexer.get_tokens()), lexer::Error); - } - { - Lexer lexer{"1+2"}; - BOOST_TEST((lexer.get_tokens() == std::vector<Token>{ - Token{1}, - Token{Token::Type::PLUS}, - Token{2}, - })); - } - { - Lexer lexer{"1+2 * (3- 4e-2)"}; - BOOST_TEST((lexer.get_tokens() == std::vector<Token>{ - Token{1}, - Token{Token::Type::PLUS}, - Token{2}, - Token{Token::Type::ASTERISK}, - Token{Token::Type::LEFT_PAREN}, - Token{3}, - Token{Token::Type::MINUS}, - Token{4e-2}, - Token{Token::Type::RIGHT_PAREN}, - })); +const std::vector<std::string_view> input{ + "", + " + - ", + "1+2", + "1+2 * (3- 4e-2)", + " 2 * (1 + 3 * (1 - -3)) ", +}; + +// Some black magic-fuckery to resolve operator<< for std::vector<Token>. +// See https://stackoverflow.com/a/18817428/514684. + +struct Expected { + std::vector<Token> m_tokens; +}; + +std::ostream& operator<<(std::ostream& os, const Expected& expected) { + for (const auto& token : expected.m_tokens) { + os << token; } - { - Lexer lexer{" 2 * (1 + 3 * (1 - -3)) "}; - BOOST_TEST((lexer.get_tokens() == std::vector<Token>{ - Token{2}, - Token{Token::Type::ASTERISK}, - Token{Token::Type::LEFT_PAREN}, - Token{1}, - Token{Token::Type::PLUS}, - Token{3}, - Token{Token::Type::ASTERISK}, - Token{Token::Type::LEFT_PAREN}, - Token{1}, - Token{Token::Type::MINUS}, - Token{Token::Type::MINUS}, - Token{3}, - Token{Token::Type::RIGHT_PAREN}, - Token{Token::Type::RIGHT_PAREN}, - })); + return os; +} + +const std::vector<Expected> expected{ + {{}}, + {{ + Token{Type::PLUS}, + Token{Type::MINUS}, + }}, + {{ + Token{1}, + Token{Type::PLUS}, + Token{2}, + }}, + {{ + Token{1}, + Token{Type::PLUS}, + Token{2}, + Token{Type::ASTERISK}, + Token{Type::LEFT_PAREN}, + Token{3}, + Token{Type::MINUS}, + Token{4e-2}, + Token{Type::RIGHT_PAREN}, + }}, + {{ + Token{2}, + Token{Type::ASTERISK}, + Token{Type::LEFT_PAREN}, + Token{1}, + Token{Type::PLUS}, + Token{3}, + Token{Type::ASTERISK}, + Token{Type::LEFT_PAREN}, + Token{1}, + Token{Type::MINUS}, + Token{Type::MINUS}, + Token{3}, + Token{Type::RIGHT_PAREN}, + Token{Type::RIGHT_PAREN}, + }}, +}; + +} + +namespace get_tokens::invalid { + +const std::vector<std::string_view> input{ + "&", + " 1 + 123 & 456", +}; + +const std::vector<std::string> error_msg{ + "server error: lexer error: invalid input at: &", + "server error: lexer error: invalid input at: & 456", +}; + +} +} + +BOOST_DATA_TEST_CASE( + test_get_tokens_valid, + bdata::make(get_tokens::valid::input) ^ get_tokens::valid::expected, + input, + expected) { + + Lexer lexer{input}; + const auto actual = lexer.get_tokens(); + BOOST_CHECK_EQUAL_COLLECTIONS(actual.cbegin(), actual.cend(), + expected.m_tokens.cbegin(), + expected.m_tokens.cend()); +} + +BOOST_DATA_TEST_CASE( + test_get_tokens_invalid, + bdata::make(get_tokens::invalid::input) ^ get_tokens::invalid::error_msg, + input, + error_msg) { + + BOOST_REQUIRE_THROW(do { + Lexer lexer{input}; + lexer.get_tokens(); + } while (0), LexerError); + + try { + Lexer lexer{input}; + lexer.get_tokens(); + } catch (const LexerError& e) { + BOOST_TEST(error_msg == e.what()); } } + +BOOST_AUTO_TEST_SUITE_END() diff --git a/test/unit_tests/parser.cpp b/test/unit_tests/parser.cpp index 11f48d3..bf31223 100644 --- a/test/unit_tests/parser.cpp +++ b/test/unit_tests/parser.cpp @@ -1,48 +1,89 @@ -#include <server/parser.hpp> +#include <server/parser/error.hpp> +#include <server/parser/parser.hpp> +#include <boost/test/data/test_case.hpp> +#include <boost/test/data/monomorphic.hpp> #include <boost/test/unit_test.hpp> -BOOST_AUTO_TEST_CASE(test_parser_exec) { - using namespace math::server; +#include <string> +#include <string_view> +#include <vector> - { - Parser parser{""}; - BOOST_CHECK_THROW(parser.exec(), parser::Error); - } - { - Parser parser{"1"}; - BOOST_TEST(parser.exec() == 1); - } - { - Parser parser{" 1 + "}; - BOOST_CHECK_THROW(parser.exec(), parser::Error); - } - { - Parser parser{" 1 + 2 "}; - BOOST_TEST(parser.exec() == 3); - } - { - Parser parser{" 2 * 1 + 3 "}; - BOOST_TEST(parser.exec() == 5); - } - { - Parser parser{" 2 * (1 + 3) "}; - BOOST_TEST(parser.exec() == 8); - } - { - Parser parser{" 2 * (1 + 3 "}; - BOOST_CHECK_THROW(parser.exec(), parser::Error); - } - { - Parser parser{" 2 * (1 + 3) )"}; - BOOST_CHECK_THROW(parser.exec(), parser::Error); - } - { - Parser parser{" 2 * (1 + 3 * (1 - -3)) "}; - BOOST_TEST(parser.exec() == 26); - } - { - Parser parser{" -2 * ---- (3 + -100e-1) "}; // Looks weird, but also works in e.g. Python - BOOST_TEST(parser.exec() == 14); +BOOST_AUTO_TEST_SUITE(parser_tests) + +namespace bdata = boost::unit_test::data; +using math::server::Parser; +using math::server::ParserError; + +namespace { +namespace exec::valid { + +const std::vector<std::string_view> input{ + "1", + " 1 + 2 ", + " 2 * 1 + 3 ", + " 2 * (1 + 3) ", + " 2 * (1 + 3 * (1 - -3)) ", + " -2 * ---- (3 + -100e-1) ", // Looks weird, but also works in e.g. Python +}; + +const std::vector<double> expected{ + 1, + 3, + 5, + 8, + 26, + 14, +}; + +} + +namespace exec::invalid { + +const std::vector<std::string_view> input{ + "", + " 1 + ", + " 2 * (1 + 3 ", + " 2 * (1 + 3) )", +}; + +const std::vector<std::string> error_msg{ + "server error: parser error: expected '-', '(' or a number", + "server error: parser error: expected '-', '(' or a number", + "server error: parser error: missing closing ')'", + "server error: parser error: expected a binary operator", +}; + +} +} + +BOOST_DATA_TEST_CASE( + test_exec_valid, + bdata::make(exec::valid::input) ^ exec::valid::expected, + input, + expected) { + + Parser parser{input}; + BOOST_TEST(parser.exec() == expected); +} + +BOOST_DATA_TEST_CASE( + test_exec_invalid, + bdata::make(exec::invalid::input) ^ exec::invalid::error_msg, + input, + error_msg) { + + BOOST_REQUIRE_THROW(do { + Parser parser{input}; + parser.exec(); + } while (0), ParserError); + + try { + Parser parser{input}; + parser.exec(); + } catch (const ParserError& e) { + BOOST_TEST(error_msg == e.what()); } } + +BOOST_AUTO_TEST_SUITE_END() |