diff options
author | Egor Tensin <Egor.Tensin@gmail.com> | 2020-01-03 00:58:56 +0300 |
---|---|---|
committer | Egor Tensin <Egor.Tensin@gmail.com> | 2020-01-03 00:58:56 +0300 |
commit | ad748449fb2d6154f0d7677f2a933aa29919075b (patch) | |
tree | 11278ebbf3890823fb6bf85014ade1b62921ff6e /server/lexer/lexer.cpp | |
parent | update cmake-common (diff) | |
download | math-server-ad748449fb2d6154f0d7677f2a933aa29919075b.tar.gz math-server-ad748449fb2d6154f0d7677f2a933aa29919075b.zip |
lexer: split out testing-exposed functions
Diffstat (limited to '')
-rw-r--r-- | server/lexer/lexer.cpp | 104 |
1 file changed, 5 insertions, 99 deletions
diff --git a/server/lexer/lexer.cpp b/server/lexer/lexer.cpp index 24e3f14..cf24189 100644 --- a/server/lexer/lexer.cpp +++ b/server/lexer/lexer.cpp @@ -3,113 +3,19 @@ // For details, see https://github.com/egor-tensin/math-server. // Distributed under the MIT License. +#include "details/parse.hpp" #include "error.hpp" #include "lexer.hpp" #include "token.hpp" #include "token_type.hpp" -#include <exception> #include <optional> -#include <regex> -#include <string_view> #include <string> +#include <string_view> +#include <utility> #include <vector> namespace math::server { -namespace lexer { -namespace { - -std::string_view match_number(const std::string_view& input) { - static constexpr std::regex::flag_type flags = - std::regex_constants::ECMAScript | - std::regex_constants::icase; - // This is a hacky attempt to describe a C-like grammar for floating-point - // numbers using a regex (the tests seem to pass though). - // A proper NFA would be better, I guess. - static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags}; - - std::cmatch match; - { - const auto begin = input.data(); - const auto end = begin + input.length(); - if (!std::regex_search(begin, end, match, number_regex)) { - return {}; - } - } - { - // If we have the numeric part of a number followed by 'e' and no digits, - // 1) that 'e' definitely belongs to this number token, - // 2) the user forgot to type in the required digits. 
- const auto& exponent = match[1]; - const auto& abs_power = match[2]; - if (exponent.matched && abs_power.matched && abs_power.length() == 0) { - throw LexerError{"exponent has no digits: " + match[0].str()}; - } - } - return {match[0].first, static_cast<std::size_t>(match[0].length())}; -} - -std::optional<double> parse_number(const std::string_view& input, std::string_view& token) { - const auto view = match_number(input); - if (!view.data()) { - return {}; - } - try { - const auto result = std::stod(std::string{view}); - token = view; - return result; - } catch (const std::exception&) { - throw LexerError{"internal: couldn't parse number from: " + std::string{view}}; - } - return {}; -} - -bool starts_with(const std::string_view& a, const std::string_view& b) noexcept { - return a.length() >= b.length() - && a.compare(0, b.length(), b) == 0; -} - -std::optional<token::Type> parse_const_token(const std::string_view& input, std::string_view& token) { - for (const auto type : token::const_tokens()) { - const auto str = token::type_to_string(type); - if (starts_with(input, str)) { - token = std::string_view(input.data(), str.length()); - return {type}; - } - } - return {}; -} - -} - -namespace details { - -std::optional<double> parse_number(const std::string_view& input) { - std::string_view token; - return lexer::parse_number(input, token); -} - -std::optional<token::Type> parse_const_token(const std::string_view& input) { - std::string_view token; - return lexer::parse_const_token(input, token); -} - -std::string_view parse_whitespace(const std::string_view& input) { - static const std::regex ws_regex{R"(^\s+)"}; - - std::cmatch match; - { - const auto begin = input.data(); - const auto end = begin + input.length(); - if (std::regex_search(begin, end, match, ws_regex)) { - return std::string_view(match[0].first, match[0].length()); - } - } - return {}; -} - -} -} Lexer::Lexer(const std::string_view& input) : Lexer{lexer::Input{input}} { @@ -190,7 +96,7 @@ 
std::optional<Lexer::ParsedToken> Lexer::parse_whitespace() const { std::optional<Lexer::ParsedToken> Lexer::parse_const_token() const { std::string_view token_view; - const auto type = lexer::parse_const_token(m_input.get_input(), token_view); + const auto type = lexer::details::parse_const_token(m_input.get_input(), token_view); if (!type.has_value()) { return {}; } @@ -199,7 +105,7 @@ std::optional<Lexer::ParsedToken> Lexer::parse_const_token() const { std::optional<Lexer::ParsedToken> Lexer::parse_number() const { std::string_view token_view; - const auto number = lexer::parse_number(m_input.get_input(), token_view); + const auto number = lexer::details::parse_number(m_input.get_input(), token_view); if (!number.has_value()) { return {}; } |