about summary refs log tree commit diff stats homepage
path: root/server/lexer
diff options
context:
space:
mode:
author Egor Tensin <Egor.Tensin@gmail.com> 2020-01-03 00:58:56 +0300
committer Egor Tensin <Egor.Tensin@gmail.com> 2020-01-03 00:58:56 +0300
commit ad748449fb2d6154f0d7677f2a933aa29919075b (patch)
tree 11278ebbf3890823fb6bf85014ade1b62921ff6e /server/lexer
parent update cmake-common (diff)
download math-server-ad748449fb2d6154f0d7677f2a933aa29919075b.tar.gz
download math-server-ad748449fb2d6154f0d7677f2a933aa29919075b.zip
lexer: split out testing-exposed functions
Diffstat (limited to 'server/lexer')
-rw-r--r-- server/lexer/CMakeLists.txt 2
-rw-r--r-- server/lexer/details/parse.cpp 107
-rw-r--r-- server/lexer/details/parse.hpp 22
-rw-r--r-- server/lexer/lexer.cpp 104
-rw-r--r-- server/lexer/lexer.hpp 8
5 files changed, 135 insertions, 108 deletions
diff --git a/server/lexer/CMakeLists.txt b/server/lexer/CMakeLists.txt
index 65523a7..b62e47a 100644
--- a/server/lexer/CMakeLists.txt
+++ b/server/lexer/CMakeLists.txt
@@ -1,2 +1,2 @@
-add_library(lexer lexer.cpp token.cpp token_type.cpp)
+add_library(lexer details/parse.cpp lexer.cpp token.cpp token_type.cpp)
target_link_libraries(lexer PUBLIC common)
diff --git a/server/lexer/details/parse.cpp b/server/lexer/details/parse.cpp
new file mode 100644
index 0000000..464dfb2
--- /dev/null
+++ b/server/lexer/details/parse.cpp
@@ -0,0 +1,107 @@
+// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
+// This file is part of the "math-server" project.
+// For details, see https://github.com/egor-tensin/math-server.
+// Distributed under the MIT License.
+
+#include "../error.hpp"
+#include "../token_type.hpp"
+
+#include <cstddef>
+
+#include <exception>
+#include <optional>
+#include <regex>
+#include <string>
+#include <string_view>
+
+namespace math::server::lexer::details {
+namespace {
+
+std::string_view match_number(const std::string_view& input) {  // Finds a numeric literal at the very start of input and returns a view of it.
+ static constexpr std::regex::flag_type flags =
+ std::regex_constants::ECMAScript |
+ std::regex_constants::icase;  // icase: the exponent marker may be written as 'e' or 'E'
+ // This is a hacky attempt to describe a C-like grammar for floating-point
+ // numbers using a regex (the tests seem to pass though).
+ // A proper NFA would be better, I guess.
+ static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags};  // group 1: whole exponent part; group 2: its digits
+
+ std::cmatch match;
+ {
+ const auto begin = input.data();
+ const auto end = begin + input.length();
+ if (!std::regex_search(begin, end, match, number_regex)) {  // the pattern is anchored with '^', so only a prefix of input can match
+ return {};  // no number here: default-constructed view, data() == nullptr
+ }
+ }
+ {
+ // If we have the numeric part of a number followed by 'e' and no digits,
+ // 1) that 'e' definitely belongs to this number token,
+ // 2) the user forgot to type in the required digits.
+ const auto& exponent = match[1];
+ const auto& abs_power = match[2];
+ if (exponent.matched && abs_power.matched && abs_power.length() == 0) {
+ throw LexerError{"exponent has no digits: " + match[0].str()};  // the message quotes the whole matched text
+ }
+ }
+ return {match[0].first, static_cast<std::size_t>(match[0].length())};  // a view into the caller's buffer, not a copy
+}
+
+bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {  // True if a begins with b; an empty b is a prefix of every a.
+ return a.length() >= b.length()  // checked first so the compare below never asks for more characters than a has
+ && a.compare(0, b.length(), b) == 0;
+}
+
+}
+
+std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {  // Parses a number at the start of input; on success sets token to the matched text and returns the value.
+ const auto view = match_number(input);  // may throw LexerError (exponent without digits)
+ if (!view.data()) {
+ return {};  // input does not start with a number; token is left untouched
+ }
+ try {
+ const auto result = std::stod(std::string{view});  // std::stod takes a std::string, hence the temporary copy
+ token = view;  // assigned only after a successful conversion
+ return result;
+ } catch (const std::exception&) {  // std::stod rejected text the regex accepted (e.g. value out of range for double)
+ throw LexerError{"internal: couldn't parse number from: " + std::string{view}};
+ }
+ return {};  // NOTE(review): unreachable - the try block returns and the catch block throws
+}
+
+std::optional<double> parse_number(const std::string_view& input) {  // Overload for callers that only need the value, not the matched text.
+ std::string_view token;  // receives the matched text, then discarded
+ return parse_number(input, token);
+}
+
+std::optional<token::Type> parse_const_token(const std::string_view& input, std::string_view& token) {  // Returns the first type from token::const_tokens() whose string form is a prefix of input.
+ for (const auto type : token::const_tokens()) {  // NOTE(review): first match wins, so the order of const_tokens() presumably matters if one token string is a prefix of another - confirm
+ const auto str = token::type_to_string(type);
+ if (starts_with(input, str)) {
+ token = std::string_view(input.data(), str.length());  // view into input itself, not into str
+ return {type};
+ }
+ }
+ return {};  // no constant token matches; token is left untouched
+}
+
+std::optional<token::Type> parse_const_token(const std::string_view& input) {  // Overload for callers that only need the token type, not the matched text.
+ std::string_view token;  // receives the matched text, then discarded
+ return parse_const_token(input, token);
+}
+
+std::string_view parse_whitespace(const std::string_view& input) {  // Returns a view of the whitespace run at the start of input, if any.
+ static const std::regex ws_regex{R"(^\s+)"};  // one or more whitespace characters, anchored at the start
+
+ std::cmatch match;
+ {
+ const auto begin = input.data();
+ const auto end = begin + input.length();
+ if (std::regex_search(begin, end, match, ws_regex)) {
+ return std::string_view(match[0].first, match[0].length());  // view into input; the signed length() converts implicitly to size_t here
+ }
+ }
+ return {};  // input does not start with whitespace: default-constructed view, data() == nullptr
+}
+
+}
diff --git a/server/lexer/details/parse.hpp b/server/lexer/details/parse.hpp
new file mode 100644
index 0000000..72da234
--- /dev/null
+++ b/server/lexer/details/parse.hpp
@@ -0,0 +1,22 @@
+// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
+// This file is part of the "math-server" project.
+// For details, see https://github.com/egor-tensin/math-server.
+// Distributed under the MIT License.
+
+#pragma once
+
+#include "../token_type.hpp"
+
+#include <optional>
+#include <string_view>
+
+namespace math::server::lexer::details {
+
+// Exposed for testing:
+std::string_view parse_whitespace(const std::string_view&);  // leading whitespace of the input, or a default (null-data) view
+std::optional<double> parse_number(const std::string_view&, std::string_view&);  // value of a leading number; the second argument receives the matched text
+std::optional<double> parse_number(const std::string_view&);  // same, without reporting the matched text
+std::optional<token::Type> parse_const_token(const std::string_view&, std::string_view&);  // type of a leading constant token; the second argument receives the matched text
+std::optional<token::Type> parse_const_token(const std::string_view&);  // same, without reporting the matched text
+
+}
diff --git a/server/lexer/lexer.cpp b/server/lexer/lexer.cpp
index 24e3f14..cf24189 100644
--- a/server/lexer/lexer.cpp
+++ b/server/lexer/lexer.cpp
@@ -3,113 +3,19 @@
// For details, see https://github.com/egor-tensin/math-server.
// Distributed under the MIT License.
+#include "details/parse.hpp"
#include "error.hpp"
#include "lexer.hpp"
#include "token.hpp"
#include "token_type.hpp"
-#include <exception>
#include <optional>
-#include <regex>
-#include <string_view>
#include <string>
+#include <string_view>
+#include <utility>
#include <vector>
namespace math::server {
-namespace lexer {
-namespace {
-
-std::string_view match_number(const std::string_view& input) {
- static constexpr std::regex::flag_type flags =
- std::regex_constants::ECMAScript |
- std::regex_constants::icase;
- // This is a hacky attempt to describe a C-like grammar for floating-point
- // numbers using a regex (the tests seem to pass though).
- // A proper NFA would be better, I guess.
- static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags};
-
- std::cmatch match;
- {
- const auto begin = input.data();
- const auto end = begin + input.length();
- if (!std::regex_search(begin, end, match, number_regex)) {
- return {};
- }
- }
- {
- // If we have the numeric part of a number followed by 'e' and no digits,
- // 1) that 'e' definitely belongs to this number token,
- // 2) the user forgot to type in the required digits.
- const auto& exponent = match[1];
- const auto& abs_power = match[2];
- if (exponent.matched && abs_power.matched && abs_power.length() == 0) {
- throw LexerError{"exponent has no digits: " + match[0].str()};
- }
- }
- return {match[0].first, static_cast<std::size_t>(match[0].length())};
-}
-
-std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {
- const auto view = match_number(input);
- if (!view.data()) {
- return {};
- }
- try {
- const auto result = std::stod(std::string{view});
- token = view;
- return result;
- } catch (const std::exception&) {
- throw LexerError{"internal: couldn't parse number from: " + std::string{view}};
- }
- return {};
-}
-
-bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {
- return a.length() >= b.length()
- && a.compare(0, b.length(), b) == 0;
-}
-
-std::optional<token::Type> parse_const_token(const std::string_view& input, std::string_view& token) {
- for (const auto type : token::const_tokens()) {
- const auto str = token::type_to_string(type);
- if (starts_with(input, str)) {
- token = std::string_view(input.data(), str.length());
- return {type};
- }
- }
- return {};
-}
-
-}
-
-namespace details {
-
-std::optional<double> parse_number(const std::string_view& input) {
- std::string_view token;
- return lexer::parse_number(input, token);
-}
-
-std::optional<token::Type> parse_const_token(const std::string_view& input) {
- std::string_view token;
- return lexer::parse_const_token(input, token);
-}
-
-std::string_view parse_whitespace(const std::string_view& input) {
- static const std::regex ws_regex{R"(^\s+)"};
-
- std::cmatch match;
- {
- const auto begin = input.data();
- const auto end = begin + input.length();
- if (std::regex_search(begin, end, match, ws_regex)) {
- return std::string_view(match[0].first, match[0].length());
- }
- }
- return {};
-}
-
-}
-}
Lexer::Lexer(const std::string_view& input)
: Lexer{lexer::Input{input}} {
@@ -190,7 +96,7 @@ std::optional<Lexer::ParsedToken> Lexer::parse_whitespace() const {
std::optional<Lexer::ParsedToken> Lexer::parse_const_token() const {
std::string_view token_view;
- const auto type = lexer::parse_const_token(m_input.get_input(), token_view);
+ const auto type = lexer::details::parse_const_token(m_input.get_input(), token_view);
if (!type.has_value()) {
return {};
}
@@ -199,7 +105,7 @@ std::optional<Lexer::ParsedToken> Lexer::parse_const_token() const {
std::optional<Lexer::ParsedToken> Lexer::parse_number() const {
std::string_view token_view;
- const auto number = lexer::parse_number(m_input.get_input(), token_view);
+ const auto number = lexer::details::parse_number(m_input.get_input(), token_view);
if (!number.has_value()) {
return {};
}
diff --git a/server/lexer/lexer.hpp b/server/lexer/lexer.hpp
index 44831d8..68950cb 100644
--- a/server/lexer/lexer.hpp
+++ b/server/lexer/lexer.hpp
@@ -15,14 +15,6 @@
#include <vector>
namespace math::server {
-namespace lexer::details {
-
-// Exposed for testing:
-std::string_view parse_whitespace(const std::string_view&);
-std::optional<double> parse_number(const std::string_view&);
-std::optional<token::Type> parse_const_token(const std::string_view&);
-
-}
class Lexer {
public: