about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/server/lexer
diff options
context:
space:
mode:
author: Egor Tensin <Egor.Tensin@gmail.com> 2020-01-03 04:13:47 +0300
committer: Egor Tensin <Egor.Tensin@gmail.com> 2020-01-03 04:13:47 +0300
commit: 42820c4161edcabb1e36336d175bb8645f529df3 (patch)
tree: c65cec6d00a0494ea26b9f45903eb512f5e6e302 /server/lexer
parent: cmake: don't install benchmark the library (diff)
download: math-server-42820c4161edcabb1e36336d175bb8645f529df3.tar.gz
download: math-server-42820c4161edcabb1e36336d175bb8645f529df3.zip
lexer: add alternative boost::regex implementations
Diffstat (limited to 'server/lexer')
-rw-r--r-- server/lexer/CMakeLists.txt 3
-rw-r--r-- server/lexer/details/parse.cpp 118
-rw-r--r-- server/lexer/details/parse.hpp 8
3 files changed, 104 insertions, 25 deletions
diff --git a/server/lexer/CMakeLists.txt b/server/lexer/CMakeLists.txt
index b62e47a..b5b8b63 100644
--- a/server/lexer/CMakeLists.txt
+++ b/server/lexer/CMakeLists.txt
@@ -1,2 +1,5 @@
+find_package(Boost REQUIRED COMPONENTS regex)
+
add_library(lexer details/parse.cpp lexer.cpp token.cpp token_type.cpp)
target_link_libraries(lexer PUBLIC common)
+target_link_libraries(lexer PRIVATE Boost::regex)
diff --git a/server/lexer/details/parse.cpp b/server/lexer/details/parse.cpp
index 464dfb2..01ef11c 100644
--- a/server/lexer/details/parse.cpp
+++ b/server/lexer/details/parse.cpp
@@ -6,9 +6,12 @@
#include "../error.hpp"
#include "../token_type.hpp"
+#include <boost/regex.hpp>
+
#include <cstddef>
#include <exception>
+#include <functional>
#include <optional>
#include <regex>
#include <string>
@@ -17,45 +20,77 @@
namespace math::server::lexer::details {
namespace {
-std::string_view match_number(const std::string_view& input) {
- static constexpr std::regex::flag_type flags =
- std::regex_constants::ECMAScript |
- std::regex_constants::icase;
- // This is a hacky attempt to describe a C-like grammar for floating-point
- // numbers using a regex (the tests seem to pass though).
- // A proper NFA would be better, I guess.
- static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags};
-
+std::cmatch std_regex_search(const std::string_view& input, const std::regex& regex) {
std::cmatch match;
{
const auto begin = input.data();
const auto end = begin + input.length();
- if (!std::regex_search(begin, end, match, number_regex)) {
- return {};
- }
+ std::regex_search(begin, end, match, regex);
}
+ return match;
+}
+
+boost::cmatch boost_regex_search(const std::string_view& input, const boost::regex& regex) {
+ boost::cmatch match;
{
- // If we have the numeric part of a number followed by 'e' and no digits,
- // 1) that 'e' definitely belongs to this number token,
- // 2) the user forgot to type in the required digits.
- const auto& exponent = match[1];
- const auto& abs_power = match[2];
- if (exponent.matched && abs_power.matched && abs_power.length() == 0) {
- throw LexerError{"exponent has no digits: " + match[0].str()};
- }
+ const auto begin = input.data();
+ const auto end = begin + input.length();
+ boost::regex_search(begin, end, match, regex);
+ }
+ return match;
+}
+
+// CMatch is either std::cmatch or boost::cmatch.
+template <typename CMatch>
+void check_exponent(const CMatch& match) {
+ // If we have the numeric part of a number followed by 'e' and no digits,
+ // 1) that 'e' definitely belongs to this number token,
+ // 2) the user forgot to type in the required digits.
+ const auto& exponent = match[1];
+ const auto& abs_power = match[2];
+ if (exponent.matched && abs_power.matched && abs_power.length() == 0) {
+ throw LexerError{"exponent has no digits: " + match[0].str()};
}
+}
+
+// This is a hacky attempt to describe a C-like grammar for floating-point
+// numbers using a regex (the tests seem to pass though).
+// A proper NFA would be better, I guess.
+const std::string_view NUMBER_REGEX = R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX";
+
+std::string_view std_match_number(const std::string_view& input) {
+ static constexpr auto flags =
+ std::regex_constants::ECMAScript |
+ std::regex_constants::icase;
+ static const std::regex number_regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
+
+ const auto match = std_regex_search(input, number_regex);
+ if (match.empty()) {
+ return {};
+ }
+ check_exponent(match);
return {match[0].first, static_cast<std::size_t>(match[0].length())};
}
-bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {
- return a.length() >= b.length()
- && a.compare(0, b.length(), b) == 0;
+std::string_view boost_match_number(const std::string_view& input) {
+ static const boost::regex number_regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), boost::regex::icase};
+
+ const auto match = boost_regex_search(input, number_regex);
+ if (match.empty()) {
+ return {};
+ }
+ check_exponent(match);
+ return {match[0].first, static_cast<std::size_t>(match[0].length())};
}
+std::string_view match_number(const std::string_view& input) {
+ return std_match_number(input);
}
-std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {
- const auto view = match_number(input);
+using NumberMatcher = std::function<std::string_view (const std::string_view&)>;
+
+std::optional<double> parse_number(const std::string_view& input, const NumberMatcher& match, std::string_view& token) {
+ const auto view = match(input);
if (!view.data()) {
return {};
}
@@ -69,6 +104,39 @@ std::optional<double> parse_number(const std::string_view& input, std::string_vi
return {};
}
+bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {
+ return a.length() >= b.length()
+ && a.compare(0, b.length(), b) == 0;
+}
+
+}
+
+namespace impl {
+
+std::optional<double> std_parse_number(const std::string_view& input, std::string_view& token) {
+ return parse_number(input, &std_match_number, token);
+}
+
+std::optional<double> std_parse_number(const std::string_view& input) {
+ std::string_view token;
+ return std_parse_number(input, token);
+}
+
+std::optional<double> boost_parse_number(const std::string_view& input, std::string_view& token) {
+ return parse_number(input, &boost_match_number, token);
+}
+
+std::optional<double> boost_parse_number(const std::string_view& input) {
+ std::string_view token;
+ return boost_parse_number(input, token);
+}
+
+}
+
+std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {
+ return impl::std_parse_number(input, token);
+}
+
std::optional<double> parse_number(const std::string_view& input) {
std::string_view token;
return parse_number(input, token);
diff --git a/server/lexer/details/parse.hpp b/server/lexer/details/parse.hpp
index 72da234..6a8688b 100644
--- a/server/lexer/details/parse.hpp
+++ b/server/lexer/details/parse.hpp
@@ -11,6 +11,14 @@
#include <string_view>
namespace math::server::lexer::details {
+namespace impl {
+
+std::optional<double> std_parse_number(const std::string_view&, std::string_view&);
+std::optional<double> std_parse_number(const std::string_view&);
+std::optional<double> boost_parse_number(const std::string_view&, std::string_view&);
+std::optional<double> boost_parse_number(const std::string_view&);
+
+}
// Exposed for testing:
std::string_view parse_whitespace(const std::string_view&);