aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/server/lexer/details
diff options
context:
space:
mode:
authorEgor Tensin <Egor.Tensin@gmail.com>2020-01-06 13:40:22 +0300
committerEgor Tensin <Egor.Tensin@gmail.com>2020-01-06 14:38:46 +0300
commit5c11d731cadd06204c8c6d6c1aea2253450204a8 (patch)
tree4bbea3a2f1180c39e958f9dad109ebf4adfe4e05 /server/lexer/details
parentlexer: switch to Boost.Regex (diff)
downloadmath-server-5c11d731cadd06204c8c6d6c1aea2253450204a8.tar.gz
math-server-5c11d731cadd06204c8c6d6c1aea2253450204a8.zip
lexer: std:: vs boost:: for whitespace parsing
Added the corresponding benchmarks too.
Diffstat (limited to '')
-rw-r--r--server/lexer/details/parse.cpp132
-rw-r--r--server/lexer/details/parse.hpp5
2 files changed, 89 insertions, 48 deletions
diff --git a/server/lexer/details/parse.cpp b/server/lexer/details/parse.cpp
index 79ad4cf..25a77ce 100644
--- a/server/lexer/details/parse.cpp
+++ b/server/lexer/details/parse.cpp
@@ -20,7 +20,28 @@
namespace math::server::lexer::details {
namespace {
-class RegexNumberMatcher {
+template <template<typename> typename MatchResultsT>
+class RegexMatcher {
+public:
+ using MatchResults = MatchResultsT<std::string_view::const_iterator>;
+
+ virtual ~RegexMatcher() = default;
+
+ virtual bool match_regex(const std::string_view& input) = 0;
+
+ std::string_view to_view() const {
+ return {&*m_match[0].first, static_cast<std::size_t>(m_match[0].length())};
+ // ^ I fucking hate C++.
+ }
+
+ std::string to_str() const { return m_match[0].str(); }
+
+protected:
+ MatchResults m_match;
+};
+
+template <template<typename> typename MatchResultsT>
+class RegexNumberMatcher : public RegexMatcher<MatchResultsT> {
public:
bool match(const std::string_view& input) {
if (!match_regex(input)) {
@@ -36,8 +57,6 @@ public:
return true;
}
- virtual std::string_view to_view() const = 0;
-
protected:
// This is a hacky attempt to describe a C-like grammar for floating-point
// numbers using a regex (the tests seem to pass though).
@@ -45,27 +64,18 @@ protected:
static constexpr std::string_view NUMBER_REGEX{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX"};
private:
- virtual bool match_regex(const std::string_view& input) = 0;
+ bool matched_e() const { return m_match[1].matched; }
- virtual std::string to_str() const = 0;
-
- virtual bool matched_e() const = 0;
-
- virtual bool matched_e_power() const = 0;
+ bool matched_e_power() const { return m_match[2].matched && m_match[2].length() != 0; }
};
-class StdNumberMatcher : public RegexNumberMatcher {
+class StdNumberMatcher : public RegexNumberMatcher<std::match_results> {
public:
- std::string_view to_view() const override {
- return {&*m_match[0].first, static_cast<std::size_t>(m_match[0].length())};
- // ^ I fucking hate C++.
- }
-
-private:
bool match_regex(const std::string_view& input) override {
return std::regex_search(input.cbegin(), input.cend(), m_match, get_regex());
}
+private:
static const std::regex& get_regex() {
static constexpr auto flags =
std::regex_constants::ECMAScript |
@@ -73,28 +83,15 @@ private:
static const std::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
return regex;
}
-
- std::string to_str() const override { return m_match[0].str(); }
-
- bool matched_e() const override { return m_match[1].matched; }
-
- bool matched_e_power() const override { return m_match[2].matched && m_match[2].length() != 0; }
-
- std::match_results<std::string_view::const_iterator> m_match;
};
-class BoostNumberMatcher : public RegexNumberMatcher {
+class BoostNumberMatcher : public RegexNumberMatcher<boost::match_results> {
public:
- std::string_view to_view() const override {
- return {&*m_match[0].first, static_cast<std::size_t>(m_match[0].length())};
- // ^ I fucking hate C++.
- }
-
-private:
bool match_regex(const std::string_view& input) override {
return boost::regex_search(input.cbegin(), input.cend(), m_match, get_regex());
}
+private:
static const boost::regex& get_regex() {
static constexpr boost::regex::flag_type flags =
boost::regex::ECMAScript |
@@ -102,17 +99,10 @@ private:
static const boost::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
return regex;
}
-
- std::string to_str() const override { return m_match[0].str(); }
-
- bool matched_e() const override { return m_match[1].matched; }
-
- bool matched_e_power() const override { return m_match[2].matched && m_match[2].length() != 0; }
-
- boost::match_results<std::string_view::const_iterator> m_match;
};
-std::optional<double> parse_number(const std::string_view& input, RegexNumberMatcher&& matcher, std::string_view& token) {
+template <template<typename> typename MatchResultsT>
+std::optional<double> parse_number(const std::string_view& input, RegexNumberMatcher<MatchResultsT>&& matcher, std::string_view& token) {
if (!matcher.match(input)) {
return {};
}
@@ -127,6 +117,51 @@ std::optional<double> parse_number(const std::string_view& input, RegexNumberMat
return {};
}
+template <template<typename> typename MatchResultsT>
+class RegexWhitespaceMatcher : public RegexMatcher<MatchResultsT> {
+protected:
+ // This is a hacky attempt to describe a C-like grammar for floating-point
+ // numbers using a regex (the tests seem to pass though).
+ // A proper NFA would be better, I guess.
+ static constexpr std::string_view WS_REGEX{R"(^\s+)"};
+};
+
+class StdWhitespaceMatcher : public RegexWhitespaceMatcher<std::match_results> {
+public:
+ bool match_regex(const std::string_view& input) override {
+ return std::regex_search(input.cbegin(), input.cend(), m_match, get_regex());
+ }
+
+private:
+ static const std::regex& get_regex() {
+ static constexpr auto flags = std::regex_constants::ECMAScript;
+ static const std::regex regex{WS_REGEX.data(), WS_REGEX.length(), flags};
+ return regex;
+ }
+};
+
+class BoostWhitespaceMatcher : public RegexWhitespaceMatcher<boost::match_results> {
+public:
+ bool match_regex(const std::string_view& input) override {
+ return boost::regex_search(input.cbegin(), input.cend(), m_match, get_regex());
+ }
+
+private:
+ static const boost::regex& get_regex() {
+ static constexpr boost::regex::flag_type flags = boost::regex::ECMAScript;
+ static const boost::regex regex{WS_REGEX.data(), WS_REGEX.length(), flags};
+ return regex;
+ }
+};
+
+template <template<typename> typename MatchResultsT>
+std::string_view parse_whitespace(const std::string_view& input, RegexWhitespaceMatcher<MatchResultsT>&& matcher) {
+ if (matcher.match_regex(input)) {
+ return matcher.to_view();
+ }
+ return {};
+}
+
bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {
return a.length() >= b.length()
&& a.compare(0, b.length(), b) == 0;
@@ -154,6 +189,14 @@ std::optional<double> boost_parse_number(const std::string_view& input) {
return boost_parse_number(input, token);
}
+std::string_view std_parse_whitespace(const std::string_view& input) {
+ return parse_whitespace(input, StdWhitespaceMatcher{});
+}
+
+std::string_view boost_parse_whitespace(const std::string_view& input) {
+ return parse_whitespace(input, BoostWhitespaceMatcher{});
+}
+
}
std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {
@@ -182,14 +225,7 @@ std::optional<token::Type> parse_const_token(const std::string_view& input) {
}
std::string_view parse_whitespace(const std::string_view& input) {
- static const boost::regex ws_regex{R"(^\s+)"};
-
- boost::match_results<std::string_view::const_iterator> match;
- if (boost::regex_search(input.cbegin(), input.cend(), match, ws_regex)) {
- return {&*match[0].first, static_cast<std::size_t>(match[0].length())};
- // ^ Still fucking hate C++.
- }
- return {};
+ return impl::boost_parse_whitespace(input);
}
}
diff --git a/server/lexer/details/parse.hpp b/server/lexer/details/parse.hpp
index 6a8688b..693dd35 100644
--- a/server/lexer/details/parse.hpp
+++ b/server/lexer/details/parse.hpp
@@ -13,11 +13,16 @@
namespace math::server::lexer::details {
namespace impl {
+// Exposed for benchmarking:
+
std::optional<double> std_parse_number(const std::string_view&, std::string_view&);
std::optional<double> std_parse_number(const std::string_view&);
std::optional<double> boost_parse_number(const std::string_view&, std::string_view&);
std::optional<double> boost_parse_number(const std::string_view&);
+std::string_view std_parse_whitespace(const std::string_view&);
+std::string_view boost_parse_whitespace(const std::string_view&);
+
}
// Exposed for testing: