From 609fac6e9259d1215ee318672ae58986ff0213e0 Mon Sep 17 00:00:00 2001
From: Egor Tensin <Egor.Tensin@gmail.com>
Date: Fri, 3 Jan 2020 05:23:35 +0300
Subject: lexer: refactor details/parse.cpp a bit

---
 server/lexer/details/parse.cpp | 139 ++++++++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 57 deletions(-)

(limited to 'server/lexer')

diff --git a/server/lexer/details/parse.cpp b/server/lexer/details/parse.cpp
index 01ef11c..5e9ce9f 100644
--- a/server/lexer/details/parse.cpp
+++ b/server/lexer/details/parse.cpp
@@ -20,80 +20,105 @@
 namespace math::server::lexer::details {
 namespace {
 
-std::cmatch std_regex_search(const std::string_view& input, const std::regex& regex) {
-    std::cmatch match;
-    {
-        const auto begin = input.data();
-        const auto end = begin + input.length();
-        std::regex_search(begin, end, match, regex);
+class RegexNumberMatcher {
+public:
+    bool match(const std::string_view& input) {
+        if (!basic_match(input)) {
+            return false;
+        }
+        // If the numeric part of a number is followed by 'e' (and an optional
+        // sign) but no digits, then:
+        // 1) that 'e' definitely belongs to this number token,
+        // 2) the user forgot to type in the required digits.
+        if (matched_e() && !matched_e_power()) {
+            throw LexerError{"exponent has no digits: " + to_str()};
+        }
+        return true;
     }
-    return match;
-}
 
-boost::cmatch boost_regex_search(const std::string_view& input, const boost::regex& regex) {
-    boost::cmatch match;
-    {
+    virtual std::string_view get() const = 0;
+
+protected:
+    // This is a hacky attempt to describe a C-like grammar for floating-point
+    // numbers using a regex (the tests seem to pass though).
+    // A proper NFA would be better, I guess.
+    static constexpr std::string_view NUMBER_REGEX{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX"};
+
+private:
+    virtual bool basic_match(const std::string_view& input) = 0;
+
+    virtual std::string to_str() const = 0;
+
+    virtual bool matched_e() const = 0;
+
+    virtual bool matched_e_power() const = 0;
+};
+
+class StdNumberMatcher : public RegexNumberMatcher {
+public:
+    std::string_view get() const override {
+        return {m_match[0].first, static_cast<std::size_t>(m_match[0].length())};
+    }
+
+private:
+    bool basic_match(const std::string_view& input) override {
         const auto begin = input.data();
         const auto end = begin + input.length();
-        boost::regex_search(begin, end, match, regex);
+        return std::regex_search(begin, end, m_match, get_regex());
     }
-    return match;
-}
 
-// CMatch is either std::cmatch or boost::cmatch.
-template <typename CMatch>
-void check_exponent(const CMatch& match) {
-    // If we have the numeric part of a number followed by 'e' and no digits,
-    // 1) that 'e' definitely belongs to this number token,
-    // 2) the user forgot to type in the required digits.
-    const auto& exponent = match[1];
-    const auto& abs_power = match[2];
-    if (exponent.matched && abs_power.matched && abs_power.length() == 0) {
-        throw LexerError{"exponent has no digits: " + match[0].str()};
+    static const std::regex& get_regex() {
+        static constexpr auto flags =
+            std::regex_constants::ECMAScript |
+            std::regex_constants::icase;
+        static const std::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
+        return regex;
     }
-}
 
-// This is a hacky attempt to describe a C-like grammar for floating-point
-// numbers using a regex (the tests seem to pass though).
-// A proper NFA would be better, I guess.
-const std::string_view NUMBER_REGEX = R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX";
+    std::string to_str() const override { return m_match[0].str(); }
 
-std::string_view std_match_number(const std::string_view& input) {
-    static constexpr auto flags =
-        std::regex_constants::ECMAScript |
-        std::regex_constants::icase;
-    static const std::regex number_regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
+    bool matched_e() const override { return m_match[1].matched; }
 
-    const auto match = std_regex_search(input, number_regex);
-    if (match.empty()) {
-        return {};
+    bool matched_e_power() const override { return m_match[2].matched && m_match[2].length() != 0; }
+
+    std::cmatch m_match;
+};
+
+class BoostNumberMatcher : public RegexNumberMatcher {
+public:
+    std::string_view get() const override {
+        return {m_match[0].first, static_cast<std::size_t>(m_match[0].length())};
     }
-    check_exponent(match);
-    return {match[0].first, static_cast<std::size_t>(match[0].length())};
-}
 
-std::string_view boost_match_number(const std::string_view& input) {
-    static const boost::regex number_regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), boost::regex::icase};
+private:
+    bool basic_match(const std::string_view& input) override {
+        const auto begin = input.data();
+        const auto end = begin + input.length();
+        return boost::regex_search(begin, end, m_match, get_regex());
+    }
 
-    const auto match = boost_regex_search(input, number_regex);
-    if (match.empty()) {
-        return {};
+    static const boost::regex& get_regex() {
+        static constexpr boost::regex::flag_type flags =
+            boost::regex::ECMAScript |
+            boost::regex::icase;
+        static const boost::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
+        return regex;
     }
-    check_exponent(match);
-    return {match[0].first, static_cast<std::size_t>(match[0].length())};
-}
 
-std::string_view match_number(const std::string_view& input) {
-    return std_match_number(input);
-}
+    std::string to_str() const override { return m_match[0].str(); }
+
+    bool matched_e() const override { return m_match[1].matched; }
+
+    bool matched_e_power() const override { return m_match[2].matched && m_match[2].length() != 0; }
 
-using NumberMatcher = std::function<std::string_view (const std::string_view&)>;
+    boost::cmatch m_match;
+};
 
-std::optional<double> parse_number(const std::string_view& input, const NumberMatcher& match, std::string_view& token) {
-    const auto view = match(input);
-    if (!view.data()) {
+std::optional<double> parse_number(const std::string_view& input, RegexNumberMatcher&& matcher, std::string_view& token) {
+    if (!matcher.match(input)) {
         return {};
     }
+    const auto view = matcher.get();
     try {
         const auto result = std::stod(std::string{view});
         token = view;
@@ -114,7 +139,7 @@ bool starts_with(const std::string_view& a, const std::string_view& b) noexcept
 namespace impl {
 
 std::optional<double> std_parse_number(const std::string_view& input, std::string_view& token) {
-    return parse_number(input, &std_match_number, token);
+    return parse_number(input, StdNumberMatcher{}, token);
 }
 
 std::optional<double> std_parse_number(const std::string_view& input) {
@@ -123,7 +148,7 @@ std::optional<double> std_parse_number(const std::string_view& input) {
 }
 
 std::optional<double> boost_parse_number(const std::string_view& input, std::string_view& token) {
-    return parse_number(input, &boost_match_number, token);
+    return parse_number(input, BoostNumberMatcher{}, token);
 }
 
 std::optional<double> boost_parse_number(const std::string_view& input) {
-- 
cgit v1.2.3