From 609fac6e9259d1215ee318672ae58986ff0213e0 Mon Sep 17 00:00:00 2001 From: Egor Tensin Date: Fri, 3 Jan 2020 05:23:35 +0300 Subject: lexer: refactor details/parse.cpp a bit --- server/lexer/details/parse.cpp | 139 ++++++++++++++++++++++++----------------- 1 file changed, 82 insertions(+), 57 deletions(-) (limited to 'server') diff --git a/server/lexer/details/parse.cpp b/server/lexer/details/parse.cpp index 01ef11c..5e9ce9f 100644 --- a/server/lexer/details/parse.cpp +++ b/server/lexer/details/parse.cpp @@ -20,80 +20,105 @@ namespace math::server::lexer::details { namespace { -std::cmatch std_regex_search(const std::string_view& input, const std::regex& regex) { - std::cmatch match; - { - const auto begin = input.data(); - const auto end = begin + input.length(); - std::regex_search(begin, end, match, regex); +class RegexNumberMatcher { +public: + bool match(const std::string_view& input) { + if (!basic_match(input)) { + return false; + } + // If we have the numeric part of a number followed by 'e' and no + // digits, + // 1) that 'e' definitely belongs to this number token, + // 2) the user forgot to type in the required digits. + if (matched_e() && !matched_e_power()) { + throw LexerError{"exponent has no digits: " + to_str()}; + } + return true; } - return match; -} -boost::cmatch boost_regex_search(const std::string_view& input, const boost::regex& regex) { - boost::cmatch match; - { + virtual std::string_view get() const = 0; + +protected: + // This is a hacky attempt to describe a C-like grammar for floating-point + // numbers using a regex (the tests seem to pass though). + // A proper NFA would be better, I guess. 
+ static constexpr std::string_view NUMBER_REGEX{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX"}; + +private: + virtual bool basic_match(const std::string_view& input) = 0; + + virtual std::string to_str() const = 0; + + virtual bool matched_e() const = 0; + + virtual bool matched_e_power() const = 0; +}; + +class StdNumberMatcher : public RegexNumberMatcher { +public: + std::string_view get() const override { + return {m_match[0].first, static_cast<std::size_t>(m_match[0].length())}; + } + +private: + bool basic_match(const std::string_view& input) override { const auto begin = input.data(); const auto end = begin + input.length(); - boost::regex_search(begin, end, match, regex); + return std::regex_search(begin, end, m_match, get_regex()); } - return match; -} -// CMatch is either std::cmatch or boost::cmatch. -template <typename CMatch> -void check_exponent(const CMatch& match) { - // If we have the numeric part of a number followed by 'e' and no digits, - // 1) that 'e' definitely belongs to this number token, - // 2) the user forgot to type in the required digits. - const auto& exponent = match[1]; - const auto& abs_power = match[2]; - if (exponent.matched && abs_power.matched && abs_power.length() == 0) { - throw LexerError{"exponent has no digits: " + match[0].str()}; + static const std::regex& get_regex() { + static constexpr auto flags = + std::regex_constants::ECMAScript | + std::regex_constants::icase; + static const std::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags}; + return regex; } -} -// This is a hacky attempt to describe a C-like grammar for floating-point -// numbers using a regex (the tests seem to pass though). -// A proper NFA would be better, I guess. 
-const std::string_view NUMBER_REGEX = R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX"; + std::string to_str() const override { return m_match[0].str(); } -std::string_view std_match_number(const std::string_view& input) { - static constexpr auto flags = - std::regex_constants::ECMAScript | - std::regex_constants::icase; - static const std::regex number_regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags}; + bool matched_e() const override { return m_match[1].matched; } - const auto match = std_regex_search(input, number_regex); - if (match.empty()) { - return {}; + bool matched_e_power() const override { return m_match[2].matched && m_match[2].length() != 0; } + + std::cmatch m_match; +}; + +class BoostNumberMatcher : public RegexNumberMatcher { +public: + std::string_view get() const override { + return {m_match[0].first, static_cast<std::size_t>(m_match[0].length())}; } - check_exponent(match); - return {match[0].first, static_cast<std::size_t>(match[0].length())}; -} -std::string_view boost_match_number(const std::string_view& input) { - static const boost::regex number_regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), boost::regex::icase}; +private: + bool basic_match(const std::string_view& input) override { + const auto begin = input.data(); + const auto end = begin + input.length(); + return boost::regex_search(begin, end, m_match, get_regex()); + } - const auto match = boost_regex_search(input, number_regex); - if (match.empty()) { - return {}; + static const boost::regex& get_regex() { + static constexpr boost::regex::flag_type flags = + boost::regex::ECMAScript | + boost::regex::icase; + static const boost::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags}; + return regex; } - check_exponent(match); - return {match[0].first, static_cast<std::size_t>(match[0].length())}; -} -std::string_view match_number(const std::string_view& input) { - return std_match_number(input); -} + std::string to_str() const override { return m_match[0].str(); } + + bool matched_e() const 
+ override { return m_match[1].matched; } + + bool matched_e_power() const override { return m_match[2].matched && m_match[2].length() != 0; } -using NumberMatcher = std::function<std::string_view (const std::string_view&)>; + boost::cmatch m_match; +}; -std::optional<double> parse_number(const std::string_view& input, const NumberMatcher& match, std::string_view& token) { - const auto view = match(input); - if (!view.data()) { +std::optional<double> parse_number(const std::string_view& input, RegexNumberMatcher&& matcher, std::string_view& token) { + if (!matcher.match(input)) { return {}; } + const auto view = matcher.get(); try { const auto result = std::stod(std::string{view}); token = view; @@ -114,7 +139,7 @@ bool starts_with(const std::string_view& a, const std::string_view& b) noexcept namespace impl { std::optional<double> std_parse_number(const std::string_view& input, std::string_view& token) { - return parse_number(input, &std_match_number, token); + return parse_number(input, StdNumberMatcher{}, token); } std::optional<double> std_parse_number(const std::string_view& input) { @@ -123,7 +148,7 @@ std::optional<double> std_parse_number(const std::string_view& input) { } std::optional<double> boost_parse_number(const std::string_view& input, std::string_view& token) { - return parse_number(input, &boost_match_number, token); + return parse_number(input, BoostNumberMatcher{}, token); } std::optional<double> boost_parse_number(const std::string_view& input) { -- cgit v1.2.3