// path: root/server/lexer/details/parse.cpp
// blob: ccabb7e9242b92405a2c005e56007c63f1cc302c







                                                               

                          
                  








                                        
                                                                  
                                                     

                                 

                    











                                                                                   
                          

  
                                 
                                                               

                                               
                                        






                                                                         
                                                                          

                    
     
 



                                                                              

                                                               

        
                                                               
 


                                                                          

  

                                                                                       
       

                                                                                     
     
 
        

                                          
                                                                           

                                                                                         
     

  

                                                                                         
       

                                                                                       
     
 
        

                                                        
                                                           

                                                                                           
     
  
 
                                 


                                                                               
                                

                  
                                        









                                                                                       
                                 

                                                                   


                                                          

                                                                                           












                                                                                     

                                                                                             












                                                                                       
                                 

                                                                                    





                                     
                                                                                 
                                                                        

 
              



                                                                                                
                                                          







                                                                                                  
                                                            






                                                                         







                                                                        
                   

                                                                                            
                                                  

 




                                                                   

                                                                           















                                                                             
                                               

 
                                           
// Copyright (c) 2020 Egor Tensin <Egor.Tensin@gmail.com>
// This file is part of the "math-server" project.
// For details, see https://github.com/egor-tensin/math-server.
// Distributed under the MIT License.

#include "../error.hpp"
#include "../token_type.hpp"

#include <boost/regex.hpp>

#include <cstddef>
#include <exception>
#include <optional>
#include <regex>
#include <string>
#include <string_view>

namespace math::server::lexer::details {
namespace {

// This approach gives GCC on Travis an "internal compiler error":
// template <template <typename> class MatchResultsT>

/// Common base of the regex-backed matchers in this file.
///
/// `MatchResultsT` is the match-results type of the regex library in use
/// (the subclasses below plug in `std::match_results` or
/// `boost::match_results` over `std::string_view` iterators).  A subclass
/// implements match_regex() and stores the outcome in `m_match`; the accessors
/// here expose the text of the whole match (sub-match 0).
template <typename MatchResultsT>
class RegexMatcher {
public:
    virtual ~RegexMatcher() = default;

    /// Runs the regex against `input`; returns true if it matched.
    virtual bool match_regex(const std::string_view& input) = 0;

    /// The whole match as a view.  Only meaningful after a successful
    /// match_regex() call.
    std::string_view to_view() const {
        const auto& whole = m_match[0];
        // A C++17 string_view can't be built from an iterator pair, so the
        // iterator is turned into a raw pointer with `&*`.
        const auto* const start = &*whole.first;
        const auto size = static_cast<std::size_t>(whole.length());
        return std::string_view{start, size};
    }

    /// The whole match as an owning string.
    std::string to_str() const {
        const auto& whole = m_match[0];
        return whole.str();
    }

protected:
    /// Results of the latest match_regex() call.
    MatchResultsT m_match;
};

/// Matcher for floating-point literals; holds the pattern and the exponent
/// sanity check, leaving the actual regex call (match_regex) to subclasses.
template <typename MatchResultsT>
class RegexNumberMatcher : public RegexMatcher<MatchResultsT> {
public:
    /// Tries to match a number in `input`.
    ///
    /// Returns false if match_regex() finds nothing.  Throws LexerError if
    /// the number has an exponent marker that isn't followed by any digits.
    bool match(const std::string_view& input) {
        const bool found = this->match_regex(input);
        // A numeric part followed by 'e' without digits means that
        // 1) the 'e' definitely belongs to this number token, and
        // 2) the user forgot to type in the required digits.
        if (found && has_exponent() && !has_exponent_digits()) {
            throw LexerError{"exponent has no digits: " + this->to_str()};
        }
        return found;
    }

protected:
    // A hacky attempt at describing a C-like floating-point grammar with a
    // regex (the tests seem to pass though); a proper NFA would probably be
    // better.  Group 1 is the exponent part starting at 'e', group 2 is the
    // exponent's digits.
    static constexpr std::string_view NUMBER_REGEX{
        R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX"};

private:
    // Did the exponent part (group 1) take part in the match?
    bool has_exponent() const {
        const auto& exponent = this->m_match[1];
        return exponent.matched;
    }

    // Did the exponent's digits (group 2) match at least one character?
    bool has_exponent_digits() const {
        const auto& digits = this->m_match[2];
        return digits.matched && digits.length() != 0;
    }
};

/// Number matcher backed by std::regex.
class StdNumberMatcher
    : public RegexNumberMatcher<std::match_results<std::string_view::const_iterator>> {
public:
    /// Matches NUMBER_REGEX against the beginning of `input`, storing the
    /// result in `m_match`.  Returns true on a match.
    bool match_regex(const std::string_view& input) override {
        // match_continuous only accepts a match that begins at the very first
        // character.  The pattern's '^' is meant to guarantee that already,
        // but some regex implementations treat '^' as a line anchor, and a
        // number found further down the input must not be reported as if it
        // were at the start.  It also stops the engine from retrying the
        // search at every offset.
        return std::regex_search(input.cbegin(), input.cend(), m_match, get_regex(),
                                 std::regex_constants::match_continuous);
    }

private:
    // Compiled once on first use (case-insensitive, so 'E' works as well as
    // 'e') and shared by all instances.
    static const std::regex& get_regex() {
        static constexpr auto flags =
            std::regex_constants::ECMAScript | std::regex_constants::icase;
        static const std::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
        return regex;
    }
};

/// Number matcher backed by Boost.Regex.
class BoostNumberMatcher
    : public RegexNumberMatcher<boost::match_results<std::string_view::const_iterator>> {
public:
    /// Matches NUMBER_REGEX against the beginning of `input`, storing the
    /// result in `m_match`.  Returns true on a match.
    bool match_regex(const std::string_view& input) override {
        // By default, Boost.Regex lets '^' match after embedded newlines too,
        // so the anchor in the pattern alone would accept a number that
        // starts on a later line of `input`.  match_continuous requires the
        // match to begin at the very first character (and avoids retrying the
        // search at every offset).
        return boost::regex_search(input.cbegin(), input.cend(), m_match, get_regex(),
                                   boost::regex_constants::match_continuous);
    }

private:
    // Compiled once on first use (case-insensitive, so 'E' works as well as
    // 'e') and shared by all instances.
    static const boost::regex& get_regex() {
        static constexpr boost::regex::flag_type flags =
            boost::regex::ECMAScript | boost::regex::icase;
        static const boost::regex regex{NUMBER_REGEX.data(), NUMBER_REGEX.length(), flags};
        return regex;
    }
};

/// Parses a floating-point number using `matcher`.
///
/// @param input   text to parse.
/// @param matcher matcher to run against `input`.
/// @param token   set to the matched part of `input` on success; left
///                untouched otherwise.
/// @return the parsed value, or an empty optional if `input` doesn't contain
///         a number where the matcher looks for one.
/// @throws LexerError if the exponent has no digits (from the matcher), or if
///         the matched text can't be converted to a double in its entirety.
template <typename MatchResultsT>
std::optional<double> parse_number(const std::string_view& input,
                                   RegexNumberMatcher<MatchResultsT>&& matcher,
                                   std::string_view& token) {
    if (!matcher.match(input)) {
        return {};
    }
    const auto view = matcher.to_view();
    // std::stod needs an owning string.
    const std::string text{view};

    double result = 0.0;
    std::size_t consumed = 0;
    try {
        result = std::stod(text, &consumed);
    } catch (const std::exception&) {
        throw LexerError{"internal: couldn't parse number from: " + text};
    }
    // std::stod happily stops at the first character it doesn't understand
    // (it goes through strtod, which honours the C locale's decimal point).
    // If it didn't consume everything the regex accepted, the value would be
    // silently truncated, so treat that as a conversion failure too.
    if (consumed != text.length()) {
        throw LexerError{"internal: couldn't parse number from: " + text};
    }
    token = view;
    return result;
}

/// Base of the whitespace matchers; all it adds to RegexMatcher is the
/// pattern shared by the subclasses.
template <typename MatchResultsT>
class RegexWhitespaceMatcher : public RegexMatcher<MatchResultsT> {
protected:
    // One or more whitespace characters, anchored with '^'.
    static constexpr std::string_view WS_REGEX = R"(^\s+)";
};

/// Whitespace matcher backed by std::regex.
class StdWhitespaceMatcher
    : public RegexWhitespaceMatcher<std::match_results<std::string_view::const_iterator>> {
public:
    /// Matches WS_REGEX against the beginning of `input`, storing the result
    /// in `m_match`.  Returns true on a match.
    bool match_regex(const std::string_view& input) override {
        // match_continuous only accepts a match that begins at the very first
        // character.  The pattern's '^' is meant to guarantee that already,
        // but some regex implementations treat '^' as a line anchor, and
        // whitespace found further down the input must not be reported as
        // leading whitespace.  It also stops the engine from retrying the
        // search at every offset.
        return std::regex_search(input.cbegin(), input.cend(), m_match, get_regex(),
                                 std::regex_constants::match_continuous);
    }

private:
    // Compiled once on first use and shared by all instances.
    static const std::regex& get_regex() {
        static constexpr auto flags = std::regex_constants::ECMAScript;
        static const std::regex regex{WS_REGEX.data(), WS_REGEX.length(), flags};
        return regex;
    }
};

/// Whitespace matcher backed by Boost.Regex.
class BoostWhitespaceMatcher
    : public RegexWhitespaceMatcher<boost::match_results<std::string_view::const_iterator>> {
public:
    /// Matches WS_REGEX against the beginning of `input`, storing the result
    /// in `m_match`.  Returns true on a match.
    bool match_regex(const std::string_view& input) override {
        // By default, Boost.Regex lets '^' match after embedded newlines too,
        // so for an input like "1\n 2" the anchor in the pattern alone would
        // accept the space on the second line.  match_continuous requires the
        // match to begin at the very first character (and avoids retrying the
        // search at every offset).
        return boost::regex_search(input.cbegin(), input.cend(), m_match, get_regex(),
                                   boost::regex_constants::match_continuous);
    }

private:
    // Compiled once on first use and shared by all instances.
    static const boost::regex& get_regex() {
        static constexpr boost::regex::flag_type flags = boost::regex::ECMAScript;
        static const boost::regex regex{WS_REGEX.data(), WS_REGEX.length(), flags};
        return regex;
    }
};

/// Runs `matcher` against `input` and returns the matched whitespace, or an
/// empty view if the matcher found none.
template <typename MatchResultsT>
std::string_view parse_whitespace(const std::string_view& input,
                                  RegexWhitespaceMatcher<MatchResultsT>&& matcher) {
    const bool found = matcher.match_regex(input);
    return found ? matcher.to_view() : std::string_view{};
}

// Tells whether `a` begins with `b`; an empty `b` is a prefix of any string.
bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {
    if (b.length() > a.length()) {
        return false;
    }
    // The offset is 0, so substr() can't throw here.
    return a.substr(0, b.length()) == b;
}

} // namespace

namespace impl {

std::optional<double> std_parse_number(const std::string_view& input, std::string_view& token) {
    return parse_number(input, StdNumberMatcher{}, token);
}

std::optional<double> std_parse_number(const std::string_view& input) {
    std::string_view token;
    return std_parse_number(input, token);
}

std::optional<double> boost_parse_number(const std::string_view& input, std::string_view& token) {
    return parse_number(input, BoostNumberMatcher{}, token);
}

std::optional<double> boost_parse_number(const std::string_view& input) {
    std::string_view token;
    return boost_parse_number(input, token);
}

std::string_view std_parse_whitespace(const std::string_view& input) {
    return parse_whitespace(input, StdWhitespaceMatcher{});
}

std::string_view boost_parse_whitespace(const std::string_view& input) {
    return parse_whitespace(input, BoostWhitespaceMatcher{});
}

} // namespace impl

// Parses a number from `input`, delegating to the Boost.Regex implementation.
// On success, `token` is set to the matched text.
std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {
    auto parsed = impl::boost_parse_number(input, token);
    return parsed;
}

// Same as above for callers who don't need the matched text.
std::optional<double> parse_number(const std::string_view& input) {
    std::string_view unused;
    return parse_number(input, unused);
}

std::optional<token::Type> parse_const_token(const std::string_view& input,
                                             std::string_view& token) {
    for (const auto type : token::const_tokens()) {
        const auto str = token::type_to_string(type);
        if (starts_with(input, str)) {
            token = std::string_view(input.data(), str.length());
            return {type};
        }
    }
    return {};
}

std::optional<token::Type> parse_const_token(const std::string_view& input) {
    std::string_view token;
    return parse_const_token(input, token);
}

// Returns the whitespace matched in `input` by the Boost.Regex implementation
// (an empty view if there's none).
std::string_view parse_whitespace(const std::string_view& input) {
    const std::string_view whitespace = impl::boost_parse_whitespace(input);
    return whitespace;
}

} // namespace math::server::lexer::details