aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/server/lexer.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'server/lexer.hpp')
-rw-r--r--server/lexer.hpp261
1 files changed, 0 insertions, 261 deletions
diff --git a/server/lexer.hpp b/server/lexer.hpp
deleted file mode 100644
index 8afe15c..0000000
--- a/server/lexer.hpp
+++ /dev/null
@@ -1,261 +0,0 @@
-#pragma once
-
-#include "error.hpp"
-
-#include <cmath>
-
-#include <exception>
-#include <functional>
-#include <limits>
-#include <optional>
-#include <regex>
-#include <string>
-#include <string_view>
-#include <unordered_map>
-#include <vector>
-
-namespace math::server {
-namespace lexer {
-
-class Error : public server::Error {
-public:
- explicit Error(const std::string &what)
- : server::Error{"lexer error: " + what}
- { }
-};
-
-class Token {
-public:
- enum class Type {
- LEFT_PAREN,
- RIGHT_PAREN,
- PLUS,
- MINUS,
- ASTERISK,
- SLASH,
- NUMBER,
- };
-
- explicit Token(Type type)
- : m_type{type}, m_number_value{nan()}
- { }
-
- explicit Token(double number_value)
- : m_type{Type::NUMBER}, m_number_value{number_value}
- { }
-
- bool operator==(const Token& other) const {
- return m_type == other.m_type
- && ((is_nan(m_number_value) && is_nan(other.m_number_value))
- || m_number_value == other.m_number_value);
- }
-
- bool operator!=(const Token& other) const { return !(*this == other); }
-
- Type get_type() const { return m_type; }
-
- double get_number_value() const {
- if (get_type() != Type::NUMBER) {
- throw Error{"token must be a number to query its value"};
- }
- return m_number_value;
- }
-
-private:
- static constexpr double nan() { return std::numeric_limits<double>::quiet_NaN(); }
-
- static bool is_nan(double x) { return std::isnan(x); }
-
- Type m_type;
- double m_number_value;
-};
-
-namespace details {
-
-inline std::string_view match_number(const std::string_view& input) {
- static constexpr std::regex::flag_type flags =
- std::regex_constants::ECMAScript |
- std::regex_constants::icase;
- // This is a hacky attempt to describe a C-like grammar for floating-point
- // numbers using a regex (the tests seem to pass though).
- // A proper NFA would be better, I guess.
- static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags};
-
- std::cmatch match;
- if (!std::regex_search(input.cbegin(), input.cend(), match, number_regex)) {
- return {};
- }
- // If we have the numeric part of a number followed by 'e' and no digits,
- // 1) that 'e' definitely belongs to this number token,
- // 2) the user forgot to type in the required digits.
- const auto& exponent = match[1];
- const auto& abs_power = match[2];
- if (exponent.matched && abs_power.matched && abs_power.length() == 0) {
- throw lexer::Error{"exponent has no digits: " + match[0].str()};
- }
- return {match[0].first, match[0].length()};
-}
-
-inline std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {
- const auto view = match_number(input);
- if (!view.data()) {
- return {};
- }
- try {
- const auto result = std::stod(std::string{view});
- token = view;
- return result;
- } catch (const std::exception& e) {
- throw lexer::Error{"couldn't parse number from: " + std::string{view}};
- }
- return {};
-}
-
-inline std::optional<double> parse_number(const std::string_view& input) {
- std::string_view token;
- return parse_number(input, token);
-}
-
-inline bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {
- return a.length() >= b.length()
- && a.compare(0, b.length(), b) == 0;
-}
-
-inline std::optional<Token::Type> parse_const_token(const std::string_view& input, std::string_view& token) {
- // FIXME: Potentially error-prone if there's const token A which is a
- // prefix of token B (if the map is not sorted, we'd parse token A, when it
- // could've been token B).
- // Can be solved by sorting the keys accordingly.
-
- static const std::unordered_map<std::string_view, Token::Type> const_tokens{
- {"(", Token::Type::LEFT_PAREN},
- {")", Token::Type::RIGHT_PAREN},
- {"+", Token::Type::PLUS},
- {"-", Token::Type::MINUS},
- {"*", Token::Type::ASTERISK},
- {"/", Token::Type::SLASH},
- };
-
- for (const auto& it : const_tokens) {
- const auto& str = it.first;
- const auto& type = it.second;
-
- if (starts_with(input, str)) {
- token = input.substr(0, str.length());
- return type;
- }
- }
-
- return {};
-}
-
-inline std::optional<Token::Type> parse_const_token(const std::string_view& input) {
- std::string_view token;
- return parse_const_token(input, token);
-}
-
-inline std::string_view parse_whitespace(const std::string_view& input) {
- static const std::regex ws_regex{R"(\s*)"};
-
- std::cmatch match;
- if (std::regex_search(input.cbegin(), input.cend(), match, ws_regex)) {
- return {match[0].first, match[0].length()};
- }
- return {};
-}
-
-}
-
-}
-
-class Lexer {
-public:
- explicit Lexer(const std::string_view& input)
- : m_input{input} {
- }
-
- using TokenProcessor = std::function<bool (const lexer::Token&)>;
-
- bool for_each_token(const TokenProcessor& process) {
- parse_token();
- for (auto token = peek_token(); token.has_value(); drop_token(), token = peek_token()) {
- if (!process(*token)) {
- return false;
- }
- }
- return true;
- }
-
- std::vector<lexer::Token> get_tokens() {
- std::vector<lexer::Token> tokens;
- for_each_token([&tokens] (const lexer::Token& token) {
- tokens.emplace_back(token);
- return true;
- });
- return tokens;
- }
-
- void parse_token() {
- if (m_input.length() == 0) {
- return;
- }
- std::string_view token_view;
- m_token_buffer = parse_token(token_view);
- if (m_token_buffer.has_value()) {
- m_input.remove_prefix(token_view.length());
- }
- }
-
- bool has_token() const {
- return peek_token().has_value();
- }
-
- std::optional<lexer::Token> peek_token() const {
- return m_token_buffer;
- }
-
- void drop_token() {
- if (!has_token()) {
- throw lexer::Error{"internal: no tokens to drop"};
- }
- m_token_buffer = {};
- parse_token();
- }
-
- std::optional<lexer::Token> drop_token_if(lexer::Token::Type type) {
- if (!has_token()) {
- throw lexer::Error{"internal: no tokens to drop"};
- }
- if (m_token_buffer.value().get_type() != type) {
- return {};
- }
- const auto result = m_token_buffer;
- drop_token();
- return result;
- }
-
-private:
- void consume_whitespace() {
- const auto ws = lexer::details::parse_whitespace(m_input);
- m_input.remove_prefix(ws.length());
- }
-
- std::optional<lexer::Token> parse_token(std::string_view& token_view) {
- consume_whitespace();
- if (m_input.length() == 0) {
- return {};
- }
- if (const auto const_token = lexer::details::parse_const_token(m_input, token_view); const_token.has_value()) {
- return lexer::Token{*const_token};
- }
- if (const auto number = lexer::details::parse_number(m_input, token_view); number.has_value()) {
- return lexer::Token{*number};
- }
- throw lexer::Error{"invalid input at: " + std::string{m_input}};
- }
-
- std::string_view m_input;
- std::optional<lexer::Token> m_token_buffer;
-};
-
-}