From ad748449fb2d6154f0d7677f2a933aa29919075b Mon Sep 17 00:00:00 2001
From: Egor Tensin
Date: Fri, 3 Jan 2020 00:58:56 +0300
Subject: lexer: split out testing-exposed functions

---
 server/lexer/details/parse.cpp | 107 +++++++++++++++++++++++++++++++++++++++++
 server/lexer/details/parse.hpp |  22 +++++++++
 2 files changed, 129 insertions(+)
 create mode 100644 server/lexer/details/parse.cpp
 create mode 100644 server/lexer/details/parse.hpp

(limited to 'server/lexer/details')

diff --git a/server/lexer/details/parse.cpp b/server/lexer/details/parse.cpp
new file mode 100644
index 0000000..464dfb2
--- /dev/null
+++ b/server/lexer/details/parse.cpp
@@ -0,0 +1,107 @@
+// Copyright (c) 2020 Egor Tensin
+// This file is part of the "math-server" project.
+// For details, see https://github.com/egor-tensin/math-server.
+// Distributed under the MIT License.
+
+#include "../error.hpp"
+#include "../token_type.hpp"
+
+#include <cstddef>
+
+#include <exception>
+#include <optional>
+#include <regex>
+#include <string>
+#include <string_view>
+
+namespace math::server::lexer::details {
+namespace {
+
+std::string_view match_number(const std::string_view& input) {
+    static constexpr std::regex::flag_type flags =
+        std::regex_constants::ECMAScript |
+        std::regex_constants::icase;
+    // This is a hacky attempt to describe a C-like grammar for floating-point
+    // numbers using a regex (the tests seem to pass though).
+    // A proper NFA would be better, I guess.
+    static const std::regex number_regex{R"REGEX(^(?:\d+(?:\.\d*)?|\.\d+)(e[+-]?(\d*))?)REGEX", flags};
+
+    std::cmatch match;
+    {
+        const auto begin = input.data();
+        const auto end = begin + input.length();
+        if (!std::regex_search(begin, end, match, number_regex)) {
+            return {};
+        }
+    }
+    {
+        // If we have the numeric part of a number followed by 'e' and no digits,
+        // 1) that 'e' definitely belongs to this number token,
+        // 2) the user forgot to type in the required digits.
+        const auto& exponent = match[1];
+        const auto& abs_power = match[2];
+        if (exponent.matched && abs_power.matched && abs_power.length() == 0) {
+            throw LexerError{"exponent has no digits: " + match[0].str()};
+        }
+    }
+    return {match[0].first, static_cast<std::size_t>(match[0].length())};
+}
+
+bool starts_with(const std::string_view& a, const std::string_view& b) noexcept {
+    return a.length() >= b.length()
+        && a.compare(0, b.length(), b) == 0;
+}
+
+}
+
+std::optional<double> parse_number(const std::string_view& input, std::string_view& token) {
+    const auto view = match_number(input);
+    if (!view.data()) {
+        return {};
+    }
+    try {
+        const auto result = std::stod(std::string{view});
+        token = view;
+        return result;
+    } catch (const std::exception&) {
+        throw LexerError{"internal: couldn't parse number from: " + std::string{view}};
+    }
+    return {};
+}
+
+std::optional<double> parse_number(const std::string_view& input) {
+    std::string_view token;
+    return parse_number(input, token);
+}
+
+std::optional<token::Type> parse_const_token(const std::string_view& input, std::string_view& token) {
+    for (const auto type : token::const_tokens()) {
+        const auto str = token::type_to_string(type);
+        if (starts_with(input, str)) {
+            token = std::string_view(input.data(), str.length());
+            return {type};
+        }
+    }
+    return {};
+}
+
+std::optional<token::Type> parse_const_token(const std::string_view& input) {
+    std::string_view token;
+    return parse_const_token(input, token);
+}
+
+std::string_view parse_whitespace(const std::string_view& input) {
+    static const std::regex ws_regex{R"(^\s+)"};
+
+    std::cmatch match;
+    {
+        const auto begin = input.data();
+        const auto end = begin + input.length();
+        if (std::regex_search(begin, end, match, ws_regex)) {
+            return std::string_view(match[0].first, match[0].length());
+        }
+    }
+    return {};
+}
+
+}
diff --git a/server/lexer/details/parse.hpp b/server/lexer/details/parse.hpp
new file mode 100644
index 0000000..72da234
--- /dev/null
+++ b/server/lexer/details/parse.hpp
@@ -0,0 +1,22 @@
+// Copyright (c) 2020 Egor Tensin
+// This file is part of the "math-server" project.
+// For details, see https://github.com/egor-tensin/math-server.
+// Distributed under the MIT License.
+
+#pragma once
+
+#include "../token_type.hpp"
+
+#include <optional>
+#include <string_view>
+
+namespace math::server::lexer::details {
+
+// Exposed for testing:
+std::string_view parse_whitespace(const std::string_view&);
+std::optional<double> parse_number(const std::string_view&, std::string_view&);
+std::optional<double> parse_number(const std::string_view&);
+std::optional<token::Type> parse_const_token(const std::string_view&, std::string_view&);
+std::optional<token::Type> parse_const_token(const std::string_view&);
+
+}
-- 
cgit v1.2.3