Compiler/lexical analyzer: Difference between revisions

Content deleted Content added
added C++ implementation
m bug fixes and minor improvements
Line 1,631: Line 1,631:
#include <string>
#include <string>
#include <string_view>
#include <string_view>
#include <utility> // std::forward
#include <variant> // TokenVal
#include <variant> // TokenVal


Line 1,667: Line 1,668:


template <class F>
template <class F>
void with_IO (string source, string destination, F f)
void with_IO (string source, string destination, F&& f)
{
{
string input;
string input;
Line 1,674: Line 1,675:
else input = file_to_string(source);
else input = file_to_string(source);


string output = invoke(f, input);
string output = invoke(forward<F>(f), input);


if (destination == "stdout") cout << output;
if (destination == "stdout") cout << output;
Line 1,701: Line 1,702:
int column = 1;
int column = 1;


Scanner (string_view source) : pos {source.data()} {}
Scanner (const char* source) : pos {source} {}


inline char peek () { return *pos; }
inline char peek () { return *pos; }
Line 1,760: Line 1,761:
"Op_not", "Op_assign", "Op_and", "Op_or",
"Op_not", "Op_assign", "Op_and", "Op_or",
"LeftParen", "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma",
"LeftParen", "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma",
"Keyword_if", "Keyword_else", "Keyword_while", "Keyword_print", "Keyword_putc"
"Keyword_if", "Keyword_else", "Keyword_while", "Keyword_print", "Keyword_putc",
"Identifier", "Integer", "String",
"End_of_input", "Error"
};
};


Line 1,794: Line 1,797:
{
{
public:
public:
Lexer (string_view source) : s {source.data()}, pre_state {s} {}
Lexer (const char* source) : s {source}, pre_state {s} {}


bool has_more () { return s.peek() != '\0'; }
bool has_more () { return s.peek() != '\0'; }
Line 1,847: Line 1,850:


ostringstream msg;
ostringstream msg;
(msg << ... << ostream_args) << '\n'
(msg << ... << forward<Args>(ostream_args)) << '\n'
<< string(28, ' ') << "(" << s.line << ", " << s.column << "): " << code;
<< string(28, ' ') << "(" << s.line << ", " << s.column << "): " << code;


Line 1,948: Line 1,951:




inline bool is_id_start (char c) { return isalpha(static_cast<unsigned char>(c)) || c == '_'; }
static inline bool is_id_start (char c) { return isalpha(static_cast<unsigned char>(c)) || c == '_'; }
inline bool is_id_end (char c) { return isalnum(static_cast<unsigned char>(c)) || c == '_'; }
static inline bool is_id_end (char c) { return isalnum(static_cast<unsigned char>(c)) || c == '_'; }
inline bool is_digit (char c) { return isdigit(static_cast<unsigned char>(c)); }
static inline bool is_digit (char c) { return isdigit(static_cast<unsigned char>(c)); }




Line 1,998: Line 2,001:
string out = (argc > 2) ? argv[2] : "stdout";
string out = (argc > 2) ? argv[2] : "stdout";


with_IO(in, out, [] (string input)
with_IO(in, out, [](string input)
{
{
Lexer lexer {input};
Lexer lexer {input.data()};


string s = "Location Token name Value\n"
string s = "Location Token name Value\n"