Allow dynamic theming and improve ruby parser
This commit is contained in:
@@ -32,8 +32,8 @@ Editor *new_editor(const char *filename_arg, Coord position, Coord size) {
|
||||
if (editor->lang.name != "unknown")
|
||||
editor->parser = new Parser(editor->root, &editor->knot_mtx,
|
||||
editor->lang.name, size.row + 5);
|
||||
// if (len <= (1024 * 28))
|
||||
// request_add_to_lsp(editor->lang, editor);
|
||||
if (len <= (1024 * 28))
|
||||
request_add_to_lsp(editor->lang, editor);
|
||||
editor->indents.compute_indent(editor);
|
||||
return editor;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#include "editor/editor.h"
|
||||
#include "main.h"
|
||||
#include "syntax/decl.h"
|
||||
#include "syntax/parser.h"
|
||||
|
||||
void render_editor(Editor *editor) {
|
||||
uint32_t sel_start = 0, sel_end = 0;
|
||||
@@ -23,6 +25,15 @@ void render_editor(Editor *editor) {
|
||||
std::unique_lock<std::mutex> lock;
|
||||
if (editor->parser)
|
||||
lock = std::unique_lock<std::mutex>(editor->parser->mutex);
|
||||
LineData *line_data = nullptr;
|
||||
auto get_type = [&](uint32_t col) {
|
||||
if (!line_data)
|
||||
return 0;
|
||||
for (auto const &token : line_data->tokens)
|
||||
if (token.start <= col && token.end > col)
|
||||
return (int)token.type;
|
||||
return 0;
|
||||
};
|
||||
std::shared_lock knot_lock(editor->knot_mtx);
|
||||
if (editor->selection_active) {
|
||||
Coord start, end;
|
||||
@@ -82,6 +93,10 @@ void render_editor(Editor *editor) {
|
||||
while (rendered_rows < editor->size.row) {
|
||||
uint32_t line_len;
|
||||
char *line = next_line(it, &line_len);
|
||||
if (line_data)
|
||||
line_data = editor->parser->line_tree.next();
|
||||
else
|
||||
line_data = editor->parser->line_tree.start_iter(line_index);
|
||||
if (!line)
|
||||
break;
|
||||
if (line_len > 0 && line[line_len - 1] == '\n')
|
||||
@@ -140,9 +155,8 @@ void render_editor(Editor *editor) {
|
||||
uint32_t absolute_byte_pos =
|
||||
global_byte_offset + current_byte_offset + local_render_offset;
|
||||
const Highlight *hl = nullptr;
|
||||
if (editor->parser && editor->parser->line_data.size() > line_index)
|
||||
hl = &highlight_map.at(editor->parser->get_type(
|
||||
{line_index, current_byte_offset + local_render_offset}));
|
||||
if (editor->parser)
|
||||
hl = &highlights[get_type(current_byte_offset + local_render_offset)];
|
||||
uint32_t fg = hl ? hl->fg : 0xFFFFFF;
|
||||
uint32_t bg = hl ? hl->bg : 0;
|
||||
uint8_t fl = hl ? hl->flags : 0;
|
||||
|
||||
@@ -16,19 +16,11 @@ static bool init_lsp(std::shared_ptr<LSPInstance> lsp) {
|
||||
if (pid == 0) {
|
||||
dup2(in_pipe[0], STDIN_FILENO);
|
||||
dup2(out_pipe[1], STDOUT_FILENO);
|
||||
#ifdef __clang__
|
||||
int devnull = open("/dev/null", O_WRONLY);
|
||||
if (devnull >= 0) {
|
||||
dup2(devnull, STDERR_FILENO);
|
||||
close(devnull);
|
||||
}
|
||||
#else
|
||||
int log = open("/tmp/lsp.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
||||
if (log >= 0) {
|
||||
dup2(log, STDERR_FILENO);
|
||||
close(log);
|
||||
}
|
||||
#endif
|
||||
close(in_pipe[0]);
|
||||
close(in_pipe[1]);
|
||||
close(out_pipe[0]);
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include "editor/editor.h"
|
||||
#include "io/sysio.h"
|
||||
#include "lsp/lsp.h"
|
||||
#include "syntax/decl.h"
|
||||
#include "ui/bar.h"
|
||||
#include "utils/utils.h"
|
||||
|
||||
@@ -61,6 +62,8 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
system(("bash " + get_exe_dir() + "/../scripts/init.sh").c_str());
|
||||
|
||||
load_theme(get_exe_dir() + "/../themes/default.json");
|
||||
|
||||
Editor *editor = new_editor(filename, {0, 0}, {screen.row - 2, screen.col});
|
||||
Bar bar(screen);
|
||||
|
||||
|
||||
73
src/syntax/bash.cc
Normal file
73
src/syntax/bash.cc
Normal file
@@ -0,0 +1,73 @@
|
||||
#include "syntax/decl.h"
|
||||
#include "syntax/langs.h"
|
||||
#include "utils/utils.h"
|
||||
|
||||
/**
 * Per-level lexer state for the bash highlighter.
 *
 * Plain value type: two instances are equal when every field (including the
 * nested literal description) is equal.
 */
struct BashFullState {
  // Brace depth at this level.
  // NOTE(review): nothing in the visible parser updates this yet — confirm.
  int brace_level = 0;

  // Kind of multi-line construct the lexer is currently inside.
  enum : uint8_t { NONE, STRING, HEREDOC };
  uint8_t in_state = BashFullState::NONE;

  // NOTE(review): presumably set when a line ends with a continuation
  // backslash — not written by any visible code, confirm before relying on it.
  bool line_cont = false;

  /** Description of the literal currently being lexed. */
  struct Lit {
    std::string delim; // default-constructed empty, same as the former = ""
    int brace_level = 1;
    bool allow_interp = false;

    bool operator==(const BashFullState::Lit &other) const {
      return delim == other.delim && brace_level == other.brace_level &&
             allow_interp == other.allow_interp;
    }
  } lit;

  /** Member-wise equality over all fields. */
  bool operator==(const BashFullState &other) const {
    return in_state == other.in_state && lit == other.lit &&
           brace_level == other.brace_level && line_cont == other.line_cont;
  }
};
|
||||
|
||||
struct BashState {
|
||||
using full_state_type = BashFullState;
|
||||
|
||||
int interp_level = 0;
|
||||
std::stack<std::shared_ptr<BashFullState>> interp_stack;
|
||||
std::shared_ptr<BashFullState> full_state;
|
||||
|
||||
bool operator==(const BashState &other) const {
|
||||
return interp_level == other.interp_level &&
|
||||
interp_stack == other.interp_stack &&
|
||||
((full_state && other.full_state &&
|
||||
*full_state == *other.full_state));
|
||||
}
|
||||
};
|
||||
|
||||
bool bash_state_match(std::shared_ptr<void> state_1,
|
||||
std::shared_ptr<void> state_2) {
|
||||
if (!state_1 || !state_2)
|
||||
return false;
|
||||
return *std::static_pointer_cast<BashState>(state_1) ==
|
||||
*std::static_pointer_cast<BashState>(state_2);
|
||||
}
|
||||
|
||||
std::shared_ptr<void> bash_parse(std::vector<Token> *tokens,
|
||||
std::shared_ptr<void> in_state,
|
||||
const char *text, uint32_t len) {
|
||||
static bool keywords_trie_init = false;
|
||||
if (!keywords_trie_init) {
|
||||
keywords_trie_init = true;
|
||||
}
|
||||
tokens->clear();
|
||||
auto state = ensure_state(std::static_pointer_cast<BashState>(in_state));
|
||||
uint32_t i = 0;
|
||||
while (len > 0 && (text[len - 1] == '\n' || text[len - 1] == '\r' ||
|
||||
text[len - 1] == '\t' || text[len - 1] == ' '))
|
||||
len--;
|
||||
if (len == 0)
|
||||
return state;
|
||||
bool heredoc_first = false;
|
||||
while (i < len) {
|
||||
i += utf8_codepoint_width(text[i]);
|
||||
}
|
||||
return state;
|
||||
}
|
||||
@@ -1,12 +1,14 @@
|
||||
#include "syntax/parser.h"
|
||||
#include "io/knot.h"
|
||||
#include "main.h"
|
||||
#include "syntax/decl.h"
|
||||
#include "syntax/langs.h"
|
||||
#include "syntax/parser.h"
|
||||
|
||||
std::array<Highlight, TOKEN_KIND_COUNT> highlights = {};
|
||||
|
||||
Parser::Parser(Knot *n_root, std::shared_mutex *n_knot_mutex,
|
||||
std::string n_lang, uint32_t n_scroll_max) {
|
||||
scroll_max = n_scroll_max;
|
||||
line_data.reserve(n_root->line_count + 1);
|
||||
knot_mutex = n_knot_mutex;
|
||||
lang = n_lang;
|
||||
auto pair = parsers.find(n_lang);
|
||||
@@ -24,11 +26,9 @@ void Parser::edit(Knot *n_root, uint32_t start_line, uint32_t old_end_line,
|
||||
std::lock_guard lock(data_mutex);
|
||||
root = n_root;
|
||||
if (((int64_t)old_end_line - (int64_t)start_line) > 0)
|
||||
line_data.erase(line_data.begin() + start_line,
|
||||
line_data.begin() + start_line + old_end_line - start_line);
|
||||
line_tree.erase(start_line + 1, old_end_line - start_line);
|
||||
if (((int64_t)new_end_line - (int64_t)old_end_line) > 0)
|
||||
line_data.insert(line_data.begin() + start_line,
|
||||
new_end_line - old_end_line, LineData{});
|
||||
line_tree.insert(start_line + 1, new_end_line - start_line);
|
||||
dirty_lines.insert(start_line);
|
||||
}
|
||||
|
||||
@@ -42,16 +42,18 @@ void Parser::work() {
|
||||
tmp_dirty.swap(dirty_lines);
|
||||
lock_data.unlock();
|
||||
std::set<uint32_t> remaining_dirty;
|
||||
std::unique_lock lock(mutex);
|
||||
lock.unlock();
|
||||
for (uint32_t c_line : tmp_dirty) {
|
||||
if (c_line > scroll_max) {
|
||||
remaining_dirty.insert(c_line);
|
||||
continue;
|
||||
}
|
||||
std::unique_lock lock(mutex);
|
||||
uint32_t line_count = (uint32_t)line_data.size();
|
||||
uint32_t line_count = line_tree.count();
|
||||
lock_data.lock();
|
||||
std::shared_ptr<void> prev_state =
|
||||
(c_line > 0) ? line_data[c_line - 1].out_state : nullptr;
|
||||
lock.unlock();
|
||||
(c_line > 0) ? line_tree.at(c_line - 1)->out_state : nullptr;
|
||||
lock_data.unlock();
|
||||
while (c_line < line_count) {
|
||||
if (!running.load(std::memory_order_relaxed)) {
|
||||
free(text);
|
||||
@@ -70,14 +72,17 @@ void Parser::work() {
|
||||
if (c_line < scroll_max &&
|
||||
((scroll_max > 100 && c_line > scroll_max - 100) || c_line < 100))
|
||||
lock.lock();
|
||||
if (line_tree.count() < c_line) {
|
||||
if (lock.owns_lock())
|
||||
lock.unlock();
|
||||
continue;
|
||||
}
|
||||
lock_data.lock();
|
||||
LineData *line_data = line_tree.at(c_line);
|
||||
std::shared_ptr<void> new_state =
|
||||
parse_func(&line_data[c_line].tokens, prev_state, text, r_len);
|
||||
lock_data.unlock();
|
||||
line_data[c_line].in_state = prev_state;
|
||||
line_data[c_line].out_state = new_state;
|
||||
if (lock.owns_lock())
|
||||
lock.unlock();
|
||||
parse_func(&line_data->tokens, prev_state, text, r_len);
|
||||
line_data->in_state = prev_state;
|
||||
line_data->out_state = new_state;
|
||||
if (!running.load(std::memory_order_relaxed)) {
|
||||
free(text);
|
||||
return;
|
||||
@@ -85,16 +90,24 @@ void Parser::work() {
|
||||
prev_state = new_state;
|
||||
c_line++;
|
||||
if (c_line < line_count && c_line > scroll_max + 50) {
|
||||
lock_data.unlock();
|
||||
if (lock.owns_lock())
|
||||
lock.unlock();
|
||||
if (c_line > 0)
|
||||
remaining_dirty.insert(c_line - 1);
|
||||
remaining_dirty.insert(c_line);
|
||||
break;
|
||||
}
|
||||
lock.lock();
|
||||
if (c_line < line_count &&
|
||||
state_match_func(prev_state, line_data[c_line].in_state))
|
||||
state_match_func(prev_state, line_tree.at(c_line)->in_state)) {
|
||||
lock_data.unlock();
|
||||
if (lock.owns_lock())
|
||||
lock.unlock();
|
||||
break;
|
||||
lock.unlock();
|
||||
}
|
||||
lock_data.unlock();
|
||||
if (lock.owns_lock())
|
||||
lock.unlock();
|
||||
}
|
||||
if (!running.load(std::memory_order_relaxed)) {
|
||||
free(text);
|
||||
@@ -110,20 +123,20 @@ void Parser::scroll(uint32_t line) {
|
||||
if (line != scroll_max) {
|
||||
scroll_max = line;
|
||||
uint32_t c_line = line > 100 ? line - 100 : 0;
|
||||
if (line_data.size() < c_line)
|
||||
if (line_tree.count() < c_line)
|
||||
return;
|
||||
if (line_data[c_line].in_state || line_data[c_line].out_state)
|
||||
std::unique_lock lock_data(data_mutex);
|
||||
if (line_tree.at(c_line)->in_state || line_tree.at(c_line)->out_state)
|
||||
return;
|
||||
lock_data.unlock();
|
||||
std::shared_lock k_lock(*knot_mutex);
|
||||
k_lock.unlock();
|
||||
uint32_t capacity = 256;
|
||||
char *text = (char *)calloc((capacity + 1), sizeof(char));
|
||||
std::unique_lock lock_data(data_mutex);
|
||||
lock_data.unlock();
|
||||
uint32_t line_count = line_tree.count();
|
||||
std::unique_lock lock(mutex);
|
||||
uint32_t line_count = (uint32_t)line_data.size();
|
||||
std::shared_ptr<void> prev_state =
|
||||
(c_line > 0) ? line_data[c_line - 1].out_state : nullptr;
|
||||
(c_line > 0) ? line_tree.at(c_line - 1)->out_state : nullptr;
|
||||
lock.unlock();
|
||||
while (c_line < line_count) {
|
||||
if (!running.load(std::memory_order_relaxed)) {
|
||||
@@ -143,12 +156,18 @@ void Parser::scroll(uint32_t line) {
|
||||
if (c_line < scroll_max &&
|
||||
((scroll_max > 100 && c_line > scroll_max - 100) || c_line < 100))
|
||||
lock.lock();
|
||||
if (line_tree.count() < c_line) {
|
||||
if (lock.owns_lock())
|
||||
lock.unlock();
|
||||
continue;
|
||||
}
|
||||
lock_data.lock();
|
||||
LineData *line_data = line_tree.at(c_line);
|
||||
std::shared_ptr<void> new_state =
|
||||
parse_func(&line_data[c_line].tokens, prev_state, text, r_len);
|
||||
parse_func(&line_data->tokens, prev_state, text, r_len);
|
||||
line_data->in_state = nullptr;
|
||||
line_data->out_state = new_state;
|
||||
lock_data.unlock();
|
||||
line_data[c_line].in_state = nullptr;
|
||||
line_data[c_line].out_state = new_state;
|
||||
if (lock.owns_lock())
|
||||
lock.unlock();
|
||||
if (!running.load(std::memory_order_relaxed)) {
|
||||
@@ -1,24 +1,28 @@
|
||||
#include "syntax/decl.h"
|
||||
#include "syntax/langs.h"
|
||||
|
||||
const static std::vector<std::string> base_keywords = {
|
||||
// style 4
|
||||
"if", "else", "elsif", "case", "rescue", "ensure", "do", "for",
|
||||
"while", "until", "def", "class", "module", "begin", "end", "unless",
|
||||
"class", "module", "begin", "end", "else", "rescue", "ensure", "do", "when",
|
||||
};
|
||||
|
||||
const static std::vector<std::string> expecting_keywords = {
|
||||
"if", "elsif", "case", "for", "while", "until", "unless",
|
||||
};
|
||||
|
||||
const static std::vector<std::string> operator_keywords = {
|
||||
// style 5
|
||||
"alias", "and", "BEGIN", "break", "catch", "defined?", "in", "next",
|
||||
"not", "or", "redo", "rescue", "retry", "return", "super", "yield",
|
||||
"self", "nil", "true", "false", "undef", "when",
|
||||
"alias", "BEGIN", "break", "catch", "defined?", "in", "next",
|
||||
"redo", "rescue", "retry", "super", "self", "nil", "undef",
|
||||
};
|
||||
|
||||
const static std::vector<std::string> expecting_operators = {
|
||||
"and", "return", "not", "yield", "or",
|
||||
};
|
||||
|
||||
// Operator spellings fed to the operator trie. Each spelling appears exactly
// once; the repeated "===", "=" and "&" entries were dropped.
const static std::vector<std::string> operators = {
    "+",  "-",  "*",  "/",  "%",  "**",  "==",  "!=",  "===", "<=>", ">",
    ">=", "<",  "<=", "&&", "||", "!",   "&",   "|",   "^",   "~",   "<<",
    ">>", "=",  "+=", "-=", "*=", "/=",  "%=",  "**=", "&=",  "|=",  "^=",
    "<<=", ">>=", "..", "...", "=>", "`", "->", "=~",
};
|
||||
|
||||
struct HeredocInfo {
|
||||
@@ -34,19 +38,16 @@ struct HeredocInfo {
|
||||
};
|
||||
|
||||
struct RubyFullState {
|
||||
// TODO: use this to highlight each level separately like vscode colored
|
||||
// braces extension thingy does
|
||||
int brace_level = 0;
|
||||
int paren_level = 0;
|
||||
int bracket_level = 0;
|
||||
|
||||
enum : uint8_t { NONE, STRING, REGEXP, COMMENT, HEREDOC, END };
|
||||
uint8_t in_state = RubyFullState::NONE;
|
||||
|
||||
bool expecting_expr = false;
|
||||
|
||||
struct Lit {
|
||||
char delim_start = '\0';
|
||||
char delim_end = '\0';
|
||||
// For stuff like %Q{ { these braces are valid } this part is still str }
|
||||
int brace_level = 1;
|
||||
bool allow_interp = false;
|
||||
|
||||
@@ -60,12 +61,13 @@ struct RubyFullState {
|
||||
bool operator==(const RubyFullState &other) const {
|
||||
return in_state == other.in_state && lit == other.lit &&
|
||||
brace_level == other.brace_level &&
|
||||
paren_level == other.paren_level &&
|
||||
bracket_level == other.bracket_level;
|
||||
expecting_expr == other.expecting_expr;
|
||||
}
|
||||
};
|
||||
|
||||
struct RubyState {
|
||||
using full_state_type = RubyFullState;
|
||||
|
||||
int interp_level = 0;
|
||||
std::stack<std::shared_ptr<RubyFullState>> interp_stack;
|
||||
std::shared_ptr<RubyFullState> full_state;
|
||||
@@ -80,32 +82,16 @@ struct RubyState {
|
||||
}
|
||||
};
|
||||
|
||||
inline std::shared_ptr<RubyState>
|
||||
ensure_state(std::shared_ptr<RubyState> state) {
|
||||
if (!state)
|
||||
state = std::make_shared<RubyState>();
|
||||
if (state.unique())
|
||||
return state;
|
||||
return std::make_shared<RubyState>(*state);
|
||||
}
|
||||
|
||||
inline std::shared_ptr<RubyState>
|
||||
ensure_full_state(std::shared_ptr<RubyState> state) {
|
||||
state = ensure_state(state);
|
||||
if (!state->full_state)
|
||||
state->full_state = std::make_shared<RubyFullState>();
|
||||
else if (!state->full_state.unique())
|
||||
state->full_state = std::make_shared<RubyFullState>(*state->full_state);
|
||||
return state;
|
||||
}
|
||||
|
||||
/**
 * True if `c` may begin an identifier: an ASCII letter, '_', or any byte
 * outside the ASCII range (e.g. part of a multi-byte UTF-8 sequence).
 */
inline static bool identifier_start_char(char c) {
  // Go through unsigned char: the ctype functions require a value
  // representable as unsigned char, and this also replaces the non-standard
  // isascii() with an explicit range check.
  const unsigned char uc = static_cast<unsigned char>(c);
  return uc > 0x7F || isalpha(uc) || c == '_';
}
|
||||
|
||||
/**
 * True if `c` may continue an identifier: an ASCII letter or digit, '_', or
 * any byte outside the ASCII range.
 */
inline static bool identifier_char(char c) {
  // Go through unsigned char: the ctype functions require a value
  // representable as unsigned char, and this also replaces the non-standard
  // isascii() with an explicit range check.
  const unsigned char uc = static_cast<unsigned char>(c);
  return uc > 0x7F || isalnum(uc) || c == '_';
}
|
||||
|
||||
uint32_t get_next_word(const char *text, uint32_t i, uint32_t len) {
|
||||
inline static uint32_t get_next_word(const char *text, uint32_t i,
|
||||
uint32_t len) {
|
||||
if (i >= len || !identifier_start_char(text[i]))
|
||||
return 0;
|
||||
uint32_t width = 1;
|
||||
@@ -116,12 +102,12 @@ uint32_t get_next_word(const char *text, uint32_t i, uint32_t len) {
|
||||
return width;
|
||||
}
|
||||
|
||||
bool compare(const char *a, const char *b, size_t n) {
|
||||
size_t i = 0;
|
||||
for (; i < n; ++i)
|
||||
if (a[i] != b[i])
|
||||
return false;
|
||||
return true;
|
||||
bool ruby_state_match(std::shared_ptr<void> state_1,
|
||||
std::shared_ptr<void> state_2) {
|
||||
if (!state_1 || !state_2)
|
||||
return false;
|
||||
return *std::static_pointer_cast<RubyState>(state_1) ==
|
||||
*std::static_pointer_cast<RubyState>(state_2);
|
||||
}
|
||||
|
||||
std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
@@ -129,21 +115,20 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
const char *text, uint32_t len) {
|
||||
static bool keywords_trie_init = false;
|
||||
static Trie base_keywords_trie;
|
||||
static Trie expecting_keywords_trie;
|
||||
static Trie operator_keywords_trie;
|
||||
static Trie expecting_operators_trie;
|
||||
static Trie operator_trie;
|
||||
if (!keywords_trie_init) {
|
||||
base_keywords_trie.build(base_keywords);
|
||||
expecting_keywords_trie.build(expecting_keywords);
|
||||
operator_keywords_trie.build(operator_keywords);
|
||||
expecting_operators_trie.build(expecting_operators);
|
||||
operator_trie.build(operators);
|
||||
keywords_trie_init = true;
|
||||
}
|
||||
tokens->clear();
|
||||
if (!in_state)
|
||||
in_state = std::make_shared<RubyState>();
|
||||
std::shared_ptr<RubyState> state =
|
||||
std::static_pointer_cast<RubyState>(in_state);
|
||||
if (!state->full_state)
|
||||
state->full_state = std::make_shared<RubyFullState>();
|
||||
auto state = ensure_state(std::static_pointer_cast<RubyState>(in_state));
|
||||
uint32_t i = 0;
|
||||
while (len > 0 && (text[len - 1] == '\n' || text[len - 1] == '\r' ||
|
||||
text[len - 1] == '\t' || text[len - 1] == ' '))
|
||||
@@ -152,15 +137,12 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
return state;
|
||||
bool heredoc_first = false;
|
||||
while (i < len) {
|
||||
if (state->full_state->in_state == RubyFullState::END) {
|
||||
tokens->clear();
|
||||
if (state->full_state->in_state == RubyFullState::END)
|
||||
return state;
|
||||
}
|
||||
if (state->full_state->in_state == RubyFullState::COMMENT) {
|
||||
tokens->push_back({i, len, 1});
|
||||
tokens->push_back({i, len, TokenKind::Comment});
|
||||
if (i == 0 && len == 4 && text[i] == '=' && text[i + 1] == 'e' &&
|
||||
text[i + 2] == 'n' && text[i + 3] == 'd') {
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->in_state = RubyFullState::NONE;
|
||||
}
|
||||
return state;
|
||||
@@ -175,32 +157,32 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
if (len - start == state->heredocs.front().delim.length() &&
|
||||
compare(text + start, state->heredocs.front().delim.c_str(),
|
||||
state->heredocs.front().delim.length())) {
|
||||
state = ensure_full_state(state);
|
||||
state->heredocs.pop_front();
|
||||
if (state->heredocs.empty())
|
||||
state->full_state->in_state = RubyFullState::NONE;
|
||||
tokens->push_back({i, len, 10});
|
||||
tokens->push_back({i, len, TokenKind::Annotation});
|
||||
return state;
|
||||
}
|
||||
}
|
||||
uint32_t start = i;
|
||||
if (!state->heredocs.front().allow_interpolation) {
|
||||
tokens->push_back({i, len, 2});
|
||||
tokens->push_back({i, len, TokenKind::String});
|
||||
return state;
|
||||
} else {
|
||||
while (i < len) {
|
||||
if (text[i] == '\\') {
|
||||
// TODO: highlight the escape character
|
||||
tokens->push_back({start, i, TokenKind::String});
|
||||
start = i;
|
||||
i++;
|
||||
if (i < len)
|
||||
i++;
|
||||
tokens->push_back({start, i, TokenKind::Escape});
|
||||
continue;
|
||||
}
|
||||
if (text[i] == '#' && i + 1 < len && text[i + 1] == '{') {
|
||||
tokens->push_back({start, i, 2});
|
||||
tokens->push_back({i, i + 2, 10});
|
||||
tokens->push_back({start, i, TokenKind::String});
|
||||
tokens->push_back({i, i + 2, TokenKind::Interpolation});
|
||||
i += 2;
|
||||
state = ensure_state(state);
|
||||
state->interp_stack.push(state->full_state);
|
||||
state->full_state = std::make_shared<RubyFullState>();
|
||||
state->interp_level = 1;
|
||||
@@ -209,7 +191,7 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i++;
|
||||
}
|
||||
if (i == len)
|
||||
tokens->push_back({start, len, 2});
|
||||
tokens->push_back({start, len, TokenKind::String});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -217,19 +199,20 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
uint32_t start = i;
|
||||
while (i < len) {
|
||||
if (text[i] == '\\') {
|
||||
// TODO: highlight the escape character - need to make priority work
|
||||
// and this have higher
|
||||
tokens->push_back({start, i, TokenKind::String});
|
||||
start = i;
|
||||
i++;
|
||||
if (i < len)
|
||||
i++;
|
||||
tokens->push_back({start, i, TokenKind::Escape});
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
if (state->full_state->lit.allow_interp && text[i] == '#' &&
|
||||
i + 1 < len && text[i + 1] == '{') {
|
||||
tokens->push_back({start, i, 2});
|
||||
tokens->push_back({i, i + 2, 10});
|
||||
tokens->push_back({start, i, TokenKind::String});
|
||||
tokens->push_back({i, i + 2, TokenKind::Interpolation});
|
||||
i += 2;
|
||||
state = ensure_state(state);
|
||||
state->interp_stack.push(state->full_state);
|
||||
state->full_state = std::make_shared<RubyFullState>();
|
||||
state->interp_level = 1;
|
||||
@@ -238,23 +221,23 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
if (text[i] == state->full_state->lit.delim_start &&
|
||||
state->full_state->lit.delim_start !=
|
||||
state->full_state->lit.delim_end) {
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->lit.brace_level++;
|
||||
}
|
||||
if (text[i] == state->full_state->lit.delim_end) {
|
||||
state = ensure_full_state(state);
|
||||
if (state->full_state->lit.delim_start ==
|
||||
state->full_state->lit.delim_end) {
|
||||
i++;
|
||||
tokens->push_back({start, i, 2});
|
||||
tokens->push_back({start, i, TokenKind::String});
|
||||
state->full_state->in_state = RubyFullState::NONE;
|
||||
state->full_state->expecting_expr = false;
|
||||
break;
|
||||
} else {
|
||||
state->full_state->lit.brace_level--;
|
||||
if (state->full_state->lit.brace_level == 0) {
|
||||
i++;
|
||||
tokens->push_back({start, i, 2});
|
||||
tokens->push_back({start, i, TokenKind::String});
|
||||
state->full_state->in_state = RubyFullState::NONE;
|
||||
state->full_state->expecting_expr = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -262,15 +245,67 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i++;
|
||||
}
|
||||
if (i == len)
|
||||
tokens->push_back({start, len, 2});
|
||||
tokens->push_back({start, len, TokenKind::String});
|
||||
continue;
|
||||
}
|
||||
if (state->full_state->in_state == RubyFullState::REGEXP) {
|
||||
uint32_t start = i;
|
||||
while (i < len) {
|
||||
if (text[i] == '\\') {
|
||||
tokens->push_back({start, i, TokenKind::Regexp});
|
||||
;
|
||||
start = i;
|
||||
i++;
|
||||
if (i < len)
|
||||
i++;
|
||||
tokens->push_back({start, i, TokenKind::Escape});
|
||||
continue;
|
||||
}
|
||||
if (text[i] == '#' && i + 1 < len && text[i + 1] == '{') {
|
||||
tokens->push_back({start, i, TokenKind::Regexp});
|
||||
tokens->push_back({i, i + 2, TokenKind::Interpolation});
|
||||
i += 2;
|
||||
state->interp_stack.push(state->full_state);
|
||||
state->full_state = std::make_shared<RubyFullState>();
|
||||
state->interp_level = 1;
|
||||
break;
|
||||
}
|
||||
if (text[i] == state->full_state->lit.delim_start &&
|
||||
state->full_state->lit.delim_start !=
|
||||
state->full_state->lit.delim_end) {
|
||||
state->full_state->lit.brace_level++;
|
||||
}
|
||||
if (text[i] == state->full_state->lit.delim_end) {
|
||||
if (state->full_state->lit.delim_start ==
|
||||
state->full_state->lit.delim_end) {
|
||||
i += 1;
|
||||
tokens->push_back({start, i, TokenKind::Regexp});
|
||||
state->full_state->in_state = RubyFullState::NONE;
|
||||
state->full_state->expecting_expr = false;
|
||||
break;
|
||||
} else {
|
||||
state->full_state->lit.brace_level--;
|
||||
if (state->full_state->lit.brace_level == 0) {
|
||||
i += 1;
|
||||
tokens->push_back({start, i, TokenKind::Regexp});
|
||||
state->full_state->in_state = RubyFullState::NONE;
|
||||
state->full_state->expecting_expr = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (i == len)
|
||||
tokens->push_back({start, len, TokenKind::Regexp});
|
||||
continue;
|
||||
}
|
||||
if (i == 0 && len == 6) {
|
||||
if (text[i] == '=' && text[i + 1] == 'b' && text[i + 2] == 'e' &&
|
||||
text[i + 3] == 'g' && text[i + 4] == 'i' && text[i + 5] == 'n') {
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->in_state = RubyFullState::COMMENT;
|
||||
tokens->push_back({0, len, 1});
|
||||
state->full_state->expecting_expr = false;
|
||||
tokens->push_back({0, len, TokenKind::Comment});
|
||||
return state;
|
||||
}
|
||||
}
|
||||
@@ -278,9 +313,9 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
if (text[i] == '_' && text[i + 1] == '_' && text[i + 2] == 'E' &&
|
||||
text[i + 3] == 'N' && text[i + 4] == 'D' && text[i + 5] == '_' &&
|
||||
text[i + 6] == '_') {
|
||||
state = ensure_full_state(state);
|
||||
tokens->clear();
|
||||
state->full_state->in_state = RubyFullState::END;
|
||||
state->full_state->expecting_expr = false;
|
||||
return state;
|
||||
}
|
||||
}
|
||||
@@ -291,7 +326,7 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
indented = true;
|
||||
if (text[j] == '~' || text[j] == '-')
|
||||
j++;
|
||||
tokens->push_back({i, j, 10});
|
||||
tokens->push_back({i, j, TokenKind::Operator});
|
||||
if (j >= len)
|
||||
continue;
|
||||
std::string delim;
|
||||
@@ -304,12 +339,15 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
while (j < len && text[j] != q)
|
||||
delim += text[j++];
|
||||
} else {
|
||||
while (j < len && identifier_char(text[j]))
|
||||
if (j < len && identifier_start_char(text[j])) {
|
||||
delim += text[j++];
|
||||
while (j < len && identifier_char(text[j]))
|
||||
delim += text[j++];
|
||||
}
|
||||
}
|
||||
state->full_state->expecting_expr = false;
|
||||
if (!delim.empty()) {
|
||||
tokens->push_back({s, j, 10});
|
||||
state = ensure_full_state(state);
|
||||
tokens->push_back({s, j, TokenKind::Annotation});
|
||||
state->heredocs.push_back({delim, interpolation, indented});
|
||||
state->full_state->in_state = RubyFullState::HEREDOC;
|
||||
heredoc_first = true;
|
||||
@@ -317,18 +355,47 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (text[i] == '#') {
|
||||
tokens->push_back({i, len, 1});
|
||||
if (text[i] == '/' && state->full_state->expecting_expr) {
|
||||
tokens->push_back({i, i + 1, TokenKind::Regexp});
|
||||
state->full_state->in_state = RubyFullState::REGEXP;
|
||||
state->full_state->expecting_expr = false;
|
||||
state->full_state->lit.delim_start = '/';
|
||||
state->full_state->lit.delim_end = '/';
|
||||
state->full_state->lit.allow_interp = true;
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '#') {
|
||||
tokens->push_back({i, len, TokenKind::Comment});
|
||||
state->full_state->expecting_expr = false;
|
||||
return state;
|
||||
} else if (text[i] == '.') {
|
||||
uint32_t start = i;
|
||||
i++;
|
||||
if (i < len && text[i] == '.') {
|
||||
i++;
|
||||
if (i < len && text[i] == '.') {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
tokens->push_back({start, i, TokenKind::Operator});
|
||||
state->full_state->expecting_expr = false;
|
||||
continue;
|
||||
} else if (text[i] == ':') {
|
||||
state->full_state->expecting_expr = false;
|
||||
uint32_t start = i;
|
||||
i++;
|
||||
if (i >= len) {
|
||||
tokens->push_back({start, i, 3});
|
||||
tokens->push_back({start, i, TokenKind::Operator});
|
||||
state->full_state->expecting_expr = true;
|
||||
continue;
|
||||
}
|
||||
if (text[i] == ':') {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if (text[i] == '\'' || text[i] == '"') {
|
||||
tokens->push_back({start, i, 6});
|
||||
tokens->push_back({start, i, TokenKind::Operator});
|
||||
state->full_state->expecting_expr = true;
|
||||
continue;
|
||||
}
|
||||
if (text[i] == '$' || text[i] == '@') {
|
||||
@@ -338,24 +405,25 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i++;
|
||||
while (i < len && identifier_char(text[i]))
|
||||
i++;
|
||||
tokens->push_back({start, i, 6});
|
||||
tokens->push_back({start, i, TokenKind::Label});
|
||||
continue;
|
||||
}
|
||||
uint32_t op_len = operator_trie.match(text, i, len, identifier_char);
|
||||
if (op_len > 0) {
|
||||
tokens->push_back({start, i + op_len, 6});
|
||||
tokens->push_back({start, i + op_len, TokenKind::Label});
|
||||
i += op_len;
|
||||
continue;
|
||||
}
|
||||
if (identifier_start_char(text[i])) {
|
||||
uint32_t word_len = get_next_word(text, i, len);
|
||||
tokens->push_back({start, i + word_len, 6});
|
||||
tokens->push_back({start, i + word_len, TokenKind::Label});
|
||||
i += word_len;
|
||||
continue;
|
||||
}
|
||||
tokens->push_back({start, i, 3});
|
||||
tokens->push_back({start, i, TokenKind::Operator});
|
||||
continue;
|
||||
} else if (text[i] == '@') {
|
||||
state->full_state->expecting_expr = false;
|
||||
uint32_t start = i;
|
||||
i++;
|
||||
if (i >= len)
|
||||
@@ -368,9 +436,10 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
continue;
|
||||
while (i < len && identifier_char(text[i]))
|
||||
i++;
|
||||
tokens->push_back({start, i, 7});
|
||||
tokens->push_back({start, i, TokenKind::VariableInstance});
|
||||
continue;
|
||||
} else if (text[i] == '$') {
|
||||
state->full_state->expecting_expr = false;
|
||||
uint32_t start = i;
|
||||
i++;
|
||||
if (i >= len)
|
||||
@@ -390,9 +459,10 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
tokens->push_back({start, i, 8});
|
||||
tokens->push_back({start, i, TokenKind::VariableGlobal});
|
||||
continue;
|
||||
} else if (text[i] == '?') {
|
||||
state->full_state->expecting_expr = false;
|
||||
uint32_t start = i;
|
||||
i++;
|
||||
if (i < len && text[i] == '\\') {
|
||||
@@ -405,7 +475,7 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
continue;
|
||||
if (i < len && isxdigit(text[i]))
|
||||
i++;
|
||||
tokens->push_back({start, i, 7});
|
||||
tokens->push_back({start, i, TokenKind::Char});
|
||||
continue;
|
||||
} else if (i < len && text[i] == 'u') {
|
||||
i++;
|
||||
@@ -425,42 +495,81 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i++;
|
||||
else
|
||||
continue;
|
||||
tokens->push_back({start, i, 7});
|
||||
tokens->push_back({start, i, TokenKind::Char});
|
||||
continue;
|
||||
} else if (i < len) {
|
||||
i++;
|
||||
tokens->push_back({start, i, 7});
|
||||
tokens->push_back({start, i, TokenKind::Char});
|
||||
continue;
|
||||
}
|
||||
} else if (i < len && text[i] != ' ') {
|
||||
i++;
|
||||
tokens->push_back({start, i, 7});
|
||||
tokens->push_back({start, i, TokenKind::Char});
|
||||
continue;
|
||||
} else {
|
||||
tokens->push_back({start, i, 3});
|
||||
state->full_state->expecting_expr = true;
|
||||
tokens->push_back({start, i, TokenKind::Operator});
|
||||
continue;
|
||||
}
|
||||
} else if (text[i] == '{') {
|
||||
tokens->push_back({i, i + 1, 3});
|
||||
state = ensure_state(state);
|
||||
state->full_state->expecting_expr = true;
|
||||
uint8_t brace_color =
|
||||
(uint8_t)TokenKind::Brace1 + (state->full_state->brace_level % 5);
|
||||
tokens->push_back({i, i + 1, (TokenKind)brace_color});
|
||||
state->interp_level++;
|
||||
state->full_state->brace_level++;
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '}') {
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->expecting_expr = false;
|
||||
state->interp_level--;
|
||||
if (state->interp_level == 0 && !state->interp_stack.empty()) {
|
||||
state->full_state = state->interp_stack.top();
|
||||
state->interp_stack.pop();
|
||||
tokens->push_back({i, i + 1, 10});
|
||||
tokens->push_back({i, i + 1, TokenKind::Interpolation});
|
||||
} else {
|
||||
tokens->push_back({i, i + 1, 3});
|
||||
state->full_state->brace_level--;
|
||||
uint8_t brace_color =
|
||||
(uint8_t)TokenKind::Brace1 + (state->full_state->brace_level % 5);
|
||||
tokens->push_back({i, i + 1, (TokenKind)brace_color});
|
||||
}
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '(') {
|
||||
state->full_state->expecting_expr = true;
|
||||
uint8_t brace_color =
|
||||
(uint8_t)TokenKind::Brace1 + (state->full_state->brace_level % 5);
|
||||
tokens->push_back({i, i + 1, (TokenKind)brace_color});
|
||||
state->full_state->brace_level++;
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == ')') {
|
||||
state->full_state->expecting_expr = false;
|
||||
state->full_state->brace_level--;
|
||||
uint8_t brace_color =
|
||||
(uint8_t)TokenKind::Brace1 + (state->full_state->brace_level % 5);
|
||||
tokens->push_back({i, i + 1, (TokenKind)brace_color});
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '[') {
|
||||
state->full_state->expecting_expr = true;
|
||||
uint8_t brace_color =
|
||||
(uint8_t)TokenKind::Brace1 + (state->full_state->brace_level % 5);
|
||||
tokens->push_back({i, i + 1, (TokenKind)brace_color});
|
||||
state->full_state->brace_level++;
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == ']') {
|
||||
state->full_state->expecting_expr = false;
|
||||
state->full_state->brace_level--;
|
||||
uint8_t brace_color =
|
||||
(uint8_t)TokenKind::Brace1 + (state->full_state->brace_level % 5);
|
||||
tokens->push_back({i, i + 1, (TokenKind)brace_color});
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '\'') {
|
||||
tokens->push_back({i, i + 1, 2});
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->expecting_expr = false;
|
||||
tokens->push_back({i, i + 1, TokenKind::String});
|
||||
state->full_state->in_state = RubyFullState::STRING;
|
||||
state->full_state->lit.delim_start = '\'';
|
||||
state->full_state->lit.delim_end = '\'';
|
||||
@@ -468,8 +577,8 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '"') {
|
||||
tokens->push_back({i, i + 1, 2});
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->expecting_expr = false;
|
||||
tokens->push_back({i, i + 1, TokenKind::String});
|
||||
state->full_state->in_state = RubyFullState::STRING;
|
||||
state->full_state->lit.delim_start = '"';
|
||||
state->full_state->lit.delim_end = '"';
|
||||
@@ -477,8 +586,8 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '`') {
|
||||
tokens->push_back({i, i + 1, 2});
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->expecting_expr = false;
|
||||
tokens->push_back({i, i + 1, TokenKind::String});
|
||||
state->full_state->in_state = RubyFullState::STRING;
|
||||
state->full_state->lit.delim_start = '`';
|
||||
state->full_state->lit.delim_end = '`';
|
||||
@@ -486,6 +595,7 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i++;
|
||||
continue;
|
||||
} else if (text[i] == '%') {
|
||||
state->full_state->expecting_expr = false;
|
||||
if (i + 1 >= len) {
|
||||
i++;
|
||||
continue;
|
||||
@@ -495,15 +605,24 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
char delim_end = '\0';
|
||||
bool allow_interp = true;
|
||||
int prefix_len = 1;
|
||||
bool is_regexp = false;
|
||||
switch (type) {
|
||||
case 'r':
|
||||
is_regexp = true;
|
||||
allow_interp = true;
|
||||
prefix_len = 2;
|
||||
break;
|
||||
case 'Q':
|
||||
case 'x':
|
||||
case 'I':
|
||||
case 'W':
|
||||
allow_interp = true;
|
||||
prefix_len = 2;
|
||||
break;
|
||||
case 'w':
|
||||
case 'q':
|
||||
case 'i':
|
||||
case 's':
|
||||
allow_interp = false;
|
||||
prefix_len = 2;
|
||||
break;
|
||||
@@ -539,9 +658,10 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
delim_end = delim_start;
|
||||
break;
|
||||
}
|
||||
tokens->push_back({i, i + prefix_len + 1, 2});
|
||||
state = ensure_full_state(state);
|
||||
state->full_state->in_state = RubyFullState::STRING;
|
||||
tokens->push_back({i, i + prefix_len + 1,
|
||||
(is_regexp ? TokenKind::Regexp : TokenKind::String)});
|
||||
state->full_state->in_state =
|
||||
is_regexp ? RubyFullState::REGEXP : RubyFullState::STRING;
|
||||
state->full_state->lit.delim_start = delim_start;
|
||||
state->full_state->lit.delim_end = delim_end;
|
||||
state->full_state->lit.allow_interp = allow_interp;
|
||||
@@ -549,6 +669,7 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i += prefix_len + 1;
|
||||
continue;
|
||||
} else if (isdigit(text[i])) {
|
||||
state->full_state->expecting_expr = false;
|
||||
uint32_t start = i;
|
||||
if (text[i] == '0') {
|
||||
i++;
|
||||
@@ -646,85 +767,137 @@ std::shared_ptr<void> ruby_parse(std::vector<Token> *tokens,
|
||||
i--;
|
||||
}
|
||||
}
|
||||
tokens->push_back({start, i, 9});
|
||||
tokens->push_back({start, i, TokenKind::Number});
|
||||
continue;
|
||||
} else if (identifier_start_char(text[i])) {
|
||||
state->full_state->expecting_expr = false;
|
||||
uint32_t length;
|
||||
if ((length = base_keywords_trie.match(text, i, len, identifier_char)) >
|
||||
0) {
|
||||
tokens->push_back({i, i + length, 4});
|
||||
if ((length = base_keywords_trie.match(text, i, len, identifier_char))) {
|
||||
tokens->push_back({i, i + length, TokenKind::Keyword});
|
||||
i += length;
|
||||
continue;
|
||||
} else if ((length = expecting_keywords_trie.match(text, i, len,
|
||||
identifier_char))) {
|
||||
state->full_state->expecting_expr = true;
|
||||
tokens->push_back({i, i + length, TokenKind::Keyword});
|
||||
i += length;
|
||||
continue;
|
||||
} else if ((length = operator_keywords_trie.match(text, i, len,
|
||||
identifier_char)) > 0) {
|
||||
tokens->push_back({i, i + length, 5});
|
||||
identifier_char))) {
|
||||
tokens->push_back({i, i + length, TokenKind::KeywordOperator});
|
||||
i += length;
|
||||
continue;
|
||||
} else if ((length = expecting_operators_trie.match(
|
||||
text, i, len, identifier_char)) > 0) {
|
||||
state->full_state->expecting_expr = true;
|
||||
tokens->push_back({i, i + length, TokenKind::KeywordOperator});
|
||||
i += length;
|
||||
continue;
|
||||
} else if (text[i] >= 'A' && text[i] <= 'Z') {
|
||||
uint32_t start = i;
|
||||
i += get_next_word(text, i, len);
|
||||
tokens->push_back({start, i, 10});
|
||||
tokens->push_back({start, i, TokenKind::Constant});
|
||||
continue;
|
||||
} else {
|
||||
uint32_t start = i;
|
||||
if (i + 4 < len && text[i] == 't' && text[i + 1] == 'r' &&
|
||||
text[i + 2] == 'u' && text[i + 3] == 'e') {
|
||||
i += 4;
|
||||
tokens->push_back({start, i, TokenKind::True});
|
||||
continue;
|
||||
}
|
||||
if (i + 5 < len && text[i] == 'f' && text[i + 1] == 'a' &&
|
||||
text[i + 2] == 'l' && text[i + 3] == 's' && text[i + 4] == 'e') {
|
||||
i += 5;
|
||||
tokens->push_back({start, i, TokenKind::False});
|
||||
continue;
|
||||
}
|
||||
if (i + 3 < len && text[i] == 'd' && text[i + 1] == 'e' &&
|
||||
text[i + 2] == 'f') {
|
||||
i += 3;
|
||||
tokens->push_back({start, i, TokenKind::Keyword});
|
||||
while (i < len && (text[i] == ' ' || text[i] == '\t'))
|
||||
i++;
|
||||
while (i < len) {
|
||||
if (identifier_start_char(text[i])) {
|
||||
uint32_t width = get_next_word(text, i, len);
|
||||
if (text[i] >= 'A' && text[i] <= 'Z')
|
||||
tokens->push_back({i, i + width, TokenKind::Constant});
|
||||
else if (width == 4 && (text[i] >= 's' && text[i + 1] == 'e' &&
|
||||
text[i + 2] == 'l' && text[i + 3] == 'f'))
|
||||
tokens->push_back({i, i + width, TokenKind::Keyword});
|
||||
i += width;
|
||||
if (i < len && text[i] == '.') {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
tokens->push_back({i - width, i, TokenKind::Function});
|
||||
break;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
while (i < len && identifier_char(text[i]))
|
||||
i++;
|
||||
if (i < len && text[i] == ':') {
|
||||
i++;
|
||||
tokens->push_back({start, i, 6});
|
||||
tokens->push_back({start, i, TokenKind::Label});
|
||||
continue;
|
||||
} else if (i < len && (text[i] == '!' || text[i] == '?')) {
|
||||
i++;
|
||||
tokens->push_back({start, i, TokenKind::Function});
|
||||
} else {
|
||||
uint32_t tmp = i;
|
||||
if (tmp < len && (text[tmp] == '(' || text[tmp] == '{')) {
|
||||
tokens->push_back({start, i, TokenKind::Function});
|
||||
continue;
|
||||
} else if (tmp < len && (text[tmp] == ' ' || text[tmp] == '\t')) {
|
||||
tmp++;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
while (tmp < len && (text[tmp] == ' ' || text[tmp] == '\t'))
|
||||
tmp++;
|
||||
if (tmp >= len)
|
||||
continue;
|
||||
if (!isascii(text[tmp])) {
|
||||
tokens->push_back({start, i, TokenKind::Function});
|
||||
continue;
|
||||
} else if (text[tmp] == '-' || text[tmp] == '&' || text[tmp] == '%' ||
|
||||
text[tmp] == ':') {
|
||||
if (tmp + 1 >= len ||
|
||||
(text[tmp + 1] == ' ' || text[tmp + 1] == '>'))
|
||||
continue;
|
||||
} else if (text[tmp] == ']' || text[tmp] == '}' || text[tmp] == ')' ||
|
||||
text[tmp] == ',' || text[tmp] == ';' || text[tmp] == '.' ||
|
||||
text[tmp] == '+' || text[tmp] == '*' || text[tmp] == '/' ||
|
||||
text[tmp] == '=' || text[tmp] == '?' || text[tmp] == '|' ||
|
||||
text[tmp] == '^' || text[tmp] == '<' || text[tmp] == '>') {
|
||||
continue;
|
||||
}
|
||||
tokens->push_back({start, i, TokenKind::Function});
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
uint32_t op_len;
|
||||
if ((op_len = operator_trie.match(text, i, len,
|
||||
[](char) { return false; })) > 0) {
|
||||
tokens->push_back({i, i + op_len, 3});
|
||||
if ((op_len =
|
||||
operator_trie.match(text, i, len, [](char) { return false; }))) {
|
||||
tokens->push_back({i, i + op_len, TokenKind::Operator});
|
||||
i += op_len;
|
||||
state->full_state->expecting_expr = true;
|
||||
continue;
|
||||
} else {
|
||||
i += utf8_codepoint_width(text[i]);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
i += utf8_codepoint_width(text[i]);
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
// Reports whether two type-erased parser states are equal.
// Both arguments are cast to RubyState and compared with RubyState's
// operator==. Returns false when either pointer is null.
bool ruby_state_match(std::shared_ptr<void> state_1,
|
||||
std::shared_ptr<void> state_2) {
|
||||
// A missing state never matches anything, not even another missing state.
if (!state_1 || !state_2)
|
||||
return false;
|
||||
// Compare the pointed-to states by value, not the pointers themselves.
return *std::static_pointer_cast<RubyState>(state_1) ==
|
||||
*std::static_pointer_cast<RubyState>(state_2);
|
||||
}
|
||||
|
||||
// function calls matched with alphanumeric names followed immediately by !
|
||||
// or ? or `(` immediately or with space or are followed by a non-keyword
|
||||
// or non-operator (some operators like - for negating and ! for not or {
|
||||
// for block might be allowed?)
|
||||
// a word following :: or . is matched as a property
|
||||
// and any random word is matched as a variable name
|
||||
// or as a class/module name if it starts with a capital letter
|
||||
//
|
||||
// regex are matched as text within / and / as long as
|
||||
// the first / is not
|
||||
// following a literal (int/float/string) or variable or brace close
|
||||
// and is following a keyword or operator like return /regex/ or x =
|
||||
// /regex/ . so maybe add field expecting_expr to state that is true right
|
||||
// after keyword or some operators like = , =~ , `,` etc?
|
||||
//
|
||||
// (left to implement) -
|
||||
//
|
||||
// words - breaks up into these submatches
|
||||
// - Constants that start with a capital letter
|
||||
// - a word following :: or . is matched as a property
|
||||
// - function call if ending with ! or ? or ( or are followed by a
|
||||
// non-keyword or non-operator. I'll figure it out
|
||||
//
|
||||
// regex (and distinguish between / for division and / for regex) and
|
||||
// %r{} ones too
|
||||
//
|
||||
// Matching brace colors by brace depth
|
||||
//
|
||||
// TODO: Add tries for builtins and highlight them separately like (Array /
|
||||
// self etc)
|
||||
// And in regex better highlighting of regex structures
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
// Returns true when the first n bytes of a and b are identical, false at the
// first mismatch. The comparison is purely length-bounded: it does not stop at
// a '\0' terminator, and n == 0 yields true without reading either buffer.
bool compare(const char *a, const char *b, size_t n) {
|
||||
size_t i = 0;
|
||||
for (; i < n; ++i)
|
||||
if (a[i] != b[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string percent_decode(const std::string &s) {
|
||||
std::string out;
|
||||
out.reserve(s.size());
|
||||
|
||||
Reference in New Issue
Block a user