Add tree-sitter injections support & cleanup

This commit is contained in:
2025-12-25 04:14:53 +00:00
parent a10dd92249
commit 659628835d
16 changed files with 302 additions and 323 deletions

3
.gitmodules vendored
View File

@@ -66,3 +66,6 @@
[submodule "libs/tree-sitter-fish"]
path = libs/tree-sitter-fish
url = https://github.com/ram02z/tree-sitter-fish
[submodule "libs/tree-sitter-rust"]
path = libs/tree-sitter-rust
url = https://github.com/tree-sitter/tree-sitter-rust.git

View File

@@ -1,3 +1,7 @@
; This is an injection test - it should highlight all heredoc content as bash code
;; !bash - this part should be ignored (anything after the first wordbreak after the `!`)
(heredoc_content) @ruby_injection
;; #ffffff #000000 0 0 0 1
[
(identifier)

View File

@@ -5,6 +5,8 @@
#include "./pch.h"
#include "./ui.h"
#include "./utils.h"
#include "ts_def.h"
#include <cstdint>
#include <deque>
#define CHAR 0
#define WORD 1
@@ -117,6 +119,25 @@ struct VAI {
// after the first one
};
// State shared by every tree-sitter parse unit (the main document as well as
// each injected language): the grammar, the parser/tree pair and the compiled
// highlight query together with its capture lookup tables.
//
// Every raw handle defaults to nullptr. new_editor() leaves them untouched
// when the language is unknown or the file is too large, and free_tsset()
// decides what to delete by testing each handle for null, so they must never
// hold indeterminate values.
struct TSSetBase {
  // Language name; injected sets store Language::name here and the value
  // "unknown" is skipped when injections are indexed by name.
  std::string lang;
  // Most recent parse result, replaced after every successful parse.
  TSTree *tree = nullptr;
  // Parser configured for `language`; released by free_tsset().
  TSParser *parser = nullptr;
  // Path of the .scm highlight query (<exe dir>/../grammar/<name>.scm).
  std::string query_file;
  // Query compiled by load_query(); stays nullptr until it has been loaded.
  TSQuery *query = nullptr;
  // Capture index -> highlight style, filled by load_query().
  std::map<uint16_t, Highlight> query_map;
  // Capture index -> language to inject into the captured node, filled by
  // load_query() from ";; !<lang>" directives.
  std::map<uint16_t, Language> injection_map;
  // Grammar returned by the language's tree_sitter_*() function.
  const TSLanguage *language = nullptr;
};
// Parse state for one injected language: the shared tree-sitter state plus
// the regions of the host document that the injected parser is restricted to.
struct TSSet : TSSetBase {
// Cleared at the start of each ts_collect_spans() pass, refilled from the
// injection captures and handed to ts_parser_set_included_ranges().
std::vector<TSRange> ranges;
};
// Parse state of the main document plus one TSSet per injected language that
// has been encountered so far.
struct TSSetMain : TSSetBase {
  // A std::deque rather than a std::vector: ts_collect_spans() stores
  // pointers obtained from &injections.back() while it keeps appending new
  // sets during the same pass. deque::push_back never invalidates pointers or
  // references to existing elements, whereas a vector reallocation would
  // leave those stored pointers dangling.
  std::deque<TSSet> injections;
};
struct Editor {
std::string filename;
std::string uri;
@@ -130,13 +151,8 @@ struct Editor {
Coord position;
Coord size;
Coord scroll;
TSTree *tree;
TSParser *parser;
std::string query_file;
TSQuery *query;
const TSLanguage *language;
TSSetMain ts;
Queue<TSInputEdit> edit_queue;
std::vector<Highlight> query_map;
std::vector<Fold> folds;
Spans spans;
Spans def_spans;

View File

@@ -24,7 +24,7 @@ struct LSPOpenRequest {
struct LSPInstance {
std::shared_mutex mtx;
LSP *lsp;
const LSP *lsp;
std::string root_dir;
int pid{-1};
int stdin_fd{-1};
@@ -39,7 +39,6 @@ struct LSPInstance {
extern std::shared_mutex active_lsps_mtx;
extern std::unordered_map<uint8_t, LSPInstance *> active_lsps;
extern std::unordered_map<uint8_t, LSP> lsp_map;
void lsp_worker();
void lsp_handle(LSPInstance *lsp, json message);

65
include/maps.h Normal file
View File

@@ -0,0 +1,65 @@
#ifndef MAPS_H
#define MAPS_H
#include "./lsp.h"
#include "./pch.h"
#include "./ts_def.h"
#include <unordered_map>
// Registry of every supported language, keyed by language name. Each entry
// carries the name again (used to build the grammar/<name>.scm query path)
// and the tree-sitter grammar entry point.
// NOTE(review): the optional third initializer (1 for the C family) looks
// like the id of the language server in kLsps - confirm against the
// Language definition.
//
// Declared `inline` (C++17) so that all translation units including this
// header share one table instead of each building a private copy.
inline const std::unordered_map<std::string, Language> kLanguages = {
    {"bash", {"bash", tree_sitter_bash}},
    {"c", {"c", tree_sitter_c, 1}},
    {"cpp", {"cpp", tree_sitter_cpp, 1}},
    {"h", {"h", tree_sitter_cpp, 1}},
    {"css", {"css", tree_sitter_css}},
    {"fish", {"fish", tree_sitter_fish}},
    {"go", {"go", tree_sitter_go}},
    {"haskell", {"haskell", tree_sitter_haskell}},
    {"html", {"html", tree_sitter_html}},
    {"javascript", {"javascript", tree_sitter_javascript}},
    {"json", {"json", tree_sitter_json}},
    {"lua", {"lua", tree_sitter_lua}},
    {"make", {"make", tree_sitter_make}},
    {"python", {"python", tree_sitter_python}},
    {"ruby", {"ruby", tree_sitter_ruby}},
};
// Known language servers, keyed by the numeric id that get_or_init_lsp()
// looks up. Each entry holds the command name followed by its
// nullptr-terminated argument list.
// NOTE(review): the argument list is presumably passed as argv when the
// server process is spawned - confirm in init_lsp().
//
// Declared `inline` (C++17) so that the table exists once in the program;
// LSPInstance keeps a pointer to the selected entry, which therefore refers
// to the same object regardless of which translation unit looks at it.
inline const std::unordered_map<uint8_t, LSP> kLsps = {
    {1,
     {"clangd",
      {
          "clangd",
          "--background-index",
          "--clang-tidy",
          "--completion-style=detailed",
          "--header-insertion=iwyu",
          "--log=error",
          nullptr,
      }}},
};
// File extension (without the dot) -> language name. Every value must be a
// key of kLanguages, because language_for_file() dereferences the result of
// that lookup without checking for a miss.
//
// Declared `inline` (C++17) so that all translation units including this
// header share one table instead of each building a private copy.
inline const std::unordered_map<std::string, std::string> kExtToLang = {
    // shell
    {"sh", "bash"},
    {"bash", "bash"},
    {"fish", "fish"},
    // C family
    {"c", "c"},
    {"cpp", "cpp"},
    {"cxx", "cpp"},
    {"cc", "cpp"},
    {"hpp", "h"},
    {"hh", "h"},
    {"hxx", "h"},
    {"h", "h"},
    // web
    {"css", "css"},
    {"html", "html"},
    {"htm", "html"},
    {"js", "javascript"},
    {"json", "json"},
    // everything else
    {"go", "go"},
    {"hs", "haskell"},
    {"lua", "lua"},
    {"mk", "make"},
    {"makefile", "make"},
    {"py", "python"},
    {"rb", "ruby"},
};
// MIME type -> language name, consulted by language_for_file() when the file
// extension does not identify the language. Every value must be a key of
// kLanguages, because the caller dereferences that lookup without checking
// for a miss.
//
// Declared `inline` (C++17) so that all translation units including this
// header share one table instead of each building a private copy.
inline const std::unordered_map<std::string, std::string> kMimeToLang = {
    {"text/x-c", "c"},
    {"text/x-c++", "cpp"},
    {"text/x-shellscript", "bash"},
    {"application/json", "json"},
    {"text/javascript", "javascript"},
    {"text/html", "html"},
    {"text/css", "css"},
    {"text/x-python", "python"},
    {"text/x-ruby", "ruby"},
    {"text/x-go", "go"},
    {"text/x-haskell", "haskell"},
    {"text/x-lua", "lua"},
};
#endif

View File

@@ -7,10 +7,19 @@
#include "../libs/tree-sitter/lib/include/tree_sitter/api.h"
#include <algorithm>
#include <atomic>
#include <cctype>
#include <chrono>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <filesystem>
#include <fstream>
#include <functional>
#include <limits.h>
#include <magic.h>
#include <map>
#include <mutex>
#include <nlohmann/json.hpp>

View File

@@ -9,7 +9,7 @@
extern std::unordered_map<std::string, pcre2_code *> regex_cache;
TSQuery *load_query(const char *query_path, Editor *editor);
TSQuery *load_query(const char *query_path, TSSetBase *set);
void ts_collect_spans(Editor *editor);
void clear_regex_cache();

View File

@@ -24,6 +24,13 @@ const TSLanguage *tree_sitter_lua();
const TSLanguage *tree_sitter_make();
const TSLanguage *tree_sitter_python();
const TSLanguage *tree_sitter_ruby();
const TSLanguage *tree_sitter_rust();
// TO ADD
// sql
// wasm
// conf
// yaml, toml
// godot
}
#endif

1
libs/tree-sitter-rust Submodule

Submodule libs/tree-sitter-rust added at 261b20226c

View File

@@ -44,9 +44,19 @@ puts "Emoji count: #{emojis.length}"
# Multi-line string with unicode
multi = <<~EOF
Emojis inside heredoc: 🎉🔥💀🧡💛💚💙💜🖤🤍🤎
End of block.
# Function recursion demo
factorial() {
local n="$1"
if ((n <= 1)); then
echo 1
else
local prev
prev=$(factorial $((n - 1)))
echo $((n * prev))
fi
}
log INFO "factorial(5) = $(factorial 5)"
EOF
puts multi

View File

@@ -4,7 +4,6 @@ extern "C" {
#include "../include/editor.h"
#include "../include/lsp.h"
#include "../include/main.h"
#include "../include/ts.h"
#include "../include/utils.h"
Editor *new_editor(const char *filename, Coord position, Coord size) {
@@ -24,25 +23,38 @@ Editor *new_editor(const char *filename, Coord position, Coord size) {
editor->cursor_preffered = UINT32_MAX;
editor->root = load(str, len, optimal_chunk_size(len));
free(str);
if (len <= (1024 * 128)) {
Language language = language_for_file(filename);
editor->parser = ts_parser_new();
editor->language = language.fn();
ts_parser_set_language(editor->parser, editor->language);
editor->query_file =
Language language = language_for_file(filename);
if (language.name != "unknown" && len <= (1024 * 128)) {
editor->ts.parser = ts_parser_new();
editor->ts.language = language.fn();
ts_parser_set_language(editor->ts.parser, editor->ts.language);
editor->ts.query_file =
get_exe_dir() + "/../grammar/" + language.name + ".scm";
request_add_to_lsp(language, editor);
}
return editor;
}
void free_tsset(TSSetMain *set) {
if (set->parser)
ts_parser_delete(set->parser);
if (set->tree)
ts_tree_delete(set->tree);
if (set->query)
ts_query_delete(set->query);
for (auto &inj : set->injections) {
if (inj.parser)
ts_parser_delete(inj.parser);
if (inj.tree)
ts_tree_delete(inj.tree);
if (inj.query)
ts_query_delete(inj.query);
}
}
void free_editor(Editor *editor) {
remove_from_lsp(editor);
ts_parser_delete(editor->parser);
if (editor->tree)
ts_tree_delete(editor->tree);
if (editor->query)
ts_query_delete(editor->query);
free_tsset(&editor->ts);
free_rope(editor->root);
delete editor;
}
@@ -61,23 +73,6 @@ void render_editor(Editor *editor) {
auto hook_it = v.begin();
while (hook_it != v.end() && hook_it->first <= editor->scroll.row)
++hook_it;
// Iterators for hints and warnings (both already sorted)
size_t hint_idx = 0;
size_t warn_idx = 0;
// Helper to advance hint iterator to current line
auto advance_hints_to = [&](uint32_t row) {
while (hint_idx < editor->hints.size() &&
editor->hints[hint_idx].pos.row < row)
++hint_idx;
};
auto advance_warns_to = [&](uint32_t row) {
while (warn_idx < editor->warnings.size() &&
editor->warnings[warn_idx].line < row)
++warn_idx;
};
std::shared_lock knot_lock(editor->knot_mtx);
if (editor->selection_active) {
Coord start, end;
@@ -127,46 +122,6 @@ void render_editor(Editor *editor) {
sel_start = line_to_byte(editor->root, start.row, nullptr) + start.col;
sel_end = line_to_byte(editor->root, end.row, nullptr) + end.col;
}
// Helper for warning colors based on type
auto warn_colors = [](int8_t type) -> std::pair<uint32_t, uint32_t> {
switch (type) {
case 1: // info
return {0x7fbfff, 0};
case 2: // warn
return {0xffd166, 0};
case 3: // error
return {0xff5f5f, 0};
default: // neutral
return {0xaaaaaa, 0};
}
};
// Helper to get nth line (0-based) from VAI text (ASCII/UTF-8)
auto ai_line_span = [&](const VAI &ai,
uint32_t n) -> std::pair<const char *, uint32_t> {
const char *p = ai.text;
uint32_t line_no = 0;
const char *start = p;
uint32_t len = 0;
for (uint32_t i = 0; i < ai.len; i++) {
if (ai.text[i] == '\n') {
if (line_no == n) {
len = i - (start - ai.text);
return {start, len};
}
line_no++;
start = ai.text + i + 1;
}
}
// last line (no trailing newline)
if (line_no == n) {
len = ai.text + ai.len - start;
return {start, len};
}
return {nullptr, 0};
};
Coord cursor = {UINT32_MAX, UINT32_MAX};
uint32_t line_index = editor->scroll.row;
SpanCursor span_cursor(editor->spans);
@@ -178,15 +133,7 @@ void render_editor(Editor *editor) {
uint32_t global_byte_offset = line_to_byte(editor->root, line_index, nullptr);
span_cursor.sync(global_byte_offset);
def_span_cursor.sync(global_byte_offset);
const bool ai_active = editor->ai.text && editor->ai.len > 0;
const uint32_t ai_row = ai_active ? editor->ai.pos.row : UINT32_MAX;
const uint32_t ai_lines = ai_active ? editor->ai.lines : 0;
while (rendered_rows < editor->size.row) {
advance_hints_to(line_index);
advance_warns_to(line_index);
const Fold *fold = fold_for_line(editor->folds, line_index);
if (fold) {
update(editor->position.row + rendered_rows, editor->position.col, "",
@@ -207,17 +154,10 @@ void render_editor(Editor *editor) {
for (; i < render_width; i++)
update(rendered_rows, i + render_x, " ", 0xc6c6c6, 0, 0);
rendered_rows++;
uint32_t skip_until = fold->end;
while (line_index <= skip_until) {
if (hook_it != v.end() && hook_it->first == line_index + 1)
hook_it++;
if (hint_idx < editor->hints.size() &&
editor->hints[hint_idx].pos.row == line_index)
hint_idx++;
if (warn_idx < editor->warnings.size() &&
editor->warnings[warn_idx].line == line_index)
warn_idx++;
uint32_t line_len;
char *line = next_line(it, &line_len);
if (!line)
@@ -239,15 +179,7 @@ void render_editor(Editor *editor) {
uint32_t current_byte_offset = 0;
if (rendered_rows == 0)
current_byte_offset += editor->scroll.col;
// AI handling: determine if this line is overridden by AI
bool ai_this_line =
ai_active && line_index >= ai_row && line_index <= ai_row + ai_lines;
bool ai_first_line = ai_this_line && line_index == ai_row;
while ((ai_this_line ? current_byte_offset <= line_len
: current_byte_offset < line_len) &&
rendered_rows < editor->size.row) {
while (current_byte_offset < line_len && rendered_rows < editor->size.row) {
uint32_t color = editor->cursor.row == line_index ? 0x222222 : 0;
if (current_byte_offset == 0 || rendered_rows == 0) {
const char *hook = nullptr;
@@ -276,71 +208,7 @@ void render_editor(Editor *editor) {
uint32_t col = 0;
uint32_t local_render_offset = 0;
uint32_t line_left = line_len - current_byte_offset;
// For AI extra lines (line > ai_row), we don't render real text
if (ai_this_line && !ai_first_line) {
const uint32_t ai_line_no = line_index - ai_row;
auto [aptr, alen] = ai_line_span(editor->ai, ai_line_no);
if (aptr && alen) {
uint32_t draw = std::min<uint32_t>(alen, render_width);
update(editor->position.row + rendered_rows, render_x,
std::string(aptr, draw).c_str(), 0x666666, 0, CF_ITALIC);
col = draw;
}
while (col < render_width) {
update(editor->position.row + rendered_rows, render_x + col, " ", 0,
0 | color, 0);
col++;
}
rendered_rows++;
break; // move to next screen row
}
while (line_left > 0 && col < render_width) {
// Render pending hints at this byte offset
while (hint_idx < editor->hints.size() &&
editor->hints[hint_idx].pos.row == line_index &&
editor->hints[hint_idx].pos.col ==
current_byte_offset + local_render_offset) {
const VHint &vh = editor->hints[hint_idx];
uint32_t draw = std::min<uint32_t>(vh.len, render_width - col);
if (draw == 0)
break;
update(editor->position.row + rendered_rows, render_x + col,
std::string(vh.text, draw).c_str(), 0x777777, 0 | color,
CF_ITALIC);
col += draw;
++hint_idx;
if (col >= render_width)
break;
}
if (col >= render_width)
break;
// AI first line: stop underlying text at ai.pos.col, then render AI,
// clip
if (ai_first_line &&
(current_byte_offset + local_render_offset) >= editor->ai.pos.col) {
// render AI first line
auto [aptr, alen] = ai_line_span(editor->ai, 0);
if (aptr && alen) {
uint32_t draw = std::min<uint32_t>(alen, render_width - col);
update(editor->position.row + rendered_rows, render_x + col,
std::string(aptr, draw).c_str(), 0x666666, 0 | color,
CF_ITALIC);
col += draw;
}
// fill rest and break
while (col < render_width) {
update(editor->position.row + rendered_rows, render_x + col, " ", 0,
0 | color, 0);
col++;
}
rendered_rows++;
current_byte_offset = line_len; // hide rest of real text
goto after_line_body;
}
if (line_index == editor->cursor.row &&
editor->cursor.col == (current_byte_offset + local_render_offset)) {
cursor.row = editor->position.row + rendered_rows;
@@ -379,14 +247,11 @@ void render_editor(Editor *editor) {
update(editor->position.row + rendered_rows, render_x + col - width,
"\x1b", fg, bg | color, fl);
}
if (line_index == editor->cursor.row &&
editor->cursor.col == (current_byte_offset + local_render_offset)) {
cursor.row = editor->position.row + rendered_rows;
cursor.col = render_x + col;
}
// Trailing selection block
if (editor->selection_active &&
global_byte_offset + line_len + 1 > sel_start &&
global_byte_offset + line_len + 1 <= sel_end && col < render_width) {
@@ -394,20 +259,6 @@ void render_editor(Editor *editor) {
0x555555 | color, 0);
col++;
}
// Render warning text at end (does not affect wrapping)
if (warn_idx < editor->warnings.size() &&
editor->warnings[warn_idx].line == line_index && col < render_width) {
const VWarn &w = editor->warnings[warn_idx];
auto [wfg, wbg] = warn_colors(w.type);
uint32_t draw = std::min<uint32_t>(w.len, render_width - col);
if (draw)
update(editor->position.row + rendered_rows, render_x + col,
std::string(w.text, draw).c_str(), wfg, wbg | color,
CF_ITALIC);
// do not advance col for padding skip; we still fill remaining spaces
}
while (col < render_width) {
update(editor->position.row + rendered_rows, render_x + col, " ", 0,
0 | color, 0);
@@ -415,11 +266,7 @@ void render_editor(Editor *editor) {
}
rendered_rows++;
current_byte_offset += local_render_offset;
after_line_body:
break; // proceed to next screen row
}
if (line_len == 0 ||
(current_byte_offset >= line_len && rendered_rows == 0)) {
uint32_t color = editor->cursor.row == line_index ? 0x222222 : 0;
@@ -452,17 +299,6 @@ void render_editor(Editor *editor) {
0x555555 | color, 0);
col++;
}
// warning on empty line
if (warn_idx < editor->warnings.size() &&
editor->warnings[warn_idx].line == line_index && col < render_width) {
const VWarn &w = editor->warnings[warn_idx];
auto [wfg, wbg] = warn_colors(w.type);
uint32_t draw = std::min<uint32_t>(w.len, render_width - col);
if (draw)
update(editor->position.row + rendered_rows, render_x + col,
std::string(w.text, draw).c_str(), wfg, wbg | color,
CF_ITALIC);
}
while (col < render_width) {
update(editor->position.row + rendered_rows, render_x + col, " ", 0,
0 | color, 0);

View File

@@ -361,7 +361,7 @@ void edit_erase(Editor *editor, Coord pos, int64_t len) {
std::unique_lock lock_2(editor->knot_mtx);
editor->root = erase(editor->root, start, byte_pos - start);
lock_2.unlock();
if (editor->tree) {
if (editor->ts.tree) {
TSInputEdit edit = {
.start_byte = start,
.old_end_byte = byte_pos,
@@ -405,7 +405,7 @@ void edit_erase(Editor *editor, Coord pos, int64_t len) {
std::unique_lock lock_2(editor->knot_mtx);
editor->root = erase(editor->root, byte_pos, end - byte_pos);
lock_2.unlock();
if (editor->tree) {
if (editor->ts.tree) {
TSInputEdit edit = {
.start_byte = byte_pos,
.old_end_byte = end,
@@ -454,7 +454,7 @@ void edit_insert(Editor *editor, Coord pos, char *data, uint32_t len) {
}
apply_line_insertion(editor, pos.row, rows);
apply_hook_insertion(editor, pos.row, rows);
if (editor->tree) {
if (editor->ts.tree) {
TSInputEdit edit = {
.start_byte = byte_pos,
.old_end_byte = byte_pos,

View File

@@ -535,9 +535,11 @@ static Highlight HL_UNDERLINE = {0, 0, 1 << 2, 100};
void editor_worker(Editor *editor) {
if (!editor || !editor->root)
return;
if (editor->query_file != "" && !editor->query)
editor->query = load_query(editor->query_file.c_str(), editor);
if (editor->parser && editor->query)
if (editor->root->char_count > (1024 * 200))
return;
if (editor->ts.query_file != "" && !editor->ts.query)
editor->ts.query = load_query(editor->ts.query_file.c_str(), &editor->ts);
if (editor->ts.parser && editor->ts.query)
ts_collect_spans(editor);
uint32_t prev_col, next_col;
word_boundaries_exclusive(editor, editor->cursor, &prev_col, &next_col);

View File

@@ -1,4 +1,5 @@
#include "../include/lsp.h"
#include "../include/maps.h"
#include <fcntl.h>
#include <signal.h>
#include <sys/poll.h>
@@ -11,22 +12,6 @@ std::unordered_map<uint8_t, LSPInstance *> active_lsps;
Queue<LSPOpenRequest> lsp_open_queue;
std::unordered_map<uint8_t, LSP> lsp_map = {
{
1,
{"clangd",
{
"clangd",
"--background-index",
"--clang-tidy",
"--completion-style=detailed",
"--header-insertion=iwyu",
"--log=error",
nullptr,
}},
},
};
static bool init_lsp(LSPInstance *lsp) {
log("starting %s\n", lsp->lsp->command);
int in_pipe[2];
@@ -66,8 +51,8 @@ LSPInstance *get_or_init_lsp(uint8_t lsp_id) {
std::unique_lock lock(active_lsps_mtx);
auto it = active_lsps.find(lsp_id);
if (it == active_lsps.end()) {
auto map_it = lsp_map.find(lsp_id);
if (map_it == lsp_map.end())
auto map_it = kLsps.find(lsp_id);
if (map_it == kLsps.end())
return nullptr;
LSPInstance *lsp = new LSPInstance();
lsp->lsp = &map_it->second;
@@ -78,9 +63,7 @@ LSPInstance *get_or_init_lsp(uint8_t lsp_id) {
LSPPending *pending = new LSPPending();
pending->method = "initialize";
pending->editor = nullptr;
pending->callback = [lsp]([[maybe_unused]] Editor *_e,
[[maybe_unused]] std::string _m,
[[maybe_unused]] json _j) {
pending->callback = [lsp](Editor *, std::string, json) {
lsp->initialized = true;
json initialized = {{"jsonrpc", "2.0"},
{"method", "initialized"},
@@ -327,7 +310,7 @@ void remove_from_lsp(Editor *editor) {
close_lsp(lsp_id);
}
void lsp_handle([[maybe_unused]] LSPInstance *lsp, json message) {
void lsp_handle(LSPInstance *, json message) {
std::string method = message.value("method", "");
if (method == "window/showMessage") {
if (message.contains("params")) {

162
src/ts.cc
View File

@@ -1,6 +1,7 @@
#include "../include/ts.h"
#include "../include/editor.h"
#include "../include/knot.h"
#include "../include/maps.h"
#include <algorithm>
#include <cstdint>
#include <fstream>
@@ -28,8 +29,8 @@ pcre2_code *get_re(const std::string &pattern) {
return re;
}
TSQuery *load_query(const char *query_path, Editor *editor) {
const TSLanguage *lang = editor->language;
TSQuery *load_query(const char *query_path, TSSetBase *set) {
const TSLanguage *lang = set->language;
std::ifstream file(query_path, std::ios::in | std::ios::binary);
if (!file.is_open())
return nullptr;
@@ -38,7 +39,7 @@ TSQuery *load_query(const char *query_path, Editor *editor) {
int errornumber = 0;
PCRE2_SIZE erroroffset = 0;
pcre2_code *re = pcre2_compile(
(PCRE2_SPTR) R"((@[A-Za-z0-9_.]+)|(;; \#[0-9a-fA-F]{6} \#[0-9a-fA-F]{6} [01] [01] [01] \d+))",
(PCRE2_SPTR) R"((@[A-Za-z0-9_.]+)|(;; \#[0-9a-fA-F]{6} \#[0-9a-fA-F]{6} [01] [01] [01] \d+)|(;; !(\w+)))",
PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, nullptr);
if (!re)
return nullptr;
@@ -46,9 +47,8 @@ TSQuery *load_query(const char *query_path, Editor *editor) {
pcre2_match_data_create_from_pattern(re, nullptr);
std::map<std::string, int> capture_name_cache;
Highlight *c_hl = nullptr;
Language c_lang = {"unknown", nullptr, 0};
int i = 0;
int limit = 20;
editor->query_map.resize(limit);
PCRE2_SIZE offset = 0;
PCRE2_SIZE subject_length = highlight_query.size();
while (offset < subject_length) {
@@ -63,18 +63,18 @@ TSQuery *load_query(const char *query_path, Editor *editor) {
std::string capture_name = mct;
if (!capture_name_cache.count(capture_name)) {
if (c_hl) {
if (i >= limit) {
limit += 20;
editor->query_map.resize(limit);
}
editor->query_map[i] = *c_hl;
set->query_map[i] = *c_hl;
delete c_hl;
c_hl = nullptr;
}
if (c_lang.fn != nullptr) {
set->injection_map[i] = c_lang;
c_lang = {"unknown", nullptr, 0};
}
capture_name_cache[capture_name] = i;
i++;
}
} else if (mct.size() >= 2 && mct[0] == ';' && mct[1] == ';') {
} else if (mct.substr(0, 4) == ";; #") {
if (c_hl)
delete c_hl;
c_hl = new Highlight();
@@ -86,6 +86,10 @@ TSQuery *load_query(const char *query_path, Editor *editor) {
c_hl->priority = std::stoi(mct.substr(25));
c_hl->flags = (bold ? CF_BOLD : 0) | (italic ? CF_ITALIC : 0) |
(underline ? CF_UNDERLINE : 0);
} else if (mct.substr(0, 4) == ";; !") {
auto it = kLanguages.find(mct.substr(4));
if (it != kLanguages.end())
c_lang = it->second;
}
offset = ovector[1];
}
@@ -174,26 +178,32 @@ const char *read_ts(void *payload, uint32_t byte_index, TSPoint,
return leaf_from_offset(editor->root, byte_index, bytes_read);
}
static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
if (index >= vec.size())
template <typename T>
static inline T *safe_get(std::map<uint16_t, T> &m, uint16_t key) {
auto it = m.find(key);
if (it == m.end())
return nullptr;
return &vec[index];
return &it->second;
}
void ts_collect_spans(Editor *editor) {
static int parse_counter = 0;
if (!editor->parser || !editor->root || !editor->query)
if (!editor->ts.parser || !editor->root || !editor->ts.query)
return;
TSInput tsinput = {
const bool injections_enabled = editor->root->char_count < (1024 * 32);
for (auto &inj : editor->ts.injections)
inj.ranges.clear();
TSInput tsinput{
.payload = editor,
.read = read_ts,
.encoding = TSInputEncodingUTF8,
.decode = nullptr,
};
TSTree *tree, *copy = nullptr;
TSTree *tree = nullptr;
TSTree *copy = nullptr;
std::unique_lock knot_mtx(editor->knot_mtx);
if (editor->tree)
copy = ts_tree_copy(editor->tree);
if (editor->ts.tree)
copy = ts_tree_copy(editor->ts.tree);
knot_mtx.unlock();
std::vector<TSInputEdit> edits;
TSInputEdit edit;
@@ -201,7 +211,7 @@ void ts_collect_spans(Editor *editor) {
while (editor->edit_queue.pop(edit)) {
edits.push_back(edit);
ts_tree_edit(copy, &edits.back());
};
}
if (copy && edits.empty() && parse_counter < 64) {
parse_counter++;
ts_tree_delete(copy);
@@ -210,41 +220,129 @@ void ts_collect_spans(Editor *editor) {
parse_counter = 0;
editor->spans.mid_parse = true;
std::shared_lock lock(editor->knot_mtx);
tree = ts_parser_parse(editor->parser, copy, tsinput);
tree = ts_parser_parse(editor->ts.parser, copy, tsinput);
lock.unlock();
if (copy)
ts_tree_delete(copy);
knot_mtx.lock();
if (editor->tree)
ts_tree_delete(editor->tree);
editor->tree = tree;
if (editor->ts.tree)
ts_tree_delete(editor->ts.tree);
editor->ts.tree = tree;
copy = ts_tree_copy(tree);
knot_mtx.unlock();
std::unordered_map<std::string, TSSet *> inj_lookup;
for (auto &inj : editor->ts.injections)
if (inj.lang != "unknown")
inj_lookup[inj.lang] = &inj;
TSQueryCursor *cursor = ts_query_cursor_new();
ts_query_cursor_exec(cursor, editor->query, ts_tree_root_node(copy));
ts_query_cursor_exec(cursor, editor->ts.query, ts_tree_root_node(copy));
std::vector<Span> new_spans;
new_spans.reserve(4096);
struct PendingRanges {
std::vector<TSRange> ranges;
TSSet *tsset = nullptr;
};
std::unordered_map<std::string, PendingRanges> pending_injections;
TSQueryMatch match;
while (ts_query_cursor_next_match(cursor, &match)) {
if (!ts_predicate(editor->query, match, editor->root))
if (!ts_predicate(editor->ts.query, match, editor->root))
continue;
for (uint32_t i = 0; i < match.capture_count; i++) {
TSQueryCapture cap = match.captures[i];
uint32_t start = ts_node_start_byte(cap.node);
uint32_t end = ts_node_end_byte(cap.node);
Highlight *hl = safe_get(editor->query_map, cap.index);
if (hl)
if (Highlight *hl = safe_get(editor->ts.query_map, cap.index))
new_spans.push_back({start, end, hl});
if (!injections_enabled)
continue;
if (Language *inj_lang = safe_get(editor->ts.injection_map, cap.index)) {
auto &pending = pending_injections[inj_lang->name];
if (!pending.tsset) {
if (auto it = inj_lookup.find(inj_lang->name);
it != inj_lookup.end()) {
pending.tsset = it->second;
} else {
TSSet fresh{};
fresh.lang = inj_lang->name;
fresh.parser = ts_parser_new();
ts_parser_set_language(fresh.parser, inj_lang->fn());
fresh.language = inj_lang->fn();
fresh.query_file =
get_exe_dir() + "/../grammar/" + inj_lang->name + ".scm";
fresh.query = load_query(fresh.query_file.c_str(), &fresh);
editor->ts.injections.push_back(std::move(fresh));
pending.tsset = &editor->ts.injections.back();
inj_lookup[inj_lang->name] = pending.tsset;
}
}
pending.ranges.push_back(TSRange{
ts_node_start_point(cap.node),
ts_node_end_point(cap.node),
start,
end,
});
}
}
}
auto overlaps = [](const Span &s, const TSRange &r) {
return !(s.end <= r.start_byte || s.start >= r.end_byte);
};
if (injections_enabled) {
for (auto &[lang_name, pending] : pending_injections) {
TSSet *tsset = pending.tsset;
if (!tsset)
continue;
tsset->ranges = std::move(pending.ranges);
if (tsset->ranges.size() > 1)
new_spans.erase(std::remove_if(new_spans.begin(), new_spans.end(),
[&](const Span &sp) {
return std::any_of(
tsset->ranges.begin(),
tsset->ranges.end(),
[&](const TSRange &r) {
return overlaps(sp, r);
});
}),
new_spans.end());
}
for (auto &inj : editor->ts.injections) {
if (!inj.parser || !inj.query || inj.ranges.size() == 0)
continue;
ts_parser_set_included_ranges(inj.parser, inj.ranges.data(),
inj.ranges.size());
std::pair<uint32_t, int64_t> span_edit;
while (editor->spans.edits.pop(span_edit))
apply_edit(new_spans, span_edit.first, span_edit.second);
knot_mtx.lock();
TSTree *inj_tree = ts_parser_parse(inj.parser, inj.tree, tsinput);
knot_mtx.unlock();
if (inj.tree)
ts_tree_delete(inj.tree);
inj.tree = inj_tree;
TSTree *inj_copy = ts_tree_copy(inj_tree);
TSQueryCursor *inj_cursor = ts_query_cursor_new();
ts_query_cursor_exec(inj_cursor, inj.query, ts_tree_root_node(inj_copy));
TSQueryMatch inj_match;
while (ts_query_cursor_next_match(inj_cursor, &inj_match)) {
if (!ts_predicate(inj.query, inj_match, editor->root))
continue;
for (uint32_t i = 0; i < inj_match.capture_count; i++) {
TSQueryCapture cap = inj_match.captures[i];
uint32_t start = ts_node_start_byte(cap.node);
uint32_t end = ts_node_end_byte(cap.node);
if (Highlight *hl = safe_get(inj.query_map, cap.index))
new_spans.push_back({start, end, hl});
}
}
ts_query_cursor_delete(inj_cursor);
ts_tree_delete(inj_copy);
}
}
ts_query_cursor_delete(cursor);
ts_tree_delete(copy);
std::sort(new_spans.begin(), new_spans.end());
std::pair<uint32_t, int64_t> span_edit;
while (editor->spans.edits.pop(span_edit))
apply_edit(new_spans, span_edit.first, span_edit.second);
std::sort(new_spans.begin(), new_spans.end());
std::unique_lock span_mtx(editor->spans.mtx);
editor->spans.mid_parse = false;
editor->spans.spans.swap(new_spans);
span_mtx.unlock();
}

View File

@@ -2,22 +2,8 @@ extern "C" {
#include "../libs/libgrapheme/grapheme.h"
#include "../libs/unicode_width/unicode_width.h"
}
#include "../include/maps.h"
#include "../include/utils.h"
#include <algorithm>
#include <cctype>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <limits.h>
#include <magic.h>
#include <string.h>
#include <string>
#include <unistd.h>
#include <unordered_map>
static std::string percent_encode(const std::string &s) {
static const char *hex = "0123456789ABCDEF";
@@ -256,61 +242,21 @@ char *detect_file_type(const char *filename) {
return result;
}
static const std::unordered_map<std::string, Language> ext_map = {
{"sh", {"bash", tree_sitter_bash}},
{"bash", {"bash", tree_sitter_bash}},
{"c", {"c", tree_sitter_c, 1}},
{"cpp", {"cpp", tree_sitter_cpp, 1}},
{"cxx", {"cpp", tree_sitter_cpp, 1}},
{"cc", {"cpp", tree_sitter_cpp, 1}},
{"hpp", {"cpp", tree_sitter_cpp, 1}},
{"hh", {"cpp", tree_sitter_cpp, 1}},
{"hxx", {"cpp", tree_sitter_cpp, 1}},
{"h", {"cpp", tree_sitter_cpp, 1}},
{"css", {"css", tree_sitter_css}},
{"fish", {"fish", tree_sitter_fish}},
{"go", {"go", tree_sitter_go}},
{"hs", {"haskell", tree_sitter_haskell}},
{"html", {"html", tree_sitter_html}},
{"htm", {"html", tree_sitter_html}},
{"js", {"javascript", tree_sitter_javascript}},
{"json", {"json", tree_sitter_json}},
{"lua", {"lua", tree_sitter_lua}},
{"mk", {"make", tree_sitter_make}},
{"makefile", {"make", tree_sitter_make}},
{"py", {"python", tree_sitter_python}},
{"rb", {"ruby", tree_sitter_ruby}},
};
static const std::unordered_map<std::string, Language> mime_map = {
{"text/x-c", {"c", tree_sitter_c, 1}},
{"text/x-c++", {"cpp", tree_sitter_cpp, 1}},
{"text/x-shellscript", {"bash", tree_sitter_bash}},
{"application/json", {"json", tree_sitter_json}},
{"text/javascript", {"javascript", tree_sitter_javascript}},
{"text/html", {"html", tree_sitter_html}},
{"text/css", {"css", tree_sitter_css}},
{"text/x-python", {"python", tree_sitter_python}},
{"text/x-ruby", {"ruby", tree_sitter_ruby}},
{"text/x-go", {"go", tree_sitter_go}},
{"text/x-haskell", {"haskell", tree_sitter_haskell}},
{"text/x-lua", {"lua", tree_sitter_lua}},
};
Language language_for_file(const char *filename) {
std::string ext = file_extension(filename);
std::string lang_name;
if (!ext.empty()) {
auto it = ext_map.find(ext);
if (it != ext_map.end())
return it->second;
auto it = kExtToLang.find(ext);
if (it != kExtToLang.end())
return kLanguages.find(it->second)->second;
}
char *mime = detect_file_type(filename);
if (mime) {
std::string mime_type(mime);
free(mime);
auto it = mime_map.find(mime_type);
if (it != mime_map.end())
return it->second;
auto it = kMimeToLang.find(mime_type);
if (it != kMimeToLang.end())
return kLanguages.find(it->second)->second;
}
return {"unknown", nullptr};
}