Make tree-sitter read function utilize faster rope reading

- By removing the allocations/deallocations that the function previously performed on every read
This commit is contained in:
2025-12-10 18:02:37 +00:00
parent f60d6ba0d8
commit bc67d2e682
4 changed files with 69 additions and 48 deletions

View File

@@ -104,11 +104,6 @@ struct Editor {
// - built by tree-sitter helpers // - built by tree-sitter helpers
}; };
// Payload passed to the tree-sitter read callback (read_ts)
typedef struct TSLoad {
// editor whose rope (editor->root) the callback reads from
Editor *editor;
// buffer returned by the previous read_ts call; it is freed at the start of
// the next call and once more after parsing, nullptr when there is none
char *prev = nullptr;
} TSLoad;
Editor *new_editor(const char *filename, Coord position, Coord size); Editor *new_editor(const char *filename, Coord position, Coord size);
void free_editor(Editor *editor); void free_editor(Editor *editor);
void render_editor(Editor *editor); void render_editor(Editor *editor);

View File

@@ -36,6 +36,7 @@ typedef struct LeafIterator {
Knot *node; Knot *node;
uint8_t top; uint8_t top;
uint32_t offset; uint32_t offset;
uint32_t adjustment;
Knot *stack[64]; Knot *stack[64];
} LeafIterator; } LeafIterator;
@@ -122,13 +123,15 @@ char *next_line(LineIterator *it);
// Used to start an iterator over leaf data // Used to start an iterator over leaf data
// root is the root of the rope // root is the root of the rope
// the caller must free the iterator after use // the caller must free the iterator after use
LeafIterator *begin_k_iter(Knot *root); // start_offset is the byte from which the iterator should start
LeafIterator *begin_k_iter(Knot *root, uint32_t start_offset);
// Returns the next leaf data as a null terminated string // Returns the next leaf data as a null terminated string
// `it` is the iterator returned from begin_k_iter // `it` is the iterator returned from begin_k_iter
// ! Strings returned must never be freed by the caller ! // ! Strings returned must never be freed by the caller !
// to mutate the string a copy must be made // to mutate the string a copy must be made
char *next_leaf(LeafIterator *it); // `out_len` is set to the length of the returned string
char *next_leaf(LeafIterator *it, uint32_t *out_len);
// Used to start an iterator over byte data (one byte at a time) // Used to start an iterator over byte data (one byte at a time)
// Uses leaf iterator internally // Uses leaf iterator internally
@@ -140,6 +143,13 @@ ByteIterator *begin_b_iter(Knot *root);
// `it` is the iterator returned from begin_b_iter // `it` is the iterator returned from begin_b_iter
char next_byte(ByteIterator *it); char next_byte(ByteIterator *it);
// Returns the data of a single leaf as a null terminated string,
// beginning at the requested byte
// root is the root of the rope
// start_offset is the byte from which the leaf data should start
// `out_len` is set to the length of the returned string (may be nullptr)
// Returns nullptr when root is null or the offset cannot be resolved to a leaf
// ! The return value must never be freed by the caller !
char *leaf_from_offset(Knot *root, uint32_t start_offset, uint32_t *out_len);
// Used to search for a pattern in the rope // Used to search for a pattern in the rope
// Pattern is a null terminated string representing a regular expression (DFA // Pattern is a null terminated string representing a regular expression (DFA
// compliant) I.e some forms of backtracking etc. are not supported // compliant) I.e some forms of backtracking etc. are not supported

View File

@@ -570,31 +570,32 @@ char *next_line(LineIterator *it) {
return nullptr; return nullptr;
} }
LeafIterator *begin_k_iter(Knot *root) { LeafIterator *begin_k_iter(Knot *root, uint32_t start_offset) {
if (!root) if (!root)
return nullptr; return nullptr;
LeafIterator *it = (LeafIterator *)malloc(sizeof(LeafIterator)); LeafIterator *it = (LeafIterator *)malloc(sizeof(LeafIterator));
if (!it) if (!it)
return nullptr; return nullptr;
it->top = 0; it->top = 0;
it->adjustment = 0;
Knot *curr = root; Knot *curr = root;
while (curr) { while (curr) {
it->stack[it->top++] = curr; it->stack[it->top++] = curr;
if (!curr->left && !curr->right) { if (!curr->left && !curr->right) {
if (start_offset > curr->char_count) {
free(it);
return nullptr;
}
it->node = curr; it->node = curr;
it->adjustment = start_offset;
return it; return it;
} }
uint32_t left_size = (curr->left) ? curr->left->char_count : 0;
if (start_offset < left_size) {
curr = curr->left; curr = curr->left;
if (!curr) { } else {
curr = it->stack[--it->top]->right; start_offset -= left_size;
Knot *temp = it->stack[it->top]; curr = curr->right;
it->stack[it->top++] = temp;
curr = temp->left ? temp->left : temp->right;
Knot *parent = it->stack[it->top - 1];
curr = parent->left;
if (!curr) {
curr = parent->right;
}
} }
} }
free(it); free(it);
@@ -602,11 +603,14 @@ LeafIterator *begin_k_iter(Knot *root) {
} }
// Caller must never free the returned string // Caller must never free the returned string
char *next_leaf(LeafIterator *it) { char *next_leaf(LeafIterator *it, uint32_t *out_len) {
if (!it || !it->node) if (!it || !it->node)
return nullptr; return nullptr;
char *data_to_return = it->node->data; char *data_to_return = it->node->data + it->adjustment;
data_to_return[it->node->char_count] = '\0'; if (out_len)
*out_len = it->node->char_count - it->adjustment;
it->node->data[it->node->char_count] = '\0';
it->adjustment = 0;
Knot *prev_leaf = it->node; Knot *prev_leaf = it->node;
Knot *parent = nullptr; Knot *parent = nullptr;
while (it->top > 0) { while (it->top > 0) {
@@ -632,7 +636,7 @@ char *next_leaf(LeafIterator *it) {
ByteIterator *begin_b_iter(Knot *root) { ByteIterator *begin_b_iter(Knot *root) {
ByteIterator *b_it = (ByteIterator *)malloc(sizeof(ByteIterator)); ByteIterator *b_it = (ByteIterator *)malloc(sizeof(ByteIterator));
LeafIterator *l_it = begin_k_iter(root); LeafIterator *l_it = begin_k_iter(root, 0);
b_it->it = l_it; b_it->it = l_it;
b_it->offset_g = 0; b_it->offset_g = 0;
b_it->offset_l = 0; b_it->offset_l = 0;
@@ -647,21 +651,46 @@ char next_byte(ByteIterator *it) {
} else { } else {
it->offset_g += it->offset_l; it->offset_g += it->offset_l;
it->offset_l = 1; it->offset_l = 1;
char *data = next_leaf(it->it); char *data = next_leaf(it->it, &it->char_count);
if (!data) if (!data)
return '\0'; return '\0';
it->char_count = strlen(data);
while (it->char_count <= 0) { while (it->char_count <= 0) {
data = next_leaf(it->it); data = next_leaf(it->it, &it->char_count);
if (!data) if (!data)
return '\0'; return '\0';
it->char_count = strlen(data);
} }
it->data = data; it->data = data;
return *it->data; return *it->data;
} }
} }
// Returns the tail of the leaf that contains byte `start_offset`, beginning at
// that byte, as a null terminated string.
// root is the root of the rope (may be nullptr)
// start_offset is the byte from which the leaf data should start; it is
// narrowed to a leaf-local offset while descending
// `out_len` (optional) is set to the number of bytes available in the returned
// string, and to 0 whenever nullptr is returned, so a caller that forwards it
// (e.g. as a "bytes read" count) never observes a stale or uninitialised value
// Returns nullptr if root is null or the offset does not resolve to a leaf.
// ! The returned pointer aliases the rope's own storage: it must NOT be freed !
char *leaf_from_offset(Knot *root, uint32_t start_offset, uint32_t *out_len) {
  // Publish "nothing read" first; only the success path overwrites it.
  if (out_len)
    *out_len = 0;
  Knot *curr = root;
  while (curr) {
    if (!curr->left && !curr->right) {
      // Leaf reached: start_offset is now relative to this leaf's data.
      if (start_offset > curr->char_count)
        return nullptr;
      // Terminate the leaf in place so the result is a valid C string.
      // NOTE(review): assumes data has room for char_count + 1 bytes — confirm.
      curr->data[curr->char_count] = '\0';
      if (out_len)
        *out_len = curr->char_count - start_offset;
      return curr->data + start_offset;
    }
    // Interior node: offsets below the left child's char_count are looked up
    // in the left child; everything else is rebased and looked up on the right.
    const uint32_t left_size = curr->left ? curr->left->char_count : 0;
    if (start_offset < left_size) {
      curr = curr->left;
    } else {
      start_offset -= left_size;
      curr = curr->right;
    }
  }
  // Null root, or the descent ran into a missing child.
  return nullptr;
}
std::vector<std::pair<size_t, size_t>> search_rope(Knot *root, std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
const char *pattern) { const char *pattern) {
std::vector<std::pair<size_t, size_t>> results; std::vector<std::pair<size_t, size_t>> results;
@@ -675,7 +704,7 @@ std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
} }
pcre2_match_data *mdata = pcre2_match_data_create(128, nullptr); pcre2_match_data *mdata = pcre2_match_data_create(128, nullptr);
int workspace[PCRE_WORKSPACE_SIZE]; int workspace[PCRE_WORKSPACE_SIZE];
LeafIterator *it = begin_k_iter(root); LeafIterator *it = begin_k_iter(root, 0);
if (!it) { if (!it) {
pcre2_code_free(re); pcre2_code_free(re);
pcre2_match_data_free(mdata); pcre2_match_data_free(mdata);
@@ -686,7 +715,7 @@ std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
bool match_in_progress = false; bool match_in_progress = false;
int flags = PCRE2_PARTIAL_SOFT; int flags = PCRE2_PARTIAL_SOFT;
while (1) { while (1) {
const char *chunk_start = next_leaf(it); const char *chunk_start = next_leaf(it, nullptr);
if (!chunk_start) if (!chunk_start)
break; break;
size_t chunk_len = strlen(chunk_start); size_t chunk_len = strlen(chunk_start);

View File

@@ -141,24 +141,12 @@ const char *read_ts(void *payload, uint32_t byte_index, TSPoint,
*bytes_read = 0; *bytes_read = 0;
return ""; return "";
} }
TSLoad *load = (TSLoad *)payload; Editor *editor = (Editor *)payload;
Knot *root = load->editor->root; if (byte_index >= editor->root->char_count) {
if (load->prev)
free(load->prev);
if (byte_index >= root->char_count) {
*bytes_read = 0; *bytes_read = 0;
load->prev = nullptr;
return ""; return "";
} }
uint32_t chunk_size = 4096; return leaf_from_offset(editor->root, byte_index, bytes_read);
uint32_t remaining = root->char_count - byte_index;
uint32_t len_to_read = remaining > chunk_size ? chunk_size : remaining;
std::shared_lock lock(load->editor->knot_mtx);
char *buffer = read(root, byte_index, len_to_read);
lock.unlock();
load->prev = buffer;
*bytes_read = len_to_read;
return buffer;
} }
static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) { static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
@@ -170,9 +158,8 @@ static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
void ts_collect_spans(Editor *editor) { void ts_collect_spans(Editor *editor) {
if (!editor->parser || !editor->root || !editor->query) if (!editor->parser || !editor->root || !editor->query)
return; return;
TSLoad load = {editor, nullptr};
TSInput tsinput = { TSInput tsinput = {
.payload = &load, .payload = editor,
.read = read_ts, .read = read_ts,
.encoding = TSInputEncodingUTF8, .encoding = TSInputEncodingUTF8,
.decode = nullptr, .decode = nullptr,
@@ -196,7 +183,9 @@ void ts_collect_spans(Editor *editor) {
return; return;
} }
editor->spans.mid_parse = true; editor->spans.mid_parse = true;
std::shared_lock lock(editor->knot_mtx);
tree = ts_parser_parse(editor->parser, copy, tsinput); tree = ts_parser_parse(editor->parser, copy, tsinput);
lock.unlock();
if (copy) if (copy)
ts_tree_delete(copy); ts_tree_delete(copy);
knot_mtx.lock(); knot_mtx.lock();
@@ -228,8 +217,6 @@ void ts_collect_spans(Editor *editor) {
} }
ts_query_cursor_delete(cursor); ts_query_cursor_delete(cursor);
ts_tree_delete(copy); ts_tree_delete(copy);
if (load.prev)
free(load.prev);
if (!running) if (!running)
return; return;
std::sort(new_spans.begin(), new_spans.end()); std::sort(new_spans.begin(), new_spans.end());