Make tree-sitter read function utilize faster rope reading

- By removing the allocations/deallocations that the function previously performed
This commit is contained in:
2025-12-10 18:02:37 +00:00
parent f60d6ba0d8
commit bc67d2e682
4 changed files with 69 additions and 48 deletions

View File

@@ -104,11 +104,6 @@ struct Editor {
// - built by tree-sitter helpers
};
// Payload passed to the tree-sitter read callback (read_ts) through
// TSInput::payload.
typedef struct TSLoad {
// Editor whose rope (editor->root) read_ts reads from.
Editor *editor;
// Buffer returned by the previous read_ts call. read_ts frees it at the start
// of the next call and ts_collect_spans frees the last one after parsing;
// nullptr when no buffer is outstanding.
char *prev = nullptr;
} TSLoad;
Editor *new_editor(const char *filename, Coord position, Coord size);
void free_editor(Editor *editor);
void render_editor(Editor *editor);

View File

@@ -36,6 +36,7 @@ typedef struct LeafIterator {
Knot *node;
uint8_t top;
uint32_t offset;
uint32_t adjustment;
Knot *stack[64];
} LeafIterator;
@@ -122,13 +123,15 @@ char *next_line(LineIterator *it);
// Used to start an iterator over leaf data
// root is the root of the rope
// the caller must free the iterator after use
LeafIterator *begin_k_iter(Knot *root);
// start_offset is the byte from which the iterator should start
LeafIterator *begin_k_iter(Knot *root, uint32_t start_offset);
// Returns the next leaf data as a null terminated string
// `it` is the iterator returned from begin_k_iter
// ! Strings returned must never be freed by the caller !
// to mutate the string a copy must be made
char *next_leaf(LeafIterator *it);
// `out_len` is set to the length of the returned string
char *next_leaf(LeafIterator *it, uint32_t *out_len);
// Used to start an iterator over byte data (one byte at a time)
// Uses leaf iterator internally
@@ -140,6 +143,13 @@ ByteIterator *begin_b_iter(Knot *root);
// `it` is the iterator returned from begin_b_iter
char next_byte(ByteIterator *it);
// Returns leaf data as a null-terminated string
// root is the root of the rope
// start_offset is the byte offset from which the returned leaf data should start
// `out_len` is set to the length of the returned string
// The returned pointer must never be freed by the caller
char *leaf_from_offset(Knot *root, uint32_t start_offset, uint32_t *out_len);
// Used to search for a pattern in the rope
// Pattern is a null terminated string representing a regular expression (DFA
// compliant) I.e some forms of backtracking etc. are not supported

View File

@@ -570,31 +570,32 @@ char *next_line(LineIterator *it) {
return nullptr;
}
LeafIterator *begin_k_iter(Knot *root) {
LeafIterator *begin_k_iter(Knot *root, uint32_t start_offset) {
if (!root)
return nullptr;
LeafIterator *it = (LeafIterator *)malloc(sizeof(LeafIterator));
if (!it)
return nullptr;
it->top = 0;
it->adjustment = 0;
Knot *curr = root;
while (curr) {
it->stack[it->top++] = curr;
if (!curr->left && !curr->right) {
if (start_offset > curr->char_count) {
free(it);
return nullptr;
}
it->node = curr;
it->adjustment = start_offset;
return it;
}
curr = curr->left;
if (!curr) {
curr = it->stack[--it->top]->right;
Knot *temp = it->stack[it->top];
it->stack[it->top++] = temp;
curr = temp->left ? temp->left : temp->right;
Knot *parent = it->stack[it->top - 1];
curr = parent->left;
if (!curr) {
curr = parent->right;
}
uint32_t left_size = (curr->left) ? curr->left->char_count : 0;
if (start_offset < left_size) {
curr = curr->left;
} else {
start_offset -= left_size;
curr = curr->right;
}
}
free(it);
@@ -602,11 +603,14 @@ LeafIterator *begin_k_iter(Knot *root) {
}
// Caller must never free the returned string
char *next_leaf(LeafIterator *it) {
char *next_leaf(LeafIterator *it, uint32_t *out_len) {
if (!it || !it->node)
return nullptr;
char *data_to_return = it->node->data;
data_to_return[it->node->char_count] = '\0';
char *data_to_return = it->node->data + it->adjustment;
if (out_len)
*out_len = it->node->char_count - it->adjustment;
it->node->data[it->node->char_count] = '\0';
it->adjustment = 0;
Knot *prev_leaf = it->node;
Knot *parent = nullptr;
while (it->top > 0) {
@@ -632,7 +636,7 @@ char *next_leaf(LeafIterator *it) {
ByteIterator *begin_b_iter(Knot *root) {
ByteIterator *b_it = (ByteIterator *)malloc(sizeof(ByteIterator));
LeafIterator *l_it = begin_k_iter(root);
LeafIterator *l_it = begin_k_iter(root, 0);
b_it->it = l_it;
b_it->offset_g = 0;
b_it->offset_l = 0;
@@ -647,21 +651,46 @@ char next_byte(ByteIterator *it) {
} else {
it->offset_g += it->offset_l;
it->offset_l = 1;
char *data = next_leaf(it->it);
char *data = next_leaf(it->it, &it->char_count);
if (!data)
return '\0';
it->char_count = strlen(data);
while (it->char_count <= 0) {
data = next_leaf(it->it);
data = next_leaf(it->it, &it->char_count);
if (!data)
return '\0';
it->char_count = strlen(data);
}
it->data = data;
return *it->data;
}
}
// Returns a pointer into the leaf that holds byte `start_offset` of the rope,
// positioned at that byte. No memory is allocated or copied.
//
// root         - root of the rope; may be nullptr.
// start_offset - byte offset, relative to the whole rope, at which the
//                returned data should start.
// out_len      - optional; receives the number of bytes available from the
//                returned pointer to the end of that leaf. It is set to 0
//                whenever nullptr is returned, so callers that forward it
//                (e.g. as a tree-sitter `bytes_read`) never see an
//                unwritten value.
//
// Returns nullptr if `root` is null, if the offset lies past the end of the
// leaf reached, or if the descent runs into a missing child.
//
// Caller must NOT free the returned string. A '\0' is written at
// data[char_count] of the leaf before returning.
// NOTE(review): this assumes every leaf buffer has room for that extra
// terminator byte - confirm against the leaf allocation code.
char *leaf_from_offset(Knot *root, uint32_t start_offset, uint32_t *out_len) {
  // Define the output up front so every failure path reports zero bytes.
  if (out_len)
    *out_len = 0;
  Knot *curr = root;
  while (curr) {
    if (!curr->left && !curr->right) {
      // Leaf reached: `start_offset` is now relative to this leaf's data.
      if (start_offset > curr->char_count)
        return nullptr;
      if (out_len)
        *out_len = curr->char_count - start_offset;
      curr->data[curr->char_count] = '\0';
      return curr->data + start_offset;
    }
    // Internal node: descend left if the offset falls inside the left
    // subtree, otherwise rebase the offset and descend right.
    uint32_t left_size = curr->left ? curr->left->char_count : 0;
    if (start_offset < left_size) {
      curr = curr->left;
    } else {
      start_offset -= left_size;
      curr = curr->right;
    }
  }
  return nullptr;
}
std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
const char *pattern) {
std::vector<std::pair<size_t, size_t>> results;
@@ -675,7 +704,7 @@ std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
}
pcre2_match_data *mdata = pcre2_match_data_create(128, nullptr);
int workspace[PCRE_WORKSPACE_SIZE];
LeafIterator *it = begin_k_iter(root);
LeafIterator *it = begin_k_iter(root, 0);
if (!it) {
pcre2_code_free(re);
pcre2_match_data_free(mdata);
@@ -686,7 +715,7 @@ std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
bool match_in_progress = false;
int flags = PCRE2_PARTIAL_SOFT;
while (1) {
const char *chunk_start = next_leaf(it);
const char *chunk_start = next_leaf(it, nullptr);
if (!chunk_start)
break;
size_t chunk_len = strlen(chunk_start);

View File

@@ -141,24 +141,12 @@ const char *read_ts(void *payload, uint32_t byte_index, TSPoint,
*bytes_read = 0;
return "";
}
TSLoad *load = (TSLoad *)payload;
Knot *root = load->editor->root;
if (load->prev)
free(load->prev);
if (byte_index >= root->char_count) {
Editor *editor = (Editor *)payload;
if (byte_index >= editor->root->char_count) {
*bytes_read = 0;
load->prev = nullptr;
return "";
}
uint32_t chunk_size = 4096;
uint32_t remaining = root->char_count - byte_index;
uint32_t len_to_read = remaining > chunk_size ? chunk_size : remaining;
std::shared_lock lock(load->editor->knot_mtx);
char *buffer = read(root, byte_index, len_to_read);
lock.unlock();
load->prev = buffer;
*bytes_read = len_to_read;
return buffer;
return leaf_from_offset(editor->root, byte_index, bytes_read);
}
static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
@@ -170,9 +158,8 @@ static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
void ts_collect_spans(Editor *editor) {
if (!editor->parser || !editor->root || !editor->query)
return;
TSLoad load = {editor, nullptr};
TSInput tsinput = {
.payload = &load,
.payload = editor,
.read = read_ts,
.encoding = TSInputEncodingUTF8,
.decode = nullptr,
@@ -196,7 +183,9 @@ void ts_collect_spans(Editor *editor) {
return;
}
editor->spans.mid_parse = true;
std::shared_lock lock(editor->knot_mtx);
tree = ts_parser_parse(editor->parser, copy, tsinput);
lock.unlock();
if (copy)
ts_tree_delete(copy);
knot_mtx.lock();
@@ -228,8 +217,6 @@ void ts_collect_spans(Editor *editor) {
}
ts_query_cursor_delete(cursor);
ts_tree_delete(copy);
if (load.prev)
free(load.prev);
if (!running)
return;
std::sort(new_spans.begin(), new_spans.end());