Compare commits
3 Commits
f60d6ba0d8
...
303f008b6f
| Author | SHA1 | Date | |
|---|---|---|---|
|
303f008b6f
|
|||
|
774a21241e
|
|||
|
bc67d2e682
|
@@ -281,7 +281,7 @@
|
|||||||
(extglob_pattern)
|
(extglob_pattern)
|
||||||
] @string.regexp
|
] @string.regexp
|
||||||
|
|
||||||
;; #fbb152 #000000 0 0 0 3
|
;; #51eeba #000000 0 0 0 3
|
||||||
((program
|
((program
|
||||||
.
|
.
|
||||||
(comment) @keyword.directive @nospell)
|
(comment) @keyword.directive @nospell)
|
||||||
|
|||||||
@@ -245,23 +245,11 @@
|
|||||||
;; #AAAAAA #000000 0 1 0 1
|
;; #AAAAAA #000000 0 1 0 1
|
||||||
(comment) @comment
|
(comment) @comment
|
||||||
|
|
||||||
(program
|
;; #51eeba #000000 0 0 0 3
|
||||||
(comment)+ @comment.documentation
|
((program
|
||||||
(class))
|
.
|
||||||
|
(comment) @shebang @nospell)
|
||||||
(module
|
(#match? @shebang "^#!/"))
|
||||||
(comment)+ @comment.documentation
|
|
||||||
(body_statement
|
|
||||||
(class)))
|
|
||||||
|
|
||||||
(class
|
|
||||||
(comment)+ @comment.documentation
|
|
||||||
(body_statement
|
|
||||||
(method)))
|
|
||||||
|
|
||||||
(body_statement
|
|
||||||
(comment)+ @comment.documentation
|
|
||||||
(method))
|
|
||||||
|
|
||||||
;; #ffffff #000000 0 0 0 1
|
;; #ffffff #000000 0 0 0 1
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -104,11 +104,6 @@ struct Editor {
|
|||||||
// - built by tree-sitter helpers
|
// - built by tree-sitter helpers
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct TSLoad {
|
|
||||||
Editor *editor;
|
|
||||||
char *prev = nullptr;
|
|
||||||
} TSLoad;
|
|
||||||
|
|
||||||
Editor *new_editor(const char *filename, Coord position, Coord size);
|
Editor *new_editor(const char *filename, Coord position, Coord size);
|
||||||
void free_editor(Editor *editor);
|
void free_editor(Editor *editor);
|
||||||
void render_editor(Editor *editor);
|
void render_editor(Editor *editor);
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ typedef struct LeafIterator {
|
|||||||
Knot *node;
|
Knot *node;
|
||||||
uint8_t top;
|
uint8_t top;
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
|
uint32_t adjustment;
|
||||||
Knot *stack[64];
|
Knot *stack[64];
|
||||||
} LeafIterator;
|
} LeafIterator;
|
||||||
|
|
||||||
@@ -122,13 +123,15 @@ char *next_line(LineIterator *it);
|
|||||||
// Used to start an iterator over leaf data
|
// Used to start an iterator over leaf data
|
||||||
// root is the root of the rope
|
// root is the root of the rope
|
||||||
// the caller must free the iterator after use
|
// the caller must free the iterator after use
|
||||||
LeafIterator *begin_k_iter(Knot *root);
|
// start_offset is the byte from which the iterator should start
|
||||||
|
LeafIterator *begin_k_iter(Knot *root, uint32_t start_offset);
|
||||||
|
|
||||||
// Returns the next leaf data as a null terminated string
|
// Returns the next leaf data as a null terminated string
|
||||||
// `it` is the iterator returned from begin_k_iter
|
// `it` is the iterator returned from begin_k_iter
|
||||||
// ! Strings returned must never be freed by the caller !
|
// ! Strings returned must never be freed by the caller !
|
||||||
// to mutate the string a copy must be made
|
// to mutate the string a copy must be made
|
||||||
char *next_leaf(LeafIterator *it);
|
// `out_len` is set to the length of the returned string
|
||||||
|
char *next_leaf(LeafIterator *it, uint32_t *out_len);
|
||||||
|
|
||||||
// Used to start an iterator over byte data (one byte at a time)
|
// Used to start an iterator over byte data (one byte at a time)
|
||||||
// Uses leaf iterator internally
|
// Uses leaf iterator internally
|
||||||
@@ -140,6 +143,13 @@ ByteIterator *begin_b_iter(Knot *root);
|
|||||||
// `it` is the iterator returned from begin_b_iter
|
// `it` is the iterator returned from begin_b_iter
|
||||||
char next_byte(ByteIterator *it);
|
char next_byte(ByteIterator *it);
|
||||||
|
|
||||||
|
// Returns a leaf data as a null terminated string
|
||||||
|
// root is the root of the rope
|
||||||
|
// start_offset is the byte from which the leaf data should start
|
||||||
|
// `out_len` is set to the length of the returned string
|
||||||
|
// return value must never be freed
|
||||||
|
char *leaf_from_offset(Knot *root, uint32_t start_offset, uint32_t *out_len);
|
||||||
|
|
||||||
// Used to search for a pattern in the rope
|
// Used to search for a pattern in the rope
|
||||||
// Pattern is a null terminated string representing a regular expression (DFA
|
// Pattern is a null terminated string representing a regular expression (DFA
|
||||||
// compliant) I.e some forms of backtracking etc. are not supported
|
// compliant) I.e some forms of backtracking etc. are not supported
|
||||||
|
|||||||
@@ -2,10 +2,14 @@
|
|||||||
#define TS_H
|
#define TS_H
|
||||||
|
|
||||||
#include "./editor.h"
|
#include "./editor.h"
|
||||||
|
#include <pcre2.h>
|
||||||
|
|
||||||
#define HEX(s) (static_cast<uint32_t>(std::stoul(s, nullptr, 16)))
|
#define HEX(s) (static_cast<uint32_t>(std::stoul(s, nullptr, 16)))
|
||||||
|
|
||||||
|
extern std::unordered_map<std::string, pcre2_code *> regex_cache;
|
||||||
|
|
||||||
TSQuery *load_query(const char *query_path, Editor *editor);
|
TSQuery *load_query(const char *query_path, Editor *editor);
|
||||||
void ts_collect_spans(Editor *editor);
|
void ts_collect_spans(Editor *editor);
|
||||||
|
void clear_regex_cache();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -12,8 +12,11 @@ Editor *new_editor(const char *filename, Coord position, Coord size) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
uint32_t len = 0;
|
uint32_t len = 0;
|
||||||
char *str = load_file(filename, &len);
|
char *str = load_file(filename, &len);
|
||||||
if (!str)
|
if (!str) {
|
||||||
|
free_editor(editor);
|
||||||
|
log("me?");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
}
|
||||||
editor->filename = filename;
|
editor->filename = filename;
|
||||||
editor->position = position;
|
editor->position = position;
|
||||||
editor->size = size;
|
editor->size = size;
|
||||||
|
|||||||
27
src/main.cc
27
src/main.cc
@@ -5,22 +5,14 @@
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <iostream>
|
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
std::atomic<bool> running{true};
|
std::atomic<bool> running{true};
|
||||||
Queue<KeyEvent> event_queue;
|
Queue<KeyEvent> event_queue;
|
||||||
|
|
||||||
std::atomic<uint64_t> render_frames{0};
|
|
||||||
std::atomic<uint64_t> worker_frames{0};
|
|
||||||
|
|
||||||
auto start_time = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
void background_worker(Editor *editor) {
|
void background_worker(Editor *editor) {
|
||||||
while (running) {
|
while (running) {
|
||||||
worker_frames++;
|
|
||||||
|
|
||||||
ts_collect_spans(editor);
|
ts_collect_spans(editor);
|
||||||
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(16));
|
std::this_thread::sleep_for(std::chrono::milliseconds(16));
|
||||||
@@ -164,7 +156,7 @@ void handle_editor_event(Editor *editor, KeyEvent event) {
|
|||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
Coord screen = start_screen();
|
Coord screen = start_screen();
|
||||||
const char *filename = (argc > 1) ? argv[1] : "ts.cpp";
|
const char *filename = (argc > 1) ? argv[1] : "";
|
||||||
|
|
||||||
Editor *editor = new_editor(filename, {0, 0}, {screen.row, screen.col});
|
Editor *editor = new_editor(filename, {0, 0}, {screen.row, screen.col});
|
||||||
if (!editor) {
|
if (!editor) {
|
||||||
@@ -173,14 +165,10 @@ int main(int argc, char *argv[]) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
start_time = std::chrono::high_resolution_clock::now();
|
|
||||||
|
|
||||||
std::thread input_thread(input_listener);
|
std::thread input_thread(input_listener);
|
||||||
std::thread work_thread(background_worker, editor);
|
std::thread work_thread(background_worker, editor);
|
||||||
|
|
||||||
while (running) {
|
while (running) {
|
||||||
render_frames++;
|
|
||||||
|
|
||||||
KeyEvent event;
|
KeyEvent event;
|
||||||
while (event_queue.pop(event))
|
while (event_queue.pop(event))
|
||||||
handle_editor_event(editor, event);
|
handle_editor_event(editor, event);
|
||||||
@@ -196,20 +184,9 @@ int main(int argc, char *argv[]) {
|
|||||||
if (work_thread.joinable())
|
if (work_thread.joinable())
|
||||||
work_thread.join();
|
work_thread.join();
|
||||||
|
|
||||||
auto end_time = std::chrono::high_resolution_clock::now();
|
|
||||||
double seconds = std::chrono::duration<double>(end_time - start_time).count();
|
|
||||||
|
|
||||||
double render_fps = render_frames / seconds;
|
|
||||||
double worker_fps = worker_frames / seconds;
|
|
||||||
|
|
||||||
end_screen();
|
end_screen();
|
||||||
|
|
||||||
std::cout << "\n======= Performance Summary =======\n";
|
|
||||||
std::cout << "Runtime: " << seconds << "s\n";
|
|
||||||
std::cout << "Render loop FPS: " << render_fps << "Hz\n";
|
|
||||||
std::cout << "Worker loop FPS: " << worker_fps << "Hz\n";
|
|
||||||
std::cout << "===================================\n";
|
|
||||||
|
|
||||||
free_editor(editor);
|
free_editor(editor);
|
||||||
|
clear_regex_cache();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
218
src/rope.cc
218
src/rope.cc
@@ -25,6 +25,18 @@ static void update(Knot *n) {
|
|||||||
n->chunk_size = n->left ? n->left->chunk_size : n->right->chunk_size;
|
n->chunk_size = n->left ? n->left->chunk_size : n->right->chunk_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t optimal_chunk_size(uint64_t length) {
|
||||||
|
if (length <= MIN_CHUNK_SIZE)
|
||||||
|
return MIN_CHUNK_SIZE;
|
||||||
|
double target_exponent = MIN(std::log2((double)MAX_CHUNK_SIZE),
|
||||||
|
7.0 + (std::log2((double)length) - 10.0) * 0.25);
|
||||||
|
uint32_t final_chunk_size =
|
||||||
|
MAX((uint32_t)MIN_CHUNK_SIZE, (uint32_t)std::pow(2.0, target_exponent));
|
||||||
|
final_chunk_size = MIN(final_chunk_size, (uint32_t)MAX_CHUNK_SIZE);
|
||||||
|
final_chunk_size = 1U << (32 - __builtin_clz(final_chunk_size - 1));
|
||||||
|
return final_chunk_size;
|
||||||
|
}
|
||||||
|
|
||||||
// str is not consumed and \0 is not handled
|
// str is not consumed and \0 is not handled
|
||||||
// So if str is null terminated then len must be strlen(str)
|
// So if str is null terminated then len must be strlen(str)
|
||||||
// and freed by caller
|
// and freed by caller
|
||||||
@@ -570,31 +582,32 @@ char *next_line(LineIterator *it) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
LeafIterator *begin_k_iter(Knot *root) {
|
LeafIterator *begin_k_iter(Knot *root, uint32_t start_offset) {
|
||||||
if (!root)
|
if (!root)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
LeafIterator *it = (LeafIterator *)malloc(sizeof(LeafIterator));
|
LeafIterator *it = (LeafIterator *)malloc(sizeof(LeafIterator));
|
||||||
if (!it)
|
if (!it)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
it->top = 0;
|
it->top = 0;
|
||||||
|
it->adjustment = 0;
|
||||||
Knot *curr = root;
|
Knot *curr = root;
|
||||||
while (curr) {
|
while (curr) {
|
||||||
it->stack[it->top++] = curr;
|
it->stack[it->top++] = curr;
|
||||||
if (!curr->left && !curr->right) {
|
if (!curr->left && !curr->right) {
|
||||||
|
if (start_offset > curr->char_count) {
|
||||||
|
free(it);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
it->node = curr;
|
it->node = curr;
|
||||||
|
it->adjustment = start_offset;
|
||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
curr = curr->left;
|
uint32_t left_size = (curr->left) ? curr->left->char_count : 0;
|
||||||
if (!curr) {
|
if (start_offset < left_size) {
|
||||||
curr = it->stack[--it->top]->right;
|
curr = curr->left;
|
||||||
Knot *temp = it->stack[it->top];
|
} else {
|
||||||
it->stack[it->top++] = temp;
|
start_offset -= left_size;
|
||||||
curr = temp->left ? temp->left : temp->right;
|
curr = curr->right;
|
||||||
Knot *parent = it->stack[it->top - 1];
|
|
||||||
curr = parent->left;
|
|
||||||
if (!curr) {
|
|
||||||
curr = parent->right;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(it);
|
free(it);
|
||||||
@@ -602,11 +615,14 @@ LeafIterator *begin_k_iter(Knot *root) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Caller must never free the returned string
|
// Caller must never free the returned string
|
||||||
char *next_leaf(LeafIterator *it) {
|
char *next_leaf(LeafIterator *it, uint32_t *out_len) {
|
||||||
if (!it || !it->node)
|
if (!it || !it->node)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
char *data_to_return = it->node->data;
|
char *data_to_return = it->node->data + it->adjustment;
|
||||||
data_to_return[it->node->char_count] = '\0';
|
if (out_len)
|
||||||
|
*out_len = it->node->char_count - it->adjustment;
|
||||||
|
it->node->data[it->node->char_count] = '\0';
|
||||||
|
it->adjustment = 0;
|
||||||
Knot *prev_leaf = it->node;
|
Knot *prev_leaf = it->node;
|
||||||
Knot *parent = nullptr;
|
Knot *parent = nullptr;
|
||||||
while (it->top > 0) {
|
while (it->top > 0) {
|
||||||
@@ -632,7 +648,7 @@ char *next_leaf(LeafIterator *it) {
|
|||||||
|
|
||||||
ByteIterator *begin_b_iter(Knot *root) {
|
ByteIterator *begin_b_iter(Knot *root) {
|
||||||
ByteIterator *b_it = (ByteIterator *)malloc(sizeof(ByteIterator));
|
ByteIterator *b_it = (ByteIterator *)malloc(sizeof(ByteIterator));
|
||||||
LeafIterator *l_it = begin_k_iter(root);
|
LeafIterator *l_it = begin_k_iter(root, 0);
|
||||||
b_it->it = l_it;
|
b_it->it = l_it;
|
||||||
b_it->offset_g = 0;
|
b_it->offset_g = 0;
|
||||||
b_it->offset_l = 0;
|
b_it->offset_l = 0;
|
||||||
@@ -647,21 +663,46 @@ char next_byte(ByteIterator *it) {
|
|||||||
} else {
|
} else {
|
||||||
it->offset_g += it->offset_l;
|
it->offset_g += it->offset_l;
|
||||||
it->offset_l = 1;
|
it->offset_l = 1;
|
||||||
char *data = next_leaf(it->it);
|
char *data = next_leaf(it->it, &it->char_count);
|
||||||
if (!data)
|
if (!data)
|
||||||
return '\0';
|
return '\0';
|
||||||
it->char_count = strlen(data);
|
|
||||||
while (it->char_count <= 0) {
|
while (it->char_count <= 0) {
|
||||||
data = next_leaf(it->it);
|
data = next_leaf(it->it, &it->char_count);
|
||||||
if (!data)
|
if (!data)
|
||||||
return '\0';
|
return '\0';
|
||||||
it->char_count = strlen(data);
|
|
||||||
}
|
}
|
||||||
it->data = data;
|
it->data = data;
|
||||||
return *it->data;
|
return *it->data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Caller must NOT free returned string.
|
||||||
|
// Returns nullptr if offset is invalid or no leaf found.
|
||||||
|
char *leaf_from_offset(Knot *root, uint32_t start_offset, uint32_t *out_len) {
|
||||||
|
if (!root)
|
||||||
|
return nullptr;
|
||||||
|
Knot *curr = root;
|
||||||
|
while (curr) {
|
||||||
|
if (!curr->left && !curr->right) {
|
||||||
|
if (start_offset > curr->char_count)
|
||||||
|
return nullptr;
|
||||||
|
char *result = curr->data + start_offset;
|
||||||
|
if (out_len)
|
||||||
|
*out_len = curr->char_count - start_offset;
|
||||||
|
curr->data[curr->char_count] = '\0';
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
uint32_t left_size = curr->left ? curr->left->char_count : 0;
|
||||||
|
if (start_offset < left_size) {
|
||||||
|
curr = curr->left;
|
||||||
|
} else {
|
||||||
|
start_offset -= left_size;
|
||||||
|
curr = curr->right;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
|
std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
|
||||||
const char *pattern) {
|
const char *pattern) {
|
||||||
std::vector<std::pair<size_t, size_t>> results;
|
std::vector<std::pair<size_t, size_t>> results;
|
||||||
@@ -675,7 +716,7 @@ std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
|
|||||||
}
|
}
|
||||||
pcre2_match_data *mdata = pcre2_match_data_create(128, nullptr);
|
pcre2_match_data *mdata = pcre2_match_data_create(128, nullptr);
|
||||||
int workspace[PCRE_WORKSPACE_SIZE];
|
int workspace[PCRE_WORKSPACE_SIZE];
|
||||||
LeafIterator *it = begin_k_iter(root);
|
LeafIterator *it = begin_k_iter(root, 0);
|
||||||
if (!it) {
|
if (!it) {
|
||||||
pcre2_code_free(re);
|
pcre2_code_free(re);
|
||||||
pcre2_match_data_free(mdata);
|
pcre2_match_data_free(mdata);
|
||||||
@@ -686,7 +727,7 @@ std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
|
|||||||
bool match_in_progress = false;
|
bool match_in_progress = false;
|
||||||
int flags = PCRE2_PARTIAL_SOFT;
|
int flags = PCRE2_PARTIAL_SOFT;
|
||||||
while (1) {
|
while (1) {
|
||||||
const char *chunk_start = next_leaf(it);
|
const char *chunk_start = next_leaf(it, nullptr);
|
||||||
if (!chunk_start)
|
if (!chunk_start)
|
||||||
break;
|
break;
|
||||||
size_t chunk_len = strlen(chunk_start);
|
size_t chunk_len = strlen(chunk_start);
|
||||||
@@ -755,14 +796,125 @@ std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t optimal_chunk_size(uint64_t length) {
|
// TODO: Optimize and make it actually utilize capture groups etc.
|
||||||
if (length <= MIN_CHUNK_SIZE)
|
//
|
||||||
return MIN_CHUNK_SIZE;
|
// static const size_t MAX_OVERLAP = 1024;
|
||||||
double target_exponent = MIN(std::log2((double)MAX_CHUNK_SIZE),
|
//
|
||||||
7.0 + (std::log2((double)length) - 10.0) * 0.25);
|
// std::vector<std::pair<size_t, size_t>> search_rope_new(Knot *root,
|
||||||
uint32_t final_chunk_size =
|
// const char *pattern) {
|
||||||
MAX((uint32_t)MIN_CHUNK_SIZE, (uint32_t)std::pow(2.0, target_exponent));
|
// std::vector<std::pair<size_t, size_t>> results;
|
||||||
final_chunk_size = MIN(final_chunk_size, (uint32_t)MAX_CHUNK_SIZE);
|
// int errorcode;
|
||||||
final_chunk_size = 1U << (32 - __builtin_clz(final_chunk_size - 1));
|
// PCRE2_SIZE erroffset;
|
||||||
return final_chunk_size;
|
//
|
||||||
}
|
// // 1. Compile (Standard compilation)
|
||||||
|
// pcre2_code *re = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
|
||||||
|
// 0,
|
||||||
|
// &errorcode, &erroffset, nullptr);
|
||||||
|
// if (!re) {
|
||||||
|
// fprintf(stderr, "PCRE2 compile error: %d\n", errorcode);
|
||||||
|
// return results;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// pcre2_match_data *mdata = pcre2_match_data_create_from_pattern(re,
|
||||||
|
// nullptr);
|
||||||
|
//
|
||||||
|
// LeafIterator *it = begin_k_iter(root, 0);
|
||||||
|
// if (!it) {
|
||||||
|
// pcre2_code_free(re);
|
||||||
|
// pcre2_match_data_free(mdata);
|
||||||
|
// return results;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Buffer to hold (Last X chars) + (Current Chunk)
|
||||||
|
// std::string buffer;
|
||||||
|
//
|
||||||
|
// // Tracks where the *start* of the current buffer is located relative to
|
||||||
|
// the
|
||||||
|
// // whole rope
|
||||||
|
// size_t buffer_abs_offset = 0;
|
||||||
|
//
|
||||||
|
// // Tracks the absolute offset up to which we have already "cleared"
|
||||||
|
// matches.
|
||||||
|
// // This prevents reporting a match twice if it sits inside the overlap
|
||||||
|
// region. size_t processed_up_to_abs = 0;
|
||||||
|
//
|
||||||
|
// while (1) {
|
||||||
|
// // 2. Get next chunk
|
||||||
|
// const char *chunk_start = next_leaf(it, nullptr);
|
||||||
|
// if (!chunk_start)
|
||||||
|
// break;
|
||||||
|
//
|
||||||
|
// // 3. Update Buffer: Append new data
|
||||||
|
// size_t chunk_len = strlen(chunk_start);
|
||||||
|
// buffer.append(chunk_start, chunk_len);
|
||||||
|
//
|
||||||
|
// PCRE2_SPTR subject = (PCRE2_SPTR)buffer.c_str();
|
||||||
|
// size_t subject_len = buffer.length();
|
||||||
|
// size_t start_offset = 0;
|
||||||
|
//
|
||||||
|
// // 4. Run pcre2_match loop on the current window
|
||||||
|
// while (true) {
|
||||||
|
// int rc = pcre2_match(re, subject, subject_len, start_offset,
|
||||||
|
// 0, // Default options
|
||||||
|
// mdata, nullptr);
|
||||||
|
//
|
||||||
|
// if (rc < 0) {
|
||||||
|
// // No match (or error) in the rest of this buffer
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(mdata);
|
||||||
|
// size_t match_local_start = ovector[0];
|
||||||
|
// size_t match_local_end = ovector[1];
|
||||||
|
//
|
||||||
|
// // Calculate Absolute Coordinates
|
||||||
|
// size_t match_abs_start = buffer_abs_offset + match_local_start;
|
||||||
|
// size_t match_len = match_local_end - match_local_start;
|
||||||
|
//
|
||||||
|
// // 5. Deduplication Check
|
||||||
|
// // If we find a match that starts *before* where we finished processing
|
||||||
|
// // the previous chunk, it means this match is entirely inside the
|
||||||
|
// // overlap region and was reported in the previous iteration.
|
||||||
|
// if (match_abs_start >= processed_up_to_abs) {
|
||||||
|
// results.push_back(std::make_pair(match_abs_start, match_len));
|
||||||
|
// // Update processed marker so we don't report this again
|
||||||
|
// // (Using start + 1 ensures we allow overlapping matches if regex
|
||||||
|
// // allows, but strictly prevents the exact same start index being
|
||||||
|
// // reported twice)
|
||||||
|
// processed_up_to_abs = match_abs_start + 1;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Prepare for next match in this buffer
|
||||||
|
// start_offset = match_local_end;
|
||||||
|
//
|
||||||
|
// // Handle empty matches (e.g. "a*" matching empty string) to prevent
|
||||||
|
// // infinite loop
|
||||||
|
// if (match_local_end == match_local_start) {
|
||||||
|
// if (start_offset < subject_len) {
|
||||||
|
// start_offset++;
|
||||||
|
// } else {
|
||||||
|
// break; // End of buffer
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // 6. Maintenance: Shrink buffer to keep only the last MAX_OVERLAP
|
||||||
|
// // characters
|
||||||
|
// if (buffer.length() > MAX_OVERLAP) {
|
||||||
|
// size_t to_remove = buffer.length() - MAX_OVERLAP;
|
||||||
|
//
|
||||||
|
// // Remove from the beginning of the string
|
||||||
|
// buffer.erase(0, to_remove);
|
||||||
|
//
|
||||||
|
// // The buffer's start has now moved forward in absolute terms
|
||||||
|
// buffer_abs_offset += to_remove;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Cleanup
|
||||||
|
// pcre2_match_data_free(mdata);
|
||||||
|
// pcre2_code_free(re);
|
||||||
|
// free(it); // Assuming iter needs free based on original code usage
|
||||||
|
//
|
||||||
|
// return results;
|
||||||
|
// }
|
||||||
|
|||||||
68
src/ts.cc
68
src/ts.cc
@@ -8,7 +8,27 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
static std::unordered_map<std::string, std::regex> regex_cache;
|
std::unordered_map<std::string, pcre2_code *> regex_cache;
|
||||||
|
|
||||||
|
void clear_regex_cache() {
|
||||||
|
for (auto &kv : regex_cache) {
|
||||||
|
pcre2_code_free(kv.second);
|
||||||
|
}
|
||||||
|
regex_cache.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
pcre2_code *get_re(const std::string &pattern) {
|
||||||
|
auto it = regex_cache.find(pattern);
|
||||||
|
if (it != regex_cache.end())
|
||||||
|
return it->second;
|
||||||
|
int errornum;
|
||||||
|
PCRE2_SIZE erroffset;
|
||||||
|
pcre2_code *re =
|
||||||
|
pcre2_compile((PCRE2_SPTR)pattern.c_str(), PCRE2_ZERO_TERMINATED, 0,
|
||||||
|
&errornum, &erroffset, nullptr);
|
||||||
|
regex_cache[pattern] = re;
|
||||||
|
return re;
|
||||||
|
}
|
||||||
|
|
||||||
static const std::regex scm_regex(
|
static const std::regex scm_regex(
|
||||||
R"((@[A-Za-z0-9_.]+)|(;; \#[0-9a-fA-F]{6} \#[0-9a-fA-F]{6} [01] [01] [01] \d+))");
|
R"((@[A-Za-z0-9_.]+)|(;; \#[0-9a-fA-F]{6} \#[0-9a-fA-F]{6} [01] [01] [01] \d+))");
|
||||||
@@ -94,8 +114,6 @@ static inline bool ts_predicate(TSQuery *query, const TSQueryMatch &match,
|
|||||||
ts_query_predicates_for_pattern(query, match.pattern_index, &step_count);
|
ts_query_predicates_for_pattern(query, match.pattern_index, &step_count);
|
||||||
if (!steps || step_count != 4)
|
if (!steps || step_count != 4)
|
||||||
return true;
|
return true;
|
||||||
if (source->char_count >= (16 * 1024))
|
|
||||||
return false;
|
|
||||||
std::string command;
|
std::string command;
|
||||||
std::string regex_txt;
|
std::string regex_txt;
|
||||||
uint32_t subject_id = 0;
|
uint32_t subject_id = 0;
|
||||||
@@ -124,15 +142,13 @@ static inline bool ts_predicate(TSQuery *query, const TSQueryMatch &match,
|
|||||||
}
|
}
|
||||||
const TSNode *node = find_capture_node(match, subject_id);
|
const TSNode *node = find_capture_node(match, subject_id);
|
||||||
std::string subject = node_text(*node, source);
|
std::string subject = node_text(*node, source);
|
||||||
auto it = regex_cache.find(regex_txt);
|
pcre2_code *re = get_re(regex_txt);
|
||||||
if (it == regex_cache.end())
|
pcre2_match_data *md = pcre2_match_data_create_from_pattern(re, nullptr);
|
||||||
it = regex_cache.emplace(regex_txt, std::regex(regex_txt)).first;
|
int rc = pcre2_match(re, (PCRE2_SPTR)subject.c_str(), subject.size(), 0, 0,
|
||||||
const std::regex &re = it->second;
|
md, nullptr);
|
||||||
if (command == "match?")
|
pcre2_match_data_free(md);
|
||||||
return std::regex_match(subject, re);
|
bool ok = (rc >= 0);
|
||||||
else if (command == "not-match?")
|
return (command == "match?" ? ok : !ok);
|
||||||
return !std::regex_match(subject, re);
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *read_ts(void *payload, uint32_t byte_index, TSPoint,
|
const char *read_ts(void *payload, uint32_t byte_index, TSPoint,
|
||||||
@@ -141,24 +157,12 @@ const char *read_ts(void *payload, uint32_t byte_index, TSPoint,
|
|||||||
*bytes_read = 0;
|
*bytes_read = 0;
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
TSLoad *load = (TSLoad *)payload;
|
Editor *editor = (Editor *)payload;
|
||||||
Knot *root = load->editor->root;
|
if (byte_index >= editor->root->char_count) {
|
||||||
if (load->prev)
|
|
||||||
free(load->prev);
|
|
||||||
if (byte_index >= root->char_count) {
|
|
||||||
*bytes_read = 0;
|
*bytes_read = 0;
|
||||||
load->prev = nullptr;
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
uint32_t chunk_size = 4096;
|
return leaf_from_offset(editor->root, byte_index, bytes_read);
|
||||||
uint32_t remaining = root->char_count - byte_index;
|
|
||||||
uint32_t len_to_read = remaining > chunk_size ? chunk_size : remaining;
|
|
||||||
std::shared_lock lock(load->editor->knot_mtx);
|
|
||||||
char *buffer = read(root, byte_index, len_to_read);
|
|
||||||
lock.unlock();
|
|
||||||
load->prev = buffer;
|
|
||||||
*bytes_read = len_to_read;
|
|
||||||
return buffer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
|
static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
|
||||||
@@ -170,9 +174,8 @@ static inline Highlight *safe_get(std::vector<Highlight> &vec, size_t index) {
|
|||||||
void ts_collect_spans(Editor *editor) {
|
void ts_collect_spans(Editor *editor) {
|
||||||
if (!editor->parser || !editor->root || !editor->query)
|
if (!editor->parser || !editor->root || !editor->query)
|
||||||
return;
|
return;
|
||||||
TSLoad load = {editor, nullptr};
|
|
||||||
TSInput tsinput = {
|
TSInput tsinput = {
|
||||||
.payload = &load,
|
.payload = editor,
|
||||||
.read = read_ts,
|
.read = read_ts,
|
||||||
.encoding = TSInputEncodingUTF8,
|
.encoding = TSInputEncodingUTF8,
|
||||||
.decode = nullptr,
|
.decode = nullptr,
|
||||||
@@ -196,7 +199,12 @@ void ts_collect_spans(Editor *editor) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
editor->spans.mid_parse = true;
|
editor->spans.mid_parse = true;
|
||||||
|
// TODO: Remove this lock and replace with an index
|
||||||
|
// modifier based on edits made in the `read_ts` function.
|
||||||
|
std::shared_lock lock(editor->knot_mtx);
|
||||||
|
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
tree = ts_parser_parse(editor->parser, copy, tsinput);
|
tree = ts_parser_parse(editor->parser, copy, tsinput);
|
||||||
|
lock.unlock();
|
||||||
if (copy)
|
if (copy)
|
||||||
ts_tree_delete(copy);
|
ts_tree_delete(copy);
|
||||||
knot_mtx.lock();
|
knot_mtx.lock();
|
||||||
@@ -228,8 +236,6 @@ void ts_collect_spans(Editor *editor) {
|
|||||||
}
|
}
|
||||||
ts_query_cursor_delete(cursor);
|
ts_query_cursor_delete(cursor);
|
||||||
ts_tree_delete(copy);
|
ts_tree_delete(copy);
|
||||||
if (load.prev)
|
|
||||||
free(load.prev);
|
|
||||||
if (!running)
|
if (!running)
|
||||||
return;
|
return;
|
||||||
std::sort(new_spans.begin(), new_spans.end());
|
std::sort(new_spans.begin(), new_spans.end());
|
||||||
|
|||||||
Reference in New Issue
Block a user