Fix memory leaks

This commit is contained in:
2025-11-28 19:14:43 +00:00
parent ae5da6b38e
commit 5d0b789402
7 changed files with 167 additions and 117 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
tst
*.bin
*.vim

View File

@@ -79,9 +79,24 @@ struct Inst {
int idx; int idx;
}; };
// A single VM thread as used by the matcher: a position in the compiled
// program together with the slot values that thread carries with it.
struct Thread {
// Instruction this thread is currently positioned at.
Inst *pc;
// Each Thread holds its own array of 40 slots.
// NOTE(review): presumably capture offsets for groups $0..$9 — confirm the
// per-group slot layout against the code that writes them.
uint32_t saved[40]; /* $0 through $9 */
};
// A list of threads backed by a caller-allocated array. The matcher is handed
// two of these (clist / nlist) and iterates over the first n entries of t.
struct ThreadList {
// Backing array of threads; callers size it from proglen() of the program.
Thread *t;
// Number of entries of t currently in use.
int n;
};
Exp *regex_to_ast(std::string pattern); Exp *regex_to_ast(std::string pattern);
void free_exp(Exp *exp);
Inst *compile_ast(Exp *root); Inst *compile_ast(Exp *root);
Inst *compile_regex(std::string pattern); Inst *compile_regex(std::string pattern);
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved); int proglen(Inst *prog);
void free_program(Inst *instructions);
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved, ThreadList *clist,
ThreadList *nlist);
void print_program(Inst *program);
#endif #endif

View File

@@ -3,7 +3,6 @@
#include <assert.h> #include <assert.h>
#include <cstdint> #include <cstdint>
#include <cstdlib> #include <cstdlib>
#include <cstring>
#include <stdio.h> #include <stdio.h>
// VM - pass 2 // VM - pass 2
@@ -19,11 +18,6 @@ bool test_ranges(char inp, Range *ranges, int len) {
// Use pike vm method // Use pike vm method
struct Thread {
Inst *pc;
uint32_t saved[40]; /* $0 through $9 */
};
Thread thread(Inst *pc, uint32_t *saved) { Thread thread(Inst *pc, uint32_t *saved) {
Thread t; Thread t;
t.pc = pc; t.pc = pc;
@@ -32,16 +26,6 @@ Thread thread(Inst *pc, uint32_t *saved) {
return t; return t;
} }
struct ThreadList {
Thread *t;
int n;
};
void handle_end(uint32_t *tsaved, uint32_t *saved) {
for (int i = 0; i < 40; i++)
saved[i] = tsaved[i];
}
bool addstate(Inst *prog, ThreadList *list, Thread t, int count) { bool addstate(Inst *prog, ThreadList *list, Thread t, int count) {
if (t.pc->op == JMP) { if (t.pc->op == JMP) {
if (addstate(prog, list, thread(prog + t.pc->j.x, t.saved), count)) if (addstate(prog, list, thread(prog + t.pc->j.x, t.saved), count))
@@ -66,7 +50,6 @@ bool addstate(Inst *prog, ThreadList *list, Thread t, int count) {
return true; return true;
return false; return false;
} else if (t.pc->op == END) { } else if (t.pc->op == END) {
handle_end(t.saved, t.saved);
return true; return true;
} else { } else {
for (int i = 0; i < list->n; i++) for (int i = 0; i < list->n; i++)
@@ -97,24 +80,13 @@ void inline free_list(ThreadList *list) {
free(list); free(list);
} }
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved) { int next_match(Inst *prog, ByteIterator *it, uint32_t *saved, ThreadList *clist,
int len; ThreadList *nlist) {
ThreadList *clist, *nlist;
Thread t; Thread t;
len = proglen(prog);
clist = (ThreadList *)malloc(sizeof(ThreadList));
clist->t = (Thread *)malloc(+sizeof(Thread) * len);
clist->n = 0;
nlist = (ThreadList *)malloc(sizeof(ThreadList));
nlist->t = (Thread *)malloc(+sizeof(Thread) * len);
nlist->n = 0;
char sp; char sp;
int count = 0; int count = 0;
addstate(prog, clist, thread(prog, saved), count); addstate(prog, clist, thread(prog, saved), count);
for (sp = next_byte(it); sp != '\0'; sp = next_byte(it)) { for (sp = next_byte(it); sp != '\0'; sp = next_byte(it)) {
printf("%c", sp);
for (int i = 0; i < clist->n; i++) { for (int i = 0; i < clist->n; i++) {
t = clist->t[i]; t = clist->t[i];
switch (t.pc->op) { switch (t.pc->op) {
@@ -148,14 +120,11 @@ int next_match(Inst *prog, ByteIterator *it, uint32_t *saved) {
break; break;
} }
} }
clear(clist);
swap(clist, nlist); swap(clist, nlist);
clear(nlist);
count++; count++;
} }
free_list(clist);
free_list(nlist);
return false; // Reached EOF without a match return false; // Reached EOF without a match
} }
@@ -235,38 +204,41 @@ void print_program(Inst *program) {
} }
Inst *compile_regex(std::string pattern) { Inst *compile_regex(std::string pattern) {
return compile_ast(regex_to_ast(pattern)); Exp *ast = regex_to_ast(pattern);
Inst *program = compile_ast(ast);
free_exp(ast);
return program;
} }
int __main() { // int __main() {
// Maunally compiled program for testing // // Maunally compiled program for testing
char *buffer = (char *)malloc(29); // char *buffer = (char *)malloc(29);
strcpy(buffer, "abcdabcdabcdabcdf"); // strcpy(buffer, "abcdabcdabcdabcdf");
// This loads all (excluding \0 put in by strcpy) // // This loads all (excluding \0 put in by strcpy)
Knot *root = load(buffer, 17, optimal_chunk_size(12)); // Knot *root = load(buffer, 17, optimal_chunk_size(12));
ByteIterator *it = begin_b_iter(root); // ByteIterator *it = begin_b_iter(root);
uint32_t saved[40]; // uint32_t saved[40];
//
for (int i = 0; i < 40; i++) // for (int i = 0; i < 40; i++)
saved[i] = 0; // saved[i] = 0;
//
std::string pattern = "(abcd)+"; // std::string pattern = "(abcd)+";
//
Inst *program = compile_regex(pattern); // Inst *program = compile_regex(pattern);
//
print_program(program); // print_program(program);
//
int result; // int result;
while ((result = next_match(program, it, saved))) { // while ((result = next_match(program, it, saved))) {
printf("\nRES: %d\n", result); // printf("\nRES: %d\n", result);
for (int i = 0; i < 40; i++) // for (int i = 0; i < 40; i++)
printf("%d, ", saved[i]); // printf("%d, ", saved[i]);
} // }
//
free(program); // free(program);
free(buffer); // free(buffer);
free(it->it); // free(it->it);
free(it); // free(it);
free(root); // free(root);
return 0; // return 0;
} // }

View File

@@ -9,21 +9,21 @@ Exp *parse_atom_with_modifiers(Parser *p);
Exp *parse_bracket_class(Parser *p); Exp *parse_bracket_class(Parser *p);
Exp *make_none() { Exp *make_none() {
Exp *e = new Exp(); Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false; e->capture = false;
e->kind = ExpKind::NONE; e->kind = ExpKind::NONE;
return e; return e;
} }
Exp *make_any() { Exp *make_any() {
Exp *e = new Exp(); Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false; e->capture = false;
e->kind = ExpKind::ANY; e->kind = ExpKind::ANY;
return e; return e;
} }
Exp *make_range(const std::vector<ExRange> &ranges) { Exp *make_range(const std::vector<ExRange> &ranges) {
Exp *e = new Exp(); Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false; e->capture = false;
e->kind = ExpKind::RANGE; e->kind = ExpKind::RANGE;
new (&e->ranges) std::vector<ExRange>(ranges); new (&e->ranges) std::vector<ExRange>(ranges);
@@ -37,10 +37,10 @@ Exp *make_range_single(char c, bool neg = false) {
} }
Exp *make_or(Exp *l, Exp *r) { Exp *make_or(Exp *l, Exp *r) {
OpOr *o = new OpOr(); OpOr *o = (OpOr *)malloc(sizeof(OpOr));
o->left = l; o->left = l;
o->right = r; o->right = r;
Exp *e = new Exp(); Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false; e->capture = false;
e->kind = ExpKind::OR; e->kind = ExpKind::OR;
e->opor = o; e->opor = o;
@@ -48,10 +48,10 @@ Exp *make_or(Exp *l, Exp *r) {
} }
Exp *make_seq(Exp *l, Exp *r) { Exp *make_seq(Exp *l, Exp *r) {
OpSeq *o = new OpSeq(); OpSeq *o = (OpSeq *)malloc(sizeof(OpSeq));
o->left = l; o->left = l;
o->right = r; o->right = r;
Exp *e = new Exp(); Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false; e->capture = false;
e->kind = ExpKind::SEQ; e->kind = ExpKind::SEQ;
e->opseq = o; e->opseq = o;
@@ -95,6 +95,23 @@ Exp *regex_to_ast(std::string pattern) {
return res ? res : make_none(); return res ? res : make_none();
} }
// Releases an expression tree node and everything reachable from it.
// Passing a null pointer is allowed and does nothing.
void free_exp(Exp *exp) {
  if (exp == nullptr)
    return;

  switch (exp->kind) {
  case ExpKind::OR:
    // Children first, then the operator record that held them.
    free_exp(exp->opor->left);
    free_exp(exp->opor->right);
    free(exp->opor);
    break;
  case ExpKind::SEQ:
    free_exp(exp->opseq->left);
    free_exp(exp->opseq->right);
    free(exp->opseq);
    break;
  case ExpKind::RANGE:
    // The vector was constructed in place inside malloc'd storage, so its
    // destructor has to be invoked by hand before the node is freed.
    exp->ranges.~vector<ExRange>();
    break;
  default:
    // Remaining kinds own nothing beyond the node itself.
    break;
  }

  free(exp);
}
Exp *parse_alternation(Parser *p) { Exp *parse_alternation(Parser *p) {
std::vector<Exp *> parts; std::vector<Exp *> parts;
parts.push_back(parse_sequence(p)); parts.push_back(parse_sequence(p));
@@ -302,37 +319,41 @@ Exp *parse_atom_with_modifiers(Parser *p) {
if (!atom) if (!atom)
return nullptr; return nullptr;
// apply possibly multiple modifiers in sequence
while (true) { while (true) {
if (peek(p) == '?') { if (peek(p) == '?') {
consume(p); consume(p);
// OpOr(atom, NONE) Exp *old = atom;
atom = make_or(clone_exp(atom), make_none()); atom = make_or(clone_exp(old), make_none());
free_exp(old);
} else if (peek(p) == '*') { } else if (peek(p) == '*') {
consume(p); consume(p);
// Expand to 20 repeating OpOr(atom, NONE) chained by SEQ as literal Exp *old = atom;
// tree // Expand to 20 repeating OpOr(atom, NONE) chained by SEQ as literal tree
Exp *unit_or = nullptr; Exp *unit_or = nullptr;
for (int t = 0; t < 20; ++t) { for (int t = 0; t < 20; ++t) {
Exp *op = make_or(clone_exp(atom), make_none()); Exp *op = make_or(clone_exp(old), make_none());
if (!unit_or) if (!unit_or)
unit_or = op; unit_or = op;
else else
unit_or = make_seq(unit_or, op); unit_or = make_seq(unit_or, op);
} }
atom = unit_or ? unit_or : make_none(); atom = unit_or ? unit_or : make_none();
free_exp(old);
} else if (peek(p) == '+') { } else if (peek(p) == '+') {
consume(p); consume(p);
Exp *old = atom;
// First the atom, then 20 OpOr(atom, NONE) sequence // First the atom, then 20 OpOr(atom, NONE) sequence
Exp *rest = nullptr; Exp *rest = nullptr;
for (int t = 0; t < 20; ++t) { for (int t = 0; t < 20; ++t) {
Exp *op = make_or(clone_exp(atom), make_none()); Exp *op = make_or(clone_exp(old), make_none());
if (!rest) if (!rest)
rest = op; rest = op;
else else
rest = make_seq(rest, op); rest = make_seq(rest, op);
} }
atom = rest ? make_seq(clone_exp(atom), rest) : clone_exp(atom); Exp *new_atom = rest ? make_seq(clone_exp(old), rest) : clone_exp(old);
atom = new_atom;
free_exp(old);
} else if (peek(p) == '{') { } else if (peek(p) == '{') {
// parse {x,y} // parse {x,y}
size_t save = p->i; size_t save = p->i;
@@ -355,18 +376,19 @@ Exp *parse_atom_with_modifiers(Parser *p) {
y = x; y = x;
if (y > 20) if (y > 20)
y = 20; // clamp to 20 as requested y = 20; // clamp to 20 as requested
Exp *old = atom;
// Build x copies of atom concatenated, then (y-x) OpOr(atom, NONE) // Build x copies of atom concatenated, then (y-x) OpOr(atom, NONE)
// chained
Exp *prefix = nullptr; Exp *prefix = nullptr;
for (int k = 0; k < x; ++k) { for (int k = 0; k < x; ++k) {
if (!prefix) if (!prefix)
prefix = clone_exp(atom); prefix = clone_exp(old);
else else
prefix = make_seq(prefix, clone_exp(atom)); prefix = make_seq(prefix, clone_exp(old));
} }
Exp *suffix = nullptr; Exp *suffix = nullptr;
for (int k = 0; k < (y - x); ++k) { for (int k = 0; k < (y - x); ++k) {
Exp *op = make_or(clone_exp(atom), make_none()); Exp *op = make_or(clone_exp(old), make_none());
if (!suffix) if (!suffix)
suffix = op; suffix = op;
else else
@@ -374,10 +396,14 @@ Exp *parse_atom_with_modifiers(Parser *p) {
} }
if (!prefix) if (!prefix)
prefix = make_none(); prefix = make_none();
Exp *new_atom = nullptr;
if (!suffix) if (!suffix)
atom = prefix; new_atom = prefix;
else else
atom = make_seq(prefix, suffix); new_atom = make_seq(prefix, suffix);
atom = new_atom;
free_exp(old);
} else { } else {
break; break;
} }

View File

@@ -25,6 +25,7 @@ static void insert_inst(InstList *list, Inst *inst) {
list->cap = nc; list->cap = nc;
} }
list->data[list->len++] = *inst; list->data[list->len++] = *inst;
free(inst);
} }
Inst *make_inst(Op op) { Inst *make_inst(Op op) {
@@ -122,6 +123,18 @@ void compile_or(Exp *e, InstList *list) {
} }
} }
// Frees a compiled program: the range table attached to every MCH / NMC
// instruction, followed by the instruction array itself.
// The array is scanned up to (not including) the first END instruction, so it
// must be END-terminated. A null pointer is accepted and ignored.
void free_program(Inst *instructions) {
  if (instructions == nullptr)
    return;

  // Instructions are stored contiguously, so a pointer walk visits them all.
  for (Inst *ip = instructions; ip->op != END; ++ip) {
    const bool has_ranges = ip->op == MCH || ip->op == NMC;
    if (has_ranges)
      free(ip->r.ranges);
  }

  free(instructions);
}
void compile_exp(Exp *e, InstList *list) { void compile_exp(Exp *e, InstList *list) {
switch (e->kind) { switch (e->kind) {
case ExpKind::NONE: case ExpKind::NONE:

View File

@@ -259,7 +259,7 @@ Knot *insert(Knot *node, uint32_t offset, char *str, uint32_t len) {
Knot *left_part = nullptr; Knot *left_part = nullptr;
Knot *right_part = nullptr; Knot *right_part = nullptr;
split(node, offset, &left_part, &right_part); split(node, offset, &left_part, &right_part);
Knot *middle_part = load(str, len, node->chunk_size); Knot *middle_part = load(str, len, left_part->chunk_size);
return concat(concat(left_part, middle_part), right_part); return concat(concat(left_part, middle_part), right_part);
} }
@@ -647,12 +647,17 @@ char next_byte(ByteIterator *it) {
it->offset_g += it->offset_l; it->offset_g += it->offset_l;
it->offset_l = 1; it->offset_l = 1;
char *data = next_leaf(it->it); char *data = next_leaf(it->it);
it->char_count = strlen(data); if (!data)
it->data = data;
if (it->data)
return *it->data;
else
return '\0'; return '\0';
it->char_count = strlen(data);
while (it->char_count <= 0) {
data = next_leaf(it->it);
if (!data)
return '\0';
it->char_count = strlen(data);
}
it->data = data;
return *it->data;
} }
} }
@@ -828,15 +833,15 @@ The quick brown fox jumps over the lazy dog.");
std::string pattern = "f.x"; std::string pattern = "f.x";
Inst *program = compile_regex(pattern); // Inst *program = compile_regex(pattern);
//
bool result; // bool result;
while ((result = next_match(program, it2, saved))) { // while ((result = next_match(program, it2, saved))) {
printf("\nRES: %d\n", result); // printf("\nRES: %d\n", result);
for (int i = 0; i < 40; i++) // for (int i = 0; i < 40; i++)
printf("%d, ", saved[i]); // printf("%d, ", saved[i]);
} // }
//
// char c2 = ' '; // char c2 = ' ';
// while ((c2 = next_byte(it2)) != '\0') // while ((c2 = next_byte(it2)) != '\0')
// printf("%c :wow!:\n", c2); // printf("%c :wow!:\n", c2);

View File

@@ -41,7 +41,7 @@ int main() {
printf("Loading file into rope...\n"); printf("Loading file into rope...\n");
char *buf = load_file("./random.bin", &len); char *buf = load_file("./random.bin", &len);
auto start = std::chrono::high_resolution_clock::now(); auto start = std::chrono::high_resolution_clock::now();
Knot *root = load(buf, len, 2); Knot *root = load(buf, len, optimal_chunk_size(len));
auto end = std::chrono::high_resolution_clock::now(); auto end = std::chrono::high_resolution_clock::now();
printf("Load time: %.3f s\n", printf("Load time: %.3f s\n",
std::chrono::duration<double>(end - start).count()); std::chrono::duration<double>(end - start).count());
@@ -191,40 +191,56 @@ int main() {
// search test // search test
start = std::chrono::high_resolution_clock::now(); start = std::chrono::high_resolution_clock::now();
std::vector<std::pair<size_t, size_t>> matches = search_rope(root, "f.x"); std::vector<std::pair<size_t, size_t>> matches =
search_rope(root, "[A-Z][a-z]+");
end = std::chrono::high_resolution_clock::now(); end = std::chrono::high_resolution_clock::now();
printf("Search Time: %.6f s\n", printf("Search Time: %.6f s\n",
std::chrono::duration<double>(end - start).count()); std::chrono::duration<double>(end - start).count());
printf("Found %lu matches\n", matches.size()); printf("Found %lu matches\n", matches.size());
char *c = read(root, 0, 1000); // char *c = read(root, 0, 1000);
printf("%s\n", c); // printf("%s\n", c);
free(c); // free(c);
ByteIterator *it1 = begin_b_iter(root); // ByteIterator *it1 = begin_b_iter(root);
char ch; // char ch;
while ((ch = next_byte(it1)) != '\0') { // while ((ch = next_byte(it1)) != '\0') {
printf("%c:", ch); // printf("%c:", ch);
} // }
ByteIterator *it2 = begin_b_iter(root); ByteIterator *it2 = begin_b_iter(root);
uint32_t saved[40]; uint32_t saved[40];
for (int i = 0; i < 40; i++) for (int i = 0; i < 40; i++)
saved[i] = 0; saved[i] = 0;
std::string pattern = "f.x"; std::string pattern = "[A-Z][a-z]+";
Inst *program = compile_regex(pattern); Inst *program = compile_regex(pattern);
print_program(program);
bool result; bool result;
int prolen = proglen(program);
ThreadList *clist = (ThreadList *)malloc(sizeof(ThreadList));
clist->t = (Thread *)malloc(+sizeof(Thread) * prolen);
clist->n = 0;
ThreadList *nlist = (ThreadList *)malloc(sizeof(ThreadList));
nlist->t = (Thread *)malloc(+sizeof(Thread) * prolen);
nlist->n = 0;
int count = 0; int count = 0;
start = std::chrono::high_resolution_clock::now(); start = std::chrono::high_resolution_clock::now();
while ((result = next_match(program, it2, saved))) { while ((result = next_match(program, it2, saved, clist, nlist))) {
count++; count++;
printf("%d\n", count);
} }
end = std::chrono::high_resolution_clock::now(); end = std::chrono::high_resolution_clock::now();
printf("Search Time: %.6f s\n", printf("Search Time: %.6f s\n",
std::chrono::duration<double>(end - start).count()); std::chrono::duration<double>(end - start).count());
printf("Found2 %d matches\n", count); printf("Found2 %d matches\n", count);
free_program(program);
free(it2->it);
free(it2);
free(clist->t);
free(nlist->t);
free(clist);
free(nlist);
free_rope(root); free_rope(root);
} }