Fix memory leaks

This commit is contained in:
2025-11-28 19:14:43 +00:00
parent ae5da6b38e
commit 5d0b789402
7 changed files with 167 additions and 117 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
tst
*.bin
*.vim

View File

@@ -79,9 +79,24 @@ struct Inst {
int idx;
};
// One matcher thread: a position in the compiled program plus the capture
// slots recorded along the path that reached it.
struct Thread {
Inst *pc; // instruction this thread is positioned at
// Capture slots for groups $0 through $9.
// NOTE(review): 40 entries for 10 groups, presumably 4 slots per group - confirm.
uint32_t saved[40]; /* $0 through $9 */
};
// A caller-allocated array of threads together with its current fill count.
struct ThreadList {
Thread *t; // backing array of Thread entries
int n;     // number of entries of t currently in use
};
// Parse a pattern string into an expression tree.
Exp *regex_to_ast(std::string pattern);
// Recursively release an expression tree; a null pointer is accepted.
void free_exp(Exp *exp);
// Compile an expression tree into an instruction program.
Inst *compile_ast(Exp *root);
// Parse and compile a pattern in one call.
Inst *compile_regex(std::string pattern);
// Overload of next_match without caller-provided thread lists.
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved);
// NOTE(review): presumably the instruction count of prog; callers use it to
// size the ThreadList arrays - confirm.
int proglen(Inst *prog);
// Release a program, including the range buffers of its MCH/NMC instructions.
void free_program(Inst *instructions);
// Run the matcher over the bytes produced by it, using clist/nlist as thread
// list storage supplied by the caller. Callers loop while the result is
// non-zero.
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved, ThreadList *clist,
ThreadList *nlist);
// NOTE(review): presumably dumps the program for debugging - confirm.
void print_program(Inst *program);
#endif

View File

@@ -3,7 +3,6 @@
#include <assert.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <stdio.h>
// VM - pass 2
@@ -19,11 +18,6 @@ bool test_ranges(char inp, Range *ranges, int len) {
// Use pike vm method
// One matcher thread: a position in the compiled program plus the capture
// slots recorded along the path that reached it.
struct Thread {
Inst *pc; // instruction this thread is positioned at
// Capture slots for groups $0 through $9.
// NOTE(review): 40 entries for 10 groups, presumably 4 slots per group - confirm.
uint32_t saved[40]; /* $0 through $9 */
};
Thread thread(Inst *pc, uint32_t *saved) {
Thread t;
t.pc = pc;
@@ -32,16 +26,6 @@ Thread thread(Inst *pc, uint32_t *saved) {
return t;
}
// An array of threads together with its current fill count.
struct ThreadList {
Thread *t; // backing array of Thread entries
int n;     // number of entries of t currently in use
};
// Copy all 40 capture slots from tsaved into saved, front to back.
// Passing the same array for both arguments leaves it unchanged.
void handle_end(uint32_t *tsaved, uint32_t *saved) {
  const uint32_t *src = tsaved;
  uint32_t *dst = saved;
  for (const uint32_t *stop = tsaved + 40; src != stop; ++src, ++dst)
    *dst = *src;
}
bool addstate(Inst *prog, ThreadList *list, Thread t, int count) {
if (t.pc->op == JMP) {
if (addstate(prog, list, thread(prog + t.pc->j.x, t.saved), count))
@@ -66,7 +50,6 @@ bool addstate(Inst *prog, ThreadList *list, Thread t, int count) {
return true;
return false;
} else if (t.pc->op == END) {
handle_end(t.saved, t.saved);
return true;
} else {
for (int i = 0; i < list->n; i++)
@@ -97,24 +80,13 @@ void inline free_list(ThreadList *list) {
free(list);
}
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved) {
int len;
ThreadList *clist, *nlist;
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved, ThreadList *clist,
ThreadList *nlist) {
Thread t;
len = proglen(prog);
clist = (ThreadList *)malloc(sizeof(ThreadList));
clist->t = (Thread *)malloc(+sizeof(Thread) * len);
clist->n = 0;
nlist = (ThreadList *)malloc(sizeof(ThreadList));
nlist->t = (Thread *)malloc(+sizeof(Thread) * len);
nlist->n = 0;
char sp;
int count = 0;
addstate(prog, clist, thread(prog, saved), count);
for (sp = next_byte(it); sp != '\0'; sp = next_byte(it)) {
printf("%c", sp);
for (int i = 0; i < clist->n; i++) {
t = clist->t[i];
switch (t.pc->op) {
@@ -148,14 +120,11 @@ int next_match(Inst *prog, ByteIterator *it, uint32_t *saved) {
break;
}
}
clear(clist);
swap(clist, nlist);
clear(nlist);
count++;
}
free_list(clist);
free_list(nlist);
return false; // Reached EOF without a match
}
@@ -235,38 +204,41 @@ void print_program(Inst *program) {
}
// Parse `pattern` and compile it into an instruction program.
//
// The intermediate expression tree is only needed while compiling, so it is
// released here before returning; previously the function returned straight
// from compile_ast(regex_to_ast(pattern)) and the tree was never freed.
//
// Returns the program produced by compile_ast. The caller owns it and should
// release it with free_program().
Inst *compile_regex(std::string pattern) {
  Exp *ast = regex_to_ast(pattern);
  Inst *program = compile_ast(ast);
  free_exp(ast); // the tree is not referenced after compilation
  return program;
}
// Manual smoke test: loads a short literal into a rope, compiles "(abcd)+",
// prints the program, then prints the capture slots after every match.
int __main() {
// Manually compiled program for testing
char *buffer = (char *)malloc(29);
strcpy(buffer, "abcdabcdabcdabcdf");
// This loads all (excluding \0 put in by strcpy)
Knot *root = load(buffer, 17, optimal_chunk_size(12));
ByteIterator *it = begin_b_iter(root);
uint32_t saved[40];
for (int i = 0; i < 40; i++)
saved[i] = 0;
std::string pattern = "(abcd)+";
Inst *program = compile_regex(pattern);
print_program(program);
int result;
// next_match yields non-zero for each match; the loop ends on the first zero.
while ((result = next_match(program, it, saved))) {
printf("\nRES: %d\n", result);
for (int i = 0; i < 40; i++)
printf("%d, ", saved[i]);
}
// NOTE(review): free() releases only the instruction array; free_program()
// also frees the range buffers of MCH/NMC instructions - confirm which is
// intended here.
free(program);
free(buffer);
free(it->it);
free(it);
// NOTE(review): presumably frees only the root node, not the whole rope - confirm.
free(root);
return 0;
}
// int __main() {
// // Manually compiled program for testing
// char *buffer = (char *)malloc(29);
// strcpy(buffer, "abcdabcdabcdabcdf");
// // This loads all (excluding \0 put in by strcpy)
// Knot *root = load(buffer, 17, optimal_chunk_size(12));
// ByteIterator *it = begin_b_iter(root);
// uint32_t saved[40];
//
// for (int i = 0; i < 40; i++)
// saved[i] = 0;
//
// std::string pattern = "(abcd)+";
//
// Inst *program = compile_regex(pattern);
//
// print_program(program);
//
// int result;
// while ((result = next_match(program, it, saved))) {
// printf("\nRES: %d\n", result);
// for (int i = 0; i < 40; i++)
// printf("%d, ", saved[i]);
// }
//
// free(program);
// free(buffer);
// free(it->it);
// free(it);
// free(root);
// return 0;
// }

View File

@@ -9,21 +9,21 @@ Exp *parse_atom_with_modifiers(Parser *p);
Exp *parse_bracket_class(Parser *p);
// Allocate a leaf node of kind ExpKind::NONE with capture disabled.
// It is used as the empty alternative when optional atoms are expanded.
//
// The node is obtained with malloc (not new) because free_exp() releases
// nodes with free(); only `capture` and `kind` are initialized.
Exp *make_none() {
  Exp *e = static_cast<Exp *>(malloc(sizeof(Exp)));
  e->capture = false;
  e->kind = ExpKind::NONE;
  return e;
}
// Allocate a leaf node of kind ExpKind::ANY with capture disabled.
// NOTE(review): presumably the node for the `.` wildcard - confirm.
//
// The node is obtained with malloc (not new) because free_exp() releases
// nodes with free(); only `capture` and `kind` are initialized.
Exp *make_any() {
  Exp *e = static_cast<Exp *>(malloc(sizeof(Exp)));
  e->capture = false;
  e->kind = ExpKind::ANY;
  return e;
}
Exp *make_range(const std::vector<ExRange> &ranges) {
Exp *e = new Exp();
Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false;
e->kind = ExpKind::RANGE;
new (&e->ranges) std::vector<ExRange>(ranges);
@@ -37,10 +37,10 @@ Exp *make_range_single(char c, bool neg = false) {
}
Exp *make_or(Exp *l, Exp *r) {
OpOr *o = new OpOr();
OpOr *o = (OpOr *)malloc(sizeof(OpOr));
o->left = l;
o->right = r;
Exp *e = new Exp();
Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false;
e->kind = ExpKind::OR;
e->opor = o;
@@ -48,10 +48,10 @@ Exp *make_or(Exp *l, Exp *r) {
}
Exp *make_seq(Exp *l, Exp *r) {
OpSeq *o = new OpSeq();
OpSeq *o = (OpSeq *)malloc(sizeof(OpSeq));
o->left = l;
o->right = r;
Exp *e = new Exp();
Exp *e = (Exp *)malloc(sizeof(Exp));
e->capture = false;
e->kind = ExpKind::SEQ;
e->opseq = o;
@@ -95,6 +95,23 @@ Exp *regex_to_ast(std::string pattern) {
return res ? res : make_none();
}
// Release an expression tree rooted at `exp`, children first.
// A null pointer is accepted and ignored.
void free_exp(Exp *exp) {
  if (exp == nullptr)
    return;

  switch (exp->kind) {
  case ExpKind::OR:
    // Both operands, then the operator record itself.
    free_exp(exp->opor->left);
    free_exp(exp->opor->right);
    free(exp->opor);
    break;
  case ExpKind::SEQ:
    free_exp(exp->opseq->left);
    free_exp(exp->opseq->right);
    free(exp->opseq);
    break;
  case ExpKind::RANGE:
    // The vector was constructed in place inside the node, so it has to be
    // destroyed explicitly before the raw storage is freed.
    exp->ranges.~vector<ExRange>();
    break;
  default:
    // Remaining kinds own nothing beyond the node itself.
    break;
  }

  free(exp);
}
Exp *parse_alternation(Parser *p) {
std::vector<Exp *> parts;
parts.push_back(parse_sequence(p));
@@ -302,37 +319,41 @@ Exp *parse_atom_with_modifiers(Parser *p) {
if (!atom)
return nullptr;
// apply possibly multiple modifiers in sequence
while (true) {
if (peek(p) == '?') {
consume(p);
// OpOr(atom, NONE)
atom = make_or(clone_exp(atom), make_none());
Exp *old = atom;
atom = make_or(clone_exp(old), make_none());
free_exp(old);
} else if (peek(p) == '*') {
consume(p);
// Expand to 20 repeating OpOr(atom, NONE) chained by SEQ as literal
// tree
Exp *old = atom;
// Expand to 20 repeating OpOr(atom, NONE) chained by SEQ as literal tree
Exp *unit_or = nullptr;
for (int t = 0; t < 20; ++t) {
Exp *op = make_or(clone_exp(atom), make_none());
Exp *op = make_or(clone_exp(old), make_none());
if (!unit_or)
unit_or = op;
else
unit_or = make_seq(unit_or, op);
}
atom = unit_or ? unit_or : make_none();
free_exp(old);
} else if (peek(p) == '+') {
consume(p);
Exp *old = atom;
// First the atom, then 20 OpOr(atom, NONE) sequence
Exp *rest = nullptr;
for (int t = 0; t < 20; ++t) {
Exp *op = make_or(clone_exp(atom), make_none());
Exp *op = make_or(clone_exp(old), make_none());
if (!rest)
rest = op;
else
rest = make_seq(rest, op);
}
atom = rest ? make_seq(clone_exp(atom), rest) : clone_exp(atom);
Exp *new_atom = rest ? make_seq(clone_exp(old), rest) : clone_exp(old);
atom = new_atom;
free_exp(old);
} else if (peek(p) == '{') {
// parse {x,y}
size_t save = p->i;
@@ -355,18 +376,19 @@ Exp *parse_atom_with_modifiers(Parser *p) {
y = x;
if (y > 20)
y = 20; // clamp to 20 as requested
Exp *old = atom;
// Build x copies of atom concatenated, then (y-x) OpOr(atom, NONE)
// chained
Exp *prefix = nullptr;
for (int k = 0; k < x; ++k) {
if (!prefix)
prefix = clone_exp(atom);
prefix = clone_exp(old);
else
prefix = make_seq(prefix, clone_exp(atom));
prefix = make_seq(prefix, clone_exp(old));
}
Exp *suffix = nullptr;
for (int k = 0; k < (y - x); ++k) {
Exp *op = make_or(clone_exp(atom), make_none());
Exp *op = make_or(clone_exp(old), make_none());
if (!suffix)
suffix = op;
else
@@ -374,10 +396,14 @@ Exp *parse_atom_with_modifiers(Parser *p) {
}
if (!prefix)
prefix = make_none();
Exp *new_atom = nullptr;
if (!suffix)
atom = prefix;
new_atom = prefix;
else
atom = make_seq(prefix, suffix);
new_atom = make_seq(prefix, suffix);
atom = new_atom;
free_exp(old);
} else {
break;
}

View File

@@ -25,6 +25,7 @@ static void insert_inst(InstList *list, Inst *inst) {
list->cap = nc;
}
list->data[list->len++] = *inst;
free(inst);
}
Inst *make_inst(Op op) {
@@ -122,6 +123,18 @@ void compile_or(Exp *e, InstList *list) {
}
}
// Release a compiled program and the range buffers owned by its MCH/NMC
// instructions. A null pointer is accepted and ignored.
//
// The instructions are walked as one contiguous array up to (not including)
// the first END instruction, so the program must be END-terminated.
void free_program(Inst *instructions) {
  if (instructions == nullptr)
    return;

  for (Inst *inst = instructions; inst->op != END; ++inst) {
    const bool owns_ranges = inst->op == MCH || inst->op == NMC;
    if (owns_ranges)
      free(inst->r.ranges);
  }

  free(instructions);
}
void compile_exp(Exp *e, InstList *list) {
switch (e->kind) {
case ExpKind::NONE:

View File

@@ -259,7 +259,7 @@ Knot *insert(Knot *node, uint32_t offset, char *str, uint32_t len) {
Knot *left_part = nullptr;
Knot *right_part = nullptr;
split(node, offset, &left_part, &right_part);
Knot *middle_part = load(str, len, node->chunk_size);
Knot *middle_part = load(str, len, left_part->chunk_size);
return concat(concat(left_part, middle_part), right_part);
}
@@ -647,12 +647,17 @@ char next_byte(ByteIterator *it) {
it->offset_g += it->offset_l;
it->offset_l = 1;
char *data = next_leaf(it->it);
it->char_count = strlen(data);
it->data = data;
if (it->data)
return *it->data;
else
if (!data)
return '\0';
it->char_count = strlen(data);
while (it->char_count <= 0) {
data = next_leaf(it->it);
if (!data)
return '\0';
it->char_count = strlen(data);
}
it->data = data;
return *it->data;
}
}
@@ -828,15 +833,15 @@ The quick brown fox jumps over the lazy dog.");
std::string pattern = "f.x";
Inst *program = compile_regex(pattern);
bool result;
while ((result = next_match(program, it2, saved))) {
printf("\nRES: %d\n", result);
for (int i = 0; i < 40; i++)
printf("%d, ", saved[i]);
}
// Inst *program = compile_regex(pattern);
//
// bool result;
// while ((result = next_match(program, it2, saved))) {
// printf("\nRES: %d\n", result);
// for (int i = 0; i < 40; i++)
// printf("%d, ", saved[i]);
// }
//
// char c2 = ' ';
// while ((c2 = next_byte(it2)) != '\0')
// printf("%c :wow!:\n", c2);

View File

@@ -41,7 +41,7 @@ int main() {
printf("Loading file into rope...\n");
char *buf = load_file("./random.bin", &len);
auto start = std::chrono::high_resolution_clock::now();
Knot *root = load(buf, len, 2);
Knot *root = load(buf, len, optimal_chunk_size(len));
auto end = std::chrono::high_resolution_clock::now();
printf("Load time: %.3f s\n",
std::chrono::duration<double>(end - start).count());
@@ -191,40 +191,56 @@ int main() {
// search test
start = std::chrono::high_resolution_clock::now();
std::vector<std::pair<size_t, size_t>> matches = search_rope(root, "f.x");
std::vector<std::pair<size_t, size_t>> matches =
search_rope(root, "[A-Z][a-z]+");
end = std::chrono::high_resolution_clock::now();
printf("Search Time: %.6f s\n",
std::chrono::duration<double>(end - start).count());
printf("Found %lu matches\n", matches.size());
char *c = read(root, 0, 1000);
printf("%s\n", c);
free(c);
// char *c = read(root, 0, 1000);
// printf("%s\n", c);
// free(c);
ByteIterator *it1 = begin_b_iter(root);
char ch;
while ((ch = next_byte(it1)) != '\0') {
printf("%c:", ch);
}
// ByteIterator *it1 = begin_b_iter(root);
// char ch;
// while ((ch = next_byte(it1)) != '\0') {
// printf("%c:", ch);
// }
ByteIterator *it2 = begin_b_iter(root);
uint32_t saved[40];
for (int i = 0; i < 40; i++)
saved[i] = 0;
std::string pattern = "f.x";
std::string pattern = "[A-Z][a-z]+";
Inst *program = compile_regex(pattern);
print_program(program);
bool result;
int prolen = proglen(program);
ThreadList *clist = (ThreadList *)malloc(sizeof(ThreadList));
clist->t = (Thread *)malloc(+sizeof(Thread) * prolen);
clist->n = 0;
ThreadList *nlist = (ThreadList *)malloc(sizeof(ThreadList));
nlist->t = (Thread *)malloc(+sizeof(Thread) * prolen);
nlist->n = 0;
int count = 0;
start = std::chrono::high_resolution_clock::now();
while ((result = next_match(program, it2, saved))) {
while ((result = next_match(program, it2, saved, clist, nlist))) {
count++;
printf("%d\n", count);
}
end = std::chrono::high_resolution_clock::now();
printf("Search Time: %.6f s\n",
std::chrono::duration<double>(end - start).count());
printf("Found2 %d matches\n", count);
free_program(program);
free(it2->it);
free(it2);
free(clist->t);
free(nlist->t);
free(clist);
free(nlist);
free_rope(root);
}