Fix memory leaks
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
tst
|
||||
*.bin
|
||||
*.vim
|
||||
@@ -79,9 +79,24 @@ struct Inst {
|
||||
int idx;
|
||||
};
|
||||
|
||||
struct Thread {
|
||||
Inst *pc;
|
||||
uint32_t saved[40]; /* $0 through $9 */
|
||||
};
|
||||
|
||||
struct ThreadList {
|
||||
Thread *t;
|
||||
int n;
|
||||
};
|
||||
|
||||
Exp *regex_to_ast(std::string pattern);
|
||||
void free_exp(Exp *exp);
|
||||
Inst *compile_ast(Exp *root);
|
||||
Inst *compile_regex(std::string pattern);
|
||||
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved);
|
||||
int proglen(Inst *prog);
|
||||
void free_program(Inst *instructions);
|
||||
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved, ThreadList *clist,
|
||||
ThreadList *nlist);
|
||||
void print_program(Inst *program);
|
||||
|
||||
#endif
|
||||
|
||||
106
src/noose.cpp
106
src/noose.cpp
@@ -3,7 +3,6 @@
|
||||
#include <assert.h>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <stdio.h>
|
||||
|
||||
// VM - pass 2
|
||||
@@ -19,11 +18,6 @@ bool test_ranges(char inp, Range *ranges, int len) {
|
||||
|
||||
// Use pike vm method
|
||||
|
||||
struct Thread {
|
||||
Inst *pc;
|
||||
uint32_t saved[40]; /* $0 through $9 */
|
||||
};
|
||||
|
||||
Thread thread(Inst *pc, uint32_t *saved) {
|
||||
Thread t;
|
||||
t.pc = pc;
|
||||
@@ -32,16 +26,6 @@ Thread thread(Inst *pc, uint32_t *saved) {
|
||||
return t;
|
||||
}
|
||||
|
||||
struct ThreadList {
|
||||
Thread *t;
|
||||
int n;
|
||||
};
|
||||
|
||||
void handle_end(uint32_t *tsaved, uint32_t *saved) {
|
||||
for (int i = 0; i < 40; i++)
|
||||
saved[i] = tsaved[i];
|
||||
}
|
||||
|
||||
bool addstate(Inst *prog, ThreadList *list, Thread t, int count) {
|
||||
if (t.pc->op == JMP) {
|
||||
if (addstate(prog, list, thread(prog + t.pc->j.x, t.saved), count))
|
||||
@@ -66,7 +50,6 @@ bool addstate(Inst *prog, ThreadList *list, Thread t, int count) {
|
||||
return true;
|
||||
return false;
|
||||
} else if (t.pc->op == END) {
|
||||
handle_end(t.saved, t.saved);
|
||||
return true;
|
||||
} else {
|
||||
for (int i = 0; i < list->n; i++)
|
||||
@@ -97,24 +80,13 @@ void inline free_list(ThreadList *list) {
|
||||
free(list);
|
||||
}
|
||||
|
||||
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved) {
|
||||
int len;
|
||||
ThreadList *clist, *nlist;
|
||||
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved, ThreadList *clist,
|
||||
ThreadList *nlist) {
|
||||
Thread t;
|
||||
|
||||
len = proglen(prog);
|
||||
clist = (ThreadList *)malloc(sizeof(ThreadList));
|
||||
clist->t = (Thread *)malloc(+sizeof(Thread) * len);
|
||||
clist->n = 0;
|
||||
nlist = (ThreadList *)malloc(sizeof(ThreadList));
|
||||
nlist->t = (Thread *)malloc(+sizeof(Thread) * len);
|
||||
nlist->n = 0;
|
||||
char sp;
|
||||
int count = 0;
|
||||
|
||||
addstate(prog, clist, thread(prog, saved), count);
|
||||
for (sp = next_byte(it); sp != '\0'; sp = next_byte(it)) {
|
||||
printf("%c", sp);
|
||||
for (int i = 0; i < clist->n; i++) {
|
||||
t = clist->t[i];
|
||||
switch (t.pc->op) {
|
||||
@@ -148,14 +120,11 @@ int next_match(Inst *prog, ByteIterator *it, uint32_t *saved) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
clear(clist);
|
||||
swap(clist, nlist);
|
||||
clear(nlist);
|
||||
count++;
|
||||
}
|
||||
|
||||
free_list(clist);
|
||||
free_list(nlist);
|
||||
|
||||
return false; // Reached EOF without a match
|
||||
}
|
||||
|
||||
@@ -235,38 +204,41 @@ void print_program(Inst *program) {
|
||||
}
|
||||
|
||||
Inst *compile_regex(std::string pattern) {
|
||||
return compile_ast(regex_to_ast(pattern));
|
||||
Exp *ast = regex_to_ast(pattern);
|
||||
Inst *program = compile_ast(ast);
|
||||
free_exp(ast);
|
||||
return program;
|
||||
}
|
||||
|
||||
int __main() {
|
||||
// Maunally compiled program for testing
|
||||
char *buffer = (char *)malloc(29);
|
||||
strcpy(buffer, "abcdabcdabcdabcdf");
|
||||
// This loads all (excluding \0 put in by strcpy)
|
||||
Knot *root = load(buffer, 17, optimal_chunk_size(12));
|
||||
ByteIterator *it = begin_b_iter(root);
|
||||
uint32_t saved[40];
|
||||
|
||||
for (int i = 0; i < 40; i++)
|
||||
saved[i] = 0;
|
||||
|
||||
std::string pattern = "(abcd)+";
|
||||
|
||||
Inst *program = compile_regex(pattern);
|
||||
|
||||
print_program(program);
|
||||
|
||||
int result;
|
||||
while ((result = next_match(program, it, saved))) {
|
||||
printf("\nRES: %d\n", result);
|
||||
for (int i = 0; i < 40; i++)
|
||||
printf("%d, ", saved[i]);
|
||||
}
|
||||
|
||||
free(program);
|
||||
free(buffer);
|
||||
free(it->it);
|
||||
free(it);
|
||||
free(root);
|
||||
return 0;
|
||||
}
|
||||
// int __main() {
|
||||
// // Maunally compiled program for testing
|
||||
// char *buffer = (char *)malloc(29);
|
||||
// strcpy(buffer, "abcdabcdabcdabcdf");
|
||||
// // This loads all (excluding \0 put in by strcpy)
|
||||
// Knot *root = load(buffer, 17, optimal_chunk_size(12));
|
||||
// ByteIterator *it = begin_b_iter(root);
|
||||
// uint32_t saved[40];
|
||||
//
|
||||
// for (int i = 0; i < 40; i++)
|
||||
// saved[i] = 0;
|
||||
//
|
||||
// std::string pattern = "(abcd)+";
|
||||
//
|
||||
// Inst *program = compile_regex(pattern);
|
||||
//
|
||||
// print_program(program);
|
||||
//
|
||||
// int result;
|
||||
// while ((result = next_match(program, it, saved))) {
|
||||
// printf("\nRES: %d\n", result);
|
||||
// for (int i = 0; i < 40; i++)
|
||||
// printf("%d, ", saved[i]);
|
||||
// }
|
||||
//
|
||||
// free(program);
|
||||
// free(buffer);
|
||||
// free(it->it);
|
||||
// free(it);
|
||||
// free(root);
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
@@ -9,21 +9,21 @@ Exp *parse_atom_with_modifiers(Parser *p);
|
||||
Exp *parse_bracket_class(Parser *p);
|
||||
|
||||
Exp *make_none() {
|
||||
Exp *e = new Exp();
|
||||
Exp *e = (Exp *)malloc(sizeof(Exp));
|
||||
e->capture = false;
|
||||
e->kind = ExpKind::NONE;
|
||||
return e;
|
||||
}
|
||||
|
||||
Exp *make_any() {
|
||||
Exp *e = new Exp();
|
||||
Exp *e = (Exp *)malloc(sizeof(Exp));
|
||||
e->capture = false;
|
||||
e->kind = ExpKind::ANY;
|
||||
return e;
|
||||
}
|
||||
|
||||
Exp *make_range(const std::vector<ExRange> &ranges) {
|
||||
Exp *e = new Exp();
|
||||
Exp *e = (Exp *)malloc(sizeof(Exp));
|
||||
e->capture = false;
|
||||
e->kind = ExpKind::RANGE;
|
||||
new (&e->ranges) std::vector<ExRange>(ranges);
|
||||
@@ -37,10 +37,10 @@ Exp *make_range_single(char c, bool neg = false) {
|
||||
}
|
||||
|
||||
Exp *make_or(Exp *l, Exp *r) {
|
||||
OpOr *o = new OpOr();
|
||||
OpOr *o = (OpOr *)malloc(sizeof(OpOr));
|
||||
o->left = l;
|
||||
o->right = r;
|
||||
Exp *e = new Exp();
|
||||
Exp *e = (Exp *)malloc(sizeof(Exp));
|
||||
e->capture = false;
|
||||
e->kind = ExpKind::OR;
|
||||
e->opor = o;
|
||||
@@ -48,10 +48,10 @@ Exp *make_or(Exp *l, Exp *r) {
|
||||
}
|
||||
|
||||
Exp *make_seq(Exp *l, Exp *r) {
|
||||
OpSeq *o = new OpSeq();
|
||||
OpSeq *o = (OpSeq *)malloc(sizeof(OpSeq));
|
||||
o->left = l;
|
||||
o->right = r;
|
||||
Exp *e = new Exp();
|
||||
Exp *e = (Exp *)malloc(sizeof(Exp));
|
||||
e->capture = false;
|
||||
e->kind = ExpKind::SEQ;
|
||||
e->opseq = o;
|
||||
@@ -95,6 +95,23 @@ Exp *regex_to_ast(std::string pattern) {
|
||||
return res ? res : make_none();
|
||||
}
|
||||
|
||||
void free_exp(Exp *exp) {
|
||||
if (!exp)
|
||||
return;
|
||||
if (exp->kind == ExpKind::OR) {
|
||||
free_exp(exp->opor->left);
|
||||
free_exp(exp->opor->right);
|
||||
free(exp->opor);
|
||||
} else if (exp->kind == ExpKind::SEQ) {
|
||||
free_exp(exp->opseq->left);
|
||||
free_exp(exp->opseq->right);
|
||||
free(exp->opseq);
|
||||
} else if (exp->kind == ExpKind::RANGE) {
|
||||
exp->ranges.~vector<ExRange>();
|
||||
}
|
||||
free(exp);
|
||||
}
|
||||
|
||||
Exp *parse_alternation(Parser *p) {
|
||||
std::vector<Exp *> parts;
|
||||
parts.push_back(parse_sequence(p));
|
||||
@@ -302,37 +319,41 @@ Exp *parse_atom_with_modifiers(Parser *p) {
|
||||
if (!atom)
|
||||
return nullptr;
|
||||
|
||||
// apply possibly multiple modifiers in sequence
|
||||
while (true) {
|
||||
if (peek(p) == '?') {
|
||||
consume(p);
|
||||
// OpOr(atom, NONE)
|
||||
atom = make_or(clone_exp(atom), make_none());
|
||||
Exp *old = atom;
|
||||
atom = make_or(clone_exp(old), make_none());
|
||||
free_exp(old);
|
||||
} else if (peek(p) == '*') {
|
||||
consume(p);
|
||||
// Expand to 20 repeating OpOr(atom, NONE) chained by SEQ as literal
|
||||
// tree
|
||||
Exp *old = atom;
|
||||
// Expand to 20 repeating OpOr(atom, NONE) chained by SEQ as literal tree
|
||||
Exp *unit_or = nullptr;
|
||||
for (int t = 0; t < 20; ++t) {
|
||||
Exp *op = make_or(clone_exp(atom), make_none());
|
||||
Exp *op = make_or(clone_exp(old), make_none());
|
||||
if (!unit_or)
|
||||
unit_or = op;
|
||||
else
|
||||
unit_or = make_seq(unit_or, op);
|
||||
}
|
||||
atom = unit_or ? unit_or : make_none();
|
||||
free_exp(old);
|
||||
} else if (peek(p) == '+') {
|
||||
consume(p);
|
||||
Exp *old = atom;
|
||||
// First the atom, then 20 OpOr(atom, NONE) sequence
|
||||
Exp *rest = nullptr;
|
||||
for (int t = 0; t < 20; ++t) {
|
||||
Exp *op = make_or(clone_exp(atom), make_none());
|
||||
Exp *op = make_or(clone_exp(old), make_none());
|
||||
if (!rest)
|
||||
rest = op;
|
||||
else
|
||||
rest = make_seq(rest, op);
|
||||
}
|
||||
atom = rest ? make_seq(clone_exp(atom), rest) : clone_exp(atom);
|
||||
Exp *new_atom = rest ? make_seq(clone_exp(old), rest) : clone_exp(old);
|
||||
atom = new_atom;
|
||||
free_exp(old);
|
||||
} else if (peek(p) == '{') {
|
||||
// parse {x,y}
|
||||
size_t save = p->i;
|
||||
@@ -355,18 +376,19 @@ Exp *parse_atom_with_modifiers(Parser *p) {
|
||||
y = x;
|
||||
if (y > 20)
|
||||
y = 20; // clamp to 20 as requested
|
||||
|
||||
Exp *old = atom;
|
||||
// Build x copies of atom concatenated, then (y-x) OpOr(atom, NONE)
|
||||
// chained
|
||||
Exp *prefix = nullptr;
|
||||
for (int k = 0; k < x; ++k) {
|
||||
if (!prefix)
|
||||
prefix = clone_exp(atom);
|
||||
prefix = clone_exp(old);
|
||||
else
|
||||
prefix = make_seq(prefix, clone_exp(atom));
|
||||
prefix = make_seq(prefix, clone_exp(old));
|
||||
}
|
||||
Exp *suffix = nullptr;
|
||||
for (int k = 0; k < (y - x); ++k) {
|
||||
Exp *op = make_or(clone_exp(atom), make_none());
|
||||
Exp *op = make_or(clone_exp(old), make_none());
|
||||
if (!suffix)
|
||||
suffix = op;
|
||||
else
|
||||
@@ -374,10 +396,14 @@ Exp *parse_atom_with_modifiers(Parser *p) {
|
||||
}
|
||||
if (!prefix)
|
||||
prefix = make_none();
|
||||
Exp *new_atom = nullptr;
|
||||
if (!suffix)
|
||||
atom = prefix;
|
||||
new_atom = prefix;
|
||||
else
|
||||
atom = make_seq(prefix, suffix);
|
||||
new_atom = make_seq(prefix, suffix);
|
||||
|
||||
atom = new_atom;
|
||||
free_exp(old);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ static void insert_inst(InstList *list, Inst *inst) {
|
||||
list->cap = nc;
|
||||
}
|
||||
list->data[list->len++] = *inst;
|
||||
free(inst);
|
||||
}
|
||||
|
||||
Inst *make_inst(Op op) {
|
||||
@@ -122,6 +123,18 @@ void compile_or(Exp *e, InstList *list) {
|
||||
}
|
||||
}
|
||||
|
||||
void free_program(Inst *instructions) {
|
||||
if (!instructions)
|
||||
return;
|
||||
Inst *current = instructions;
|
||||
while (current->op != END) {
|
||||
if (current->op == MCH || current->op == NMC)
|
||||
free(current->r.ranges);
|
||||
current++; // Assuming sequential memory layout
|
||||
}
|
||||
free(instructions);
|
||||
}
|
||||
|
||||
void compile_exp(Exp *e, InstList *list) {
|
||||
switch (e->kind) {
|
||||
case ExpKind::NONE:
|
||||
|
||||
35
src/rope.cpp
35
src/rope.cpp
@@ -259,7 +259,7 @@ Knot *insert(Knot *node, uint32_t offset, char *str, uint32_t len) {
|
||||
Knot *left_part = nullptr;
|
||||
Knot *right_part = nullptr;
|
||||
split(node, offset, &left_part, &right_part);
|
||||
Knot *middle_part = load(str, len, node->chunk_size);
|
||||
Knot *middle_part = load(str, len, left_part->chunk_size);
|
||||
return concat(concat(left_part, middle_part), right_part);
|
||||
}
|
||||
|
||||
@@ -647,12 +647,17 @@ char next_byte(ByteIterator *it) {
|
||||
it->offset_g += it->offset_l;
|
||||
it->offset_l = 1;
|
||||
char *data = next_leaf(it->it);
|
||||
it->char_count = strlen(data);
|
||||
it->data = data;
|
||||
if (it->data)
|
||||
return *it->data;
|
||||
else
|
||||
if (!data)
|
||||
return '\0';
|
||||
it->char_count = strlen(data);
|
||||
while (it->char_count <= 0) {
|
||||
data = next_leaf(it->it);
|
||||
if (!data)
|
||||
return '\0';
|
||||
it->char_count = strlen(data);
|
||||
}
|
||||
it->data = data;
|
||||
return *it->data;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -828,15 +833,15 @@ The quick brown fox jumps over the lazy dog.");
|
||||
|
||||
std::string pattern = "f.x";
|
||||
|
||||
Inst *program = compile_regex(pattern);
|
||||
|
||||
bool result;
|
||||
while ((result = next_match(program, it2, saved))) {
|
||||
printf("\nRES: %d\n", result);
|
||||
for (int i = 0; i < 40; i++)
|
||||
printf("%d, ", saved[i]);
|
||||
}
|
||||
|
||||
// Inst *program = compile_regex(pattern);
|
||||
//
|
||||
// bool result;
|
||||
// while ((result = next_match(program, it2, saved))) {
|
||||
// printf("\nRES: %d\n", result);
|
||||
// for (int i = 0; i < 40; i++)
|
||||
// printf("%d, ", saved[i]);
|
||||
// }
|
||||
//
|
||||
// char c2 = ' ';
|
||||
// while ((c2 = next_byte(it2)) != '\0')
|
||||
// printf("%c :wow!:\n", c2);
|
||||
|
||||
42
src/test.cpp
42
src/test.cpp
@@ -41,7 +41,7 @@ int main() {
|
||||
printf("Loading file into rope...\n");
|
||||
char *buf = load_file("./random.bin", &len);
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
Knot *root = load(buf, len, 2);
|
||||
Knot *root = load(buf, len, optimal_chunk_size(len));
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
printf("Load time: %.3f s\n",
|
||||
std::chrono::duration<double>(end - start).count());
|
||||
@@ -191,40 +191,56 @@ int main() {
|
||||
// search test
|
||||
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
std::vector<std::pair<size_t, size_t>> matches = search_rope(root, "f.x");
|
||||
std::vector<std::pair<size_t, size_t>> matches =
|
||||
search_rope(root, "[A-Z][a-z]+");
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
printf("Search Time: %.6f s\n",
|
||||
std::chrono::duration<double>(end - start).count());
|
||||
printf("Found %lu matches\n", matches.size());
|
||||
|
||||
char *c = read(root, 0, 1000);
|
||||
printf("%s\n", c);
|
||||
free(c);
|
||||
// char *c = read(root, 0, 1000);
|
||||
// printf("%s\n", c);
|
||||
// free(c);
|
||||
|
||||
ByteIterator *it1 = begin_b_iter(root);
|
||||
char ch;
|
||||
while ((ch = next_byte(it1)) != '\0') {
|
||||
printf("%c:", ch);
|
||||
}
|
||||
// ByteIterator *it1 = begin_b_iter(root);
|
||||
// char ch;
|
||||
// while ((ch = next_byte(it1)) != '\0') {
|
||||
// printf("%c:", ch);
|
||||
// }
|
||||
|
||||
ByteIterator *it2 = begin_b_iter(root);
|
||||
uint32_t saved[40];
|
||||
for (int i = 0; i < 40; i++)
|
||||
saved[i] = 0;
|
||||
std::string pattern = "f.x";
|
||||
std::string pattern = "[A-Z][a-z]+";
|
||||
Inst *program = compile_regex(pattern);
|
||||
print_program(program);
|
||||
bool result;
|
||||
int prolen = proglen(program);
|
||||
ThreadList *clist = (ThreadList *)malloc(sizeof(ThreadList));
|
||||
clist->t = (Thread *)malloc(+sizeof(Thread) * prolen);
|
||||
clist->n = 0;
|
||||
ThreadList *nlist = (ThreadList *)malloc(sizeof(ThreadList));
|
||||
nlist->t = (Thread *)malloc(+sizeof(Thread) * prolen);
|
||||
nlist->n = 0;
|
||||
int count = 0;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
while ((result = next_match(program, it2, saved))) {
|
||||
while ((result = next_match(program, it2, saved, clist, nlist))) {
|
||||
count++;
|
||||
printf("%d\n", count);
|
||||
}
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
printf("Search Time: %.6f s\n",
|
||||
std::chrono::duration<double>(end - start).count());
|
||||
printf("Found2 %d matches\n", count);
|
||||
|
||||
free_program(program);
|
||||
free(it2->it);
|
||||
free(it2);
|
||||
free(clist->t);
|
||||
free(nlist->t);
|
||||
free(clist);
|
||||
free(nlist);
|
||||
|
||||
free_rope(root);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user