Initial commit
This commit is contained in:
87
api/noose.hpp
Normal file
87
api/noose.hpp
Normal file
@@ -0,0 +1,87 @@
|
||||
#ifndef NOOSE_HPP
|
||||
#define NOOSE_HPP
|
||||
#include "../headers/rope.hpp"

#include <string>
#include <utility>
|
||||
|
||||
struct Exp;

// Kind tag stored in Exp::kind.
// NOTE(review): which payload each kind uses is not visible in this header;
// the member names suggest OR -> opor, SEQ -> opseq, RANGE -> ranges — confirm
// against the parser.
enum class ExpKind { RANGE, OR, SEQ, ANY, NONE };

// One character interval of a range expression.
// NOTE(review): presumably `start`..`end` are the interval bounds and `negate`
// inverts the test — confirm against the parser/compiler.
struct ExRange {
  bool negate = false;
  char start = 0; // zero-initialised so a default ExRange is never indeterminate
  char end = 0;
};

// Operands of an OR node: two sub-expressions.
struct OpOr {
  Exp *left;
  Exp *right;
};

// Operands of a SEQ node: two sub-expressions.
struct OpSeq {
  Exp *left;
  Exp *right;
};

// Node of the regex expression tree.
//
// `ranges` used to live inside the anonymous union next to the two pointers.
// A std::vector inside a union is never constructed or destroyed
// automatically and implicitly deletes the enclosing destructor, so an Exp
// could neither be declared on the stack nor deleted. It is now an ordinary
// member: always constructed, always destroyed, accessed as `exp.ranges`
// exactly as before.
struct Exp {
  bool capture = false;
  ExpKind kind = ExpKind::NONE;
  // Only pointers share storage now, so the union is trivial.
  union {
    OpOr *opor;
    OpSeq *opseq;
  };
  std::vector<ExRange> ranges;

  // Starts as a non-capturing NONE node with a null operand pointer and no
  // ranges.
  // NOTE(review): ownership of opor/opseq is not visible here, so no
  // destructor frees them.
  Exp() : opor(nullptr) {}
};
|
||||
|
||||
// Cursor over a string: the text `s` plus the current index `i` into it.
// NOTE(review): presumably used by regex_to_ast to walk the pattern — confirm.
struct Parser {
  std::string s;
  size_t i;

  // `str` is taken by value and moved into `s`, so an rvalue argument is not
  // copied a second time. The index starts at 0.
  Parser(std::string str) : s(std::move(str)), i(0) {}
};
|
||||
|
||||
// Opcodes of the compiled matcher program.
enum Op {
  // -- Control flow: these jump around --
  JMP = 0, // Jump to j.x
  FRK = 1, // Fork to j.x and j.y (x has priority)

  // -- Consumers: each takes 1 char from the input; if it cannot, the thread
  //    fails (failure of the main thread means the match did not succeed) --
  MCH = 2, // Match against the range object
  NMC = 3, // Must NOT match against the range object
  ANY = 4, // Accept anything

  // -- Capture bookkeeping: used to save offsets --
  SVS = 5, // Start of save for capture group i
  SVE = 6, // End of save for capture group i

  // -- Termination: the match succeeds if the main thread reaches this --
  END = 7
};
|
||||
|
||||
// Character interval delimited by `start` and `end`.
// Set start == end to match one particular char.
struct Range {
  char start;
  char end;
};
|
||||
|
||||
struct Inst {
|
||||
Op op;
|
||||
union {
|
||||
struct {
|
||||
Range *ranges;
|
||||
int len;
|
||||
} r;
|
||||
struct {
|
||||
int x, y;
|
||||
} j;
|
||||
};
|
||||
int idx;
|
||||
};
|
||||
|
||||
// NOTE(review): going by the name and types, parses `pattern` into an Exp tree
// and returns its root. Ownership of the returned node and the error return
// value are not visible in this header — confirm in the implementation.
Exp *regex_to_ast(std::string pattern);
|
||||
// NOTE(review): going by the name and types, turns the Exp tree rooted at
// `root` into an Inst program. Program length/termination and who frees the
// result are not visible in this header — confirm in the implementation.
Inst *compile_ast(Exp *root);
|
||||
// NOTE(review): presumably a convenience wrapper that goes from `pattern`
// straight to an Inst program (regex_to_ast followed by compile_ast) — confirm
// in the implementation.
Inst *compile_regex(std::string pattern);
|
||||
// NOTE(review): presumably runs `prog` over the bytes produced by `it` and
// records capture offsets in `saved`. The meaning of the int result and the
// required size of `saved` are not visible in this header — confirm in the
// implementation.
int next_match(Inst *prog, ByteIterator *it, uint32_t *saved);
|
||||
|
||||
#endif
|
||||
157
api/rope.hpp
Normal file
157
api/rope.hpp
Normal file
@@ -0,0 +1,157 @@
|
||||
#ifndef ROPE_HPP
|
||||
#define ROPE_HPP
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
// Chunk size limits, in bytes.
#define MIN_CHUNK_SIZE 64 // 64 Bytes
// Parenthesised so the macro expands as a single value: without the
// parentheses `x / MAX_CHUNK_SIZE` would expand to `x / 1024 * 8`.
#define MAX_CHUNK_SIZE (1024 * 8) // 8192 Bytes (8 KiB)
|
||||
// Larger of a and b. The selected argument is evaluated twice, so do not pass
// expressions with side effects.
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
// Smaller of a and b. The selected argument is evaluated twice, so do not pass
// expressions with side effects.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
// Depth of node n, or 0 when n is null. n is evaluated twice when non-null.
#define DEPTH(n) ((n) ? (n)->depth : 0)
|
||||
|
||||
// Selects the 8-bit code unit flavour of the PCRE2 API; PCRE2 requires this to
// be defined before pcre2.h is included.
// NOTE(review): no pcre2.h include is visible in this header — confirm where
// it is consumed.
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
// NOTE(review): presumably the number of workspace entries handed to the PCRE2
// matcher — its use is not visible in this header, confirm in the
// implementation.
#define PCRE_WORKSPACE_SIZE 512
|
||||
|
||||
// Rope node.
// `left` / `right` are the child links and `depth` is what DEPTH() reads.
// `data` is a trailing flexible array, so nodes have to be allocated with
// extra room for their bytes.
// NOTE(review): the exact meaning of chunk_size / line_count / char_count
// (per node vs. per subtree) is not visible in this header — confirm in the
// implementation.
struct Knot {
  Knot *left;
  Knot *right;
  uint8_t depth;
  uint32_t chunk_size;
  uint32_t line_count;
  uint32_t char_count;
  char data[];
};
|
||||
|
||||
typedef struct LineIterator {
|
||||
Knot *node;
|
||||
uint8_t top;
|
||||
uint32_t offset;
|
||||
uint32_t line;
|
||||
Knot *stack[64];
|
||||
} LineIterator;
|
||||
|
||||
typedef struct LeafIterator {
|
||||
Knot *node;
|
||||
uint8_t top;
|
||||
uint32_t offset;
|
||||
Knot *stack[64];
|
||||
} LeafIterator;
|
||||
|
||||
typedef struct ByteIterator {
|
||||
LeafIterator *it;
|
||||
uint32_t offset_l;
|
||||
uint32_t offset_g;
|
||||
uint32_t char_count;
|
||||
char *data;
|
||||
} ByteIterator;
|
||||
|
||||
// Rope operations
|
||||
|
||||
// Suggests a chunk size for a string that is about to be turned into a rope.
// `length` is the length of that string.
// The result is only a recommendation: the rope should work with any positive
// chunk size.
uint32_t optimal_chunk_size(uint64_t length);
|
||||
|
||||
// Takes a string (no need for null termination) and returns a rope
|
||||
// len is the length of the string, and chunk size is the size of each chunk
|
||||
// load does not free or consume the string.
|
||||
// and the str can be freed after load has been run.
|
||||
Knot *load(char *str, uint32_t len, uint32_t chunk_size);
|
||||
|
||||
// Balances the rope and returns the root
|
||||
// n is no longer valid / do not free
|
||||
// As rope is balanced by other functions
|
||||
// this is not to be used directly
|
||||
Knot *balance(Knot *n);
|
||||
|
||||
// Concatenates two ropes and returns the joined root
|
||||
// Balances the ropes too, if needed
|
||||
// left and right are no longer valid / do not free
|
||||
// ! left and right should have the same chunk size !
|
||||
Knot *concat(Knot *left, Knot *right);
|
||||
|
||||
// Used to insert text into the rope
|
||||
// node (the rope being inserted into) is no longer valid after call
|
||||
// instead use return value as the new node
|
||||
// offset is the position of the insertion relative to the start of the rope
|
||||
// str is the string to be inserted (no need for null termination)
|
||||
// len is the length of the string
|
||||
Knot *insert(Knot *node, uint32_t offset, char *str, uint32_t len);
|
||||
|
||||
// Similar to insert but for deletion
|
||||
// node (the rope being deleted from) is no longer valid after call
|
||||
// instead use return value as the new node
|
||||
// offset is the position of the deletion relative to the start of the rope
|
||||
// len is the length of the deletion
|
||||
Knot *erase(Knot *node, uint32_t offset, uint32_t len);
|
||||
|
||||
// Used to read a string from the rope
|
||||
// root is the rope to be read from
|
||||
// offset is the position of the read relative to the start of the rope
|
||||
// len is the length of the read
|
||||
// returns a null terminated string, should be freed by the caller
|
||||
char *read(Knot *root, uint32_t offset, uint32_t len);
|
||||
|
||||
// Used to split the rope into left and right ropes
|
||||
// node is the rope to be split (it is no longer valid after call / do not free)
|
||||
// offset is the position of the split relative to the start of the rope
|
||||
// left and right are pointers set to the root of that side of the split
|
||||
void split(Knot *node, uint32_t offset, Knot **left, Knot **right);
|
||||
|
||||
// Used to convert a byte offset to a line number that contains that byte
|
||||
uint32_t byte_to_line(Knot *node, uint32_t offset);
|
||||
|
||||
// Used to convert a line number to a byte offset (start of the line)
|
||||
// also sets out_len to the length of the line
|
||||
uint32_t line_to_byte(Knot *node, uint32_t line, uint32_t *out_len);
|
||||
|
||||
// Used to start a line iterator from the start_line number
|
||||
// root is the root of the rope
|
||||
// returned iterator must be freed after iteration is done
|
||||
LineIterator *begin_l_iter(Knot *root, uint32_t start_line);
|
||||
|
||||
// Each subsequent call returns the next line as a null terminated string
|
||||
// `it` is the iterator returned from begin_l_iter
|
||||
// After getting the necessary lines free the iterator (no need to go upto the
|
||||
// end) returns null if there are no more lines All return strings `must` be
|
||||
// freed by the caller
|
||||
char *next_line(LineIterator *it);
|
||||
|
||||
// Used to start an iterator over leaf data
|
||||
// root is the root of the rope
|
||||
// the caller must free the iterator after use
|
||||
LeafIterator *begin_k_iter(Knot *root);
|
||||
|
||||
// Returns the next leaf data as a null terminated string
|
||||
// `it` is the iterator returned from begin_k_iter
|
||||
// ! Strings returned must never be freed by the caller !
|
||||
// to mutate the string a copy must be made
|
||||
char *next_leaf(LeafIterator *it);
|
||||
|
||||
// Used to start an iterator over byte data (one byte at a time)
|
||||
// Uses leaf iterator internally
|
||||
// root is the root of the rope, the caller must free the iterator after use
|
||||
ByteIterator *begin_b_iter(Knot *root);
|
||||
|
||||
// Returns the next byte from the iterator
|
||||
// Returns '\0' if there are no more bytes left
|
||||
// `it` is the iterator returned from begin_b_iter
|
||||
char next_byte(ByteIterator *it);
|
||||
|
||||
// Used to search for a pattern in the rope
|
||||
// Pattern is a null terminated string representing a regular expression (DFA
|
||||
// compliant) I.e some forms of backtracking etc. are not supported
|
||||
// root is the root of the rope to be searched
|
||||
// Returns a vector of pairs of start and length offsets (in bytes)
|
||||
std::vector<std::pair<size_t, size_t>> search_rope(Knot *root,
|
||||
const char *pattern);
|
||||
|
||||
// Helper function to free the rope
|
||||
// root is the root of the rope
|
||||
// the root is no longer valid after call
|
||||
// This must be called only once when the rope is no longer needed
|
||||
void free_rope(Knot *root);
|
||||
|
||||
#endif // ROPE_HPP
|
||||
Reference in New Issue
Block a user