From 5256e956f3022d580877082e1d6c4b6a44d35703 Mon Sep 17 00:00:00 2001 From: Syed Daanish Date: Fri, 28 Nov 2025 20:57:38 +0000 Subject: [PATCH] Add benchmark and readme --- README.md | 241 +++++++++++++++++++++++++++++++ src/rope.cpp | 8 +- src/test.cpp | 299 -------------------------------------- tests/benchmark.cpp | 342 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 587 insertions(+), 303 deletions(-) create mode 100644 README.md delete mode 100644 src/test.cpp create mode 100644 tests/benchmark.cpp diff --git a/README.md b/README.md new file mode 100644 index 0000000..04f37da --- /dev/null +++ b/README.md @@ -0,0 +1,241 @@ +# Rope Data Structure (C++ Implementation) + +This library provides an efficient **rope** implementation in C++ for large, mutable text. +It supports fast insertion, deletion, concatenation, iteration, and regex-based search. +The design uses fixed-size leaf chunks and automatically maintains a balanced binary tree. + +## Features + +* Efficient handling of large text buffers +* Log-time insertion, deletion, and concatenation +* Optional tunable chunk sizes +* Line, leaf, and byte iterators +* Regex search using PCRE2 (DFA mode) +* Regex using `noose` can also be done but it is much slower for now, I am working on making it faster. +* Zero-copy leaf iteration (safe immutable reads) +* Fully heap-allocated, manual memory management +* Deterministic balancing strategy (AVL-style depth tracking) + +## ⚡ Performance Benchmarks + +Performance comparison between this Rope implementation and the standard C++ `std::string`. + +**Test Context:** +* **Dataset:** ~1 GiB generated text (1,073,741,856 bytes). +* **Goal:** Simulating heavy text-editing operations. 
+ +### Key Highlights +When handling large datasets, the structural advantages of the Rope become massive: + +* **Appending Large Text:** **2.4 Million x faster** (0.001ms vs 1831ms) +* **Splitting Text:** **162,000x faster** +* **Insert (Middle):** **87,000x faster** +* **Erase:** **15,000x faster** + +### Numbers + +| Operation | Rope Time | String Time | String to Rope Ratio | Winner | +| :--- | :--- | :--- | :--- | :--- | +| **Concat (Append Large)** | **0.001 ms** | 1831.666 ms | **2,429,265x** | ✅ **Rope** | +| **Split (Half)** | **0.008 ms** | 1353.086 ms | **162,240x** | ✅ **Rope** | +| **Insert (Middle)** | **0.011 ms** | 976.105 ms | **87,206x** | ✅ **Rope** | +| **Erase (5KB)** | **0.013 ms** | 207.760 ms | **15,953x** | ✅ **Rope** | +| **Iterate 1000 Lines** | **0.063 ms** | 5.723 ms | **90x** | ✅ **Rope** | +| **Concat (Append small)** | 0.002 ms | **0.000 ms** | 0.1 | ❌ String | +| **Load / Create** | 1512.449 ms | **871.330 ms** | 0.58x | ❌ String | +| **Free / Destruct** | 194.167 ms | **153.365 ms** | 0.79x | ❌ String | +| **Read / Substr (1KiB)** | 0.008 ms | **0.002 ms** | 0.24x | ❌ String | +| **Search (Regex)** | 6417.719 ms | **1.526 ms** | ~0x | ❌ String | + + +**Why is the Rope faster?**
+Standard strings require contiguous memory. When you insert text into the middle of a 1GB `std::string`, the CPU must shift all subsequent bytes in memory ($O(n)$ complexity). This Rope implementation uses a tree structure, allowing insertions and deletions by simply modifying pointers ($O(\log n)$ complexity). + +Also, for concatenation, ropes take amortized constant time, whereas strings take linear time.
+So for small concatenations strings are faster, but for anything larger the rope is best.
+ + +**Why is the String faster for Regex/Read?**
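+`std::string` is just a flat array of bytes, which is extremely cache-friendly for linear scanning and regex engines. Ropes require tree traversal to read sequential data, resulting in higher overhead for read-only operations. Note that the regex figure is not a like-for-like comparison: in `tests/benchmark.cpp` the `std::regex` loop stops after about 1,000 matches, whereas `search_rope` collects every match in the rope. + +I have also tested against `ropey`, the Rust rope library, and against Rust strings.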
+Ropey has much faster load times but is slightly slower than my implementation in other operations.
+I am not including those results here, as my tests may not have been run under optimal conditions and so could give different results.
+The results included here come from tests I ran myself, with both implementations compiled by clang++ into the same binary (see `tests/benchmark.cpp`).
+Also on a side note rust strings are faster than c++ strings apart from reading. + + +## Rope Node Structure + +Each rope node (`Knot`) is either: + +* an **internal node** with `left` and `right` children, or +* a **leaf node** containing text data of size `chunk_size`. + +Metadata stored per node: + +* `depth` – subtree height +* `chunk_size` – leaf capacity +* `line_count` – number of `\n` newline characters +* `char_count` – total byte length covered by the subtree + +```c +typedef struct Knot { + Knot *left; + Knot *right; + uint8_t depth; + uint32_t chunk_size; + uint32_t line_count; + uint32_t char_count; + char data[]; +} Knot; +``` + + +## Chunk Size + +The library supports arbitrary positive chunk sizes, but provides: + +```c +uint32_t optimal_chunk_size(uint64_t length); +``` + +This suggests a chunk size based on the target input size. +Valid range is: + +* **MIN_CHUNK_SIZE:** 64 bytes +* **MAX_CHUNK_SIZE:** 8192 bytes + +You may choose any value; all ropes participating in concat operations **must use the same chunk size**. + + +# API Overview + +## Construction and Loading + +```c +load(char *str, uint32_t len, uint32_t chunk_size) +``` + +Builds a rope from a raw byte buffer. +`str` is not consumed and may be freed after loading. + + +## Structural Operations + +```c +Knot *concat(Knot *left, Knot *right) +``` + +Concatenates two ropes (must share chunk size). +Both input roots are invalid after the call. + +```c +Knot *insert(Knot *node, uint32_t offset, char *str, uint32_t len) +``` + +Inserts text at a byte offset. +Returns a new rope. +The original node becomes invalid. + +```c +Knot *erase(Knot *node, uint32_t offset, uint32_t len) +``` + +Deletes a byte range. +Returns a new rope. +The original node becomes invalid. + +```c +void split(Knot *node, uint32_t offset, Knot **left, Knot **right) +``` + +Splits a rope into two ropes at the given byte offset. +The original node becomes invalid. 
+ +```c +char *read(Knot *root, uint32_t offset, uint32_t len) +``` + +Extracts a substring. +Returns a null-terminated buffer; caller must free it. + + +## Line & Byte Mapping + +```c +uint32_t byte_to_line(Knot *root, uint32_t offset) +``` + +Converts a byte offset into a line index. + +```c +uint32_t line_to_byte(Knot *root, uint32_t line, uint32_t *out_len) +``` + +Returns the byte offset of the beginning of a line and outputs that line’s length. + + +## Iterators + +### Line Iterator + +```c +LineIterator *begin_l_iter(Knot *root, uint32_t start_line); +char *next_line(LineIterator *it); // caller frees result +``` + +### Leaf Iterator + +```c +LeafIterator *begin_k_iter(Knot *root); +char *next_leaf(LeafIterator *it); // DO NOT free result +``` + +### Byte Iterator + +```c +ByteIterator *begin_b_iter(Knot *root); +char next_byte(ByteIterator *it); +``` + +All iterators must be freed by the caller after use. +> For `ByteIterator`, `ByteIterator.it` must be freed before the iterator is freed. + + +## Searching + +```cpp +std::vector> search_rope(Knot *root, const char *pattern) +``` + +Searches the rope using PCRE2 in DFA mode. +Returns a vector of `(start_offset, length)` pairs. +Only deterministic patterns are supported (no backtracking). + + +## Memory Management + +```c +void free_rope(Knot *root) +``` + +Recursively frees all nodes in the rope. +Must be called once when the rope is no longer needed. 
+ + +# Example Usage + +```c +uint32_t chunk_size = optimal_chunk_size(strlen(input)); +Knot *r = load(input, strlen(input), chunk_size); + +r = insert(r, 5, "hello", 5); +r = erase(r, 20, 3); + +char *sub = read(r, 10, 25); +printf("%s\n", sub); +free(sub); + +free_rope(r); +``` diff --git a/src/rope.cpp b/src/rope.cpp index 884343d..6275900 100644 --- a/src/rope.cpp +++ b/src/rope.cpp @@ -44,7 +44,7 @@ Knot *load(char *str, uint32_t len, uint32_t chunk_size) { node->line_count = left->line_count + right->line_count; return node; } else { - Knot *node = (Knot *)malloc(sizeof(Knot) + chunk_size); + Knot *node = (Knot *)malloc(sizeof(Knot) + chunk_size + 1); if (!node) return nullptr; node->left = nullptr; @@ -67,7 +67,7 @@ Knot *load(char *str, uint32_t len, uint32_t chunk_size) { // leaf if consumed and freed (so dont use or free it after) // left and right are the new nodes static void split_leaf(Knot *leaf, uint32_t k, Knot **left, Knot **right) { - Knot *left_node = (Knot *)malloc(sizeof(Knot) + leaf->chunk_size); + Knot *left_node = (Knot *)malloc(sizeof(Knot) + leaf->chunk_size + 1); left_node->left = nullptr; left_node->right = nullptr; left_node->chunk_size = leaf->chunk_size; @@ -83,7 +83,7 @@ static void split_leaf(Knot *leaf, uint32_t k, Knot **left, Knot **right) { left_node->line_count = newline_count; uint16_t right_line_count = leaf->line_count - newline_count; *left = left_node; - Knot *right_node = (Knot *)malloc(sizeof(Knot) + leaf->chunk_size); + Knot *right_node = (Knot *)malloc(sizeof(Knot) + leaf->chunk_size + 1); right_node->left = nullptr; right_node->right = nullptr; right_node->chunk_size = leaf->chunk_size; @@ -191,7 +191,7 @@ Knot *concat(Knot *left, Knot *right) { } if (left->depth == 0 && right->depth == 0) { if (left->char_count + right->char_count <= left->chunk_size) { - Knot *node = (Knot *)malloc(sizeof(Knot) + left->chunk_size); + Knot *node = (Knot *)malloc(sizeof(Knot) + left->chunk_size + 1); node->left = nullptr; node->right 
= nullptr; node->chunk_size = left->chunk_size; diff --git a/src/test.cpp b/src/test.cpp deleted file mode 100644 index 3a4077e..0000000 --- a/src/test.cpp +++ /dev/null @@ -1,299 +0,0 @@ -#include "../api/noose.hpp" -#include "../api/rope.hpp" -#include -#include -#include -#include -#include -#include -#include - -char *load_file(const char *path, size_t *out_len) { - FILE *f = fopen(path, "rb"); - if (!f) { - perror("fopen"); - return nullptr; - } - fseek(f, 0, SEEK_END); - size_t len = ftell(f); - rewind(f); - - char *buf = (char *)malloc(len); - if (!buf) { - perror("malloc"); - fclose(f); - return nullptr; - } - - fread(buf, 1, len, f); - fclose(f); - - *out_len = len; - return buf; -} - -int main() { - - printf("My rope implementation benchmark\n"); - - { - size_t len; - printf("Loading file into rope...\n"); - char *buf = load_file("./random.bin", &len); - auto start = std::chrono::high_resolution_clock::now(); - Knot *root = load(buf, len, optimal_chunk_size(len)); - auto end = std::chrono::high_resolution_clock::now(); - printf("Load time: %.3f s\n", - std::chrono::duration(end - start).count()); - - free(buf); - - // READ TEST - printf("Testing read...\n"); - start = std::chrono::high_resolution_clock::now(); - char *content = read(root, len / 2, 1024); - end = std::chrono::high_resolution_clock::now(); - free(content); - printf("Read 1 KB from middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // INSERT TEST - printf("Testing insert...\n"); - char insert_data[1024]; - memset(insert_data, 'X', 1024); - start = std::chrono::high_resolution_clock::now(); - root = insert(root, len / 2, insert_data, 1024); - end = std::chrono::high_resolution_clock::now(); - printf("Insert 1 KB in middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // ERASE TEST (Delete the same 1 KB we just inserted) - printf("Testing erase...\n"); - start = std::chrono::high_resolution_clock::now(); - root = erase(root, len / 2, 1024); - end = 
std::chrono::high_resolution_clock::now(); - printf("Erase 1 KB in middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // SPLIT TEST - printf("Testing split...\n"); - Knot *left = nullptr, *right = nullptr; - start = std::chrono::high_resolution_clock::now(); - split(root, len / 2, &left, &right); - end = std::chrono::high_resolution_clock::now(); - printf("Split at middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // CONCAT TEST - printf("Testing concat...\n"); - start = std::chrono::high_resolution_clock::now(); - root = concat(left, right); - end = std::chrono::high_resolution_clock::now(); - printf("Concat: %.6f s\n", - std::chrono::duration(end - start).count()); - - // --------------------------------------------------------- - // LINE OPERATIONS TESTS - // --------------------------------------------------------- - printf("Testing line operations...\n"); - - // KNOWN CONSTANTS based on: yes "The quick brown fox jumps over the lazy - // dog." String length: 44 + 1 newline = 45 bytes per line. - const uint32_t BYTES_PER_LINE = 45; - const uint32_t TEST_LINE_INDEX = 1000; // A line deep in the file - - // 1. Test byte_to_line - // We pick a byte in the middle of TEST_LINE_INDEX. - // Offset = (100000 * 45) + 10. - uint32_t test_offset = (TEST_LINE_INDEX * BYTES_PER_LINE) + 10; - - start = std::chrono::high_resolution_clock::now(); - uint16_t calculated_line = byte_to_line(root, test_offset); - end = std::chrono::high_resolution_clock::now(); - - printf("byte_to_line (%u -> %u): %.6f s ", test_offset, calculated_line, - std::chrono::duration(end - start).count()); - - if (calculated_line == TEST_LINE_INDEX) { - printf("[PASS]\n"); - } else { - printf("[FAIL] Expected %u, got %u\n", TEST_LINE_INDEX, calculated_line); - } - - // 2. Test line_to_byte - // We ask for the start of TEST_LINE_INDEX. Should be exactly - // TEST_LINE_INDEX * 45. 
- uint32_t out_len = 0; - uint32_t expected_start = TEST_LINE_INDEX * BYTES_PER_LINE; - - start = std::chrono::high_resolution_clock::now(); - uint32_t calculated_start = line_to_byte(root, TEST_LINE_INDEX, &out_len); - end = std::chrono::high_resolution_clock::now(); - - printf("line_to_byte (Line %u -> Offset %u): %.6f s ", TEST_LINE_INDEX, - calculated_start, - std::chrono::duration(end - start).count()); - - if (calculated_start == expected_start && out_len == BYTES_PER_LINE) { - printf("[PASS]\n"); - } else { - printf("[FAIL] Expected offset %u (len %u), got %u (len %u)\n", - expected_start, BYTES_PER_LINE, calculated_start, out_len); - } - - // --------------------------------------------------------- - // ITERATOR SPEED TEST - // --------------------------------------------------------- - printf("Testing iterator speed...\n"); - - const uint32_t LINES_TO_ITERATE = 10000; // Iterate 10,000 lines - - // 1. Initialize the iterator at a deep line index - uint32_t start_line = TEST_LINE_INDEX + 10; - - LeafIterator *it = begin_k_iter(root); - if (!it) { - printf("Iterator Test: [FAIL] begin_iterator returned NULL.\n"); - } else { - char *line = NULL; - uint32_t lines_read = 0; - - start = std::chrono::high_resolution_clock::now(); - - // 2. Iterate and time the process - // We use the clean C idiom: get the line, check for NULL, then - // process. - while (lines_read < LINES_TO_ITERATE && (line = next_leaf(it)) != NULL) { - // Note: We deliberately skip printing to focus on the Rope operation - // time. 
- lines_read++; - } - - end = std::chrono::high_resolution_clock::now(); - - double elapsed_time = std::chrono::duration(end - start).count(); - - printf("Iterator speed (f:: %u): %.6f s (%.2f lines/s)\n", lines_read, - elapsed_time, (double)lines_read / elapsed_time); - - if (lines_read == LINES_TO_ITERATE) { - printf("Iterator Test: [PASS] Successfully iterated %u lines.\n", - LINES_TO_ITERATE); - } else { - printf("Iterator Test: [FAIL] Expected %u lines, read %u.\n", - LINES_TO_ITERATE, lines_read); - } - - // 3. Clean up the iterator - free(it); - } - - // search test - - start = std::chrono::high_resolution_clock::now(); - std::vector> matches = - search_rope(root, "[A-Z][a-z]+"); - end = std::chrono::high_resolution_clock::now(); - printf("Search Time: %.6f s\n", - std::chrono::duration(end - start).count()); - printf("Found %lu matches\n", matches.size()); - - // char *c = read(root, 0, 1000); - // printf("%s\n", c); - // free(c); - - // ByteIterator *it1 = begin_b_iter(root); - // char ch; - // while ((ch = next_byte(it1)) != '\0') { - // printf("%c:", ch); - // } - - ByteIterator *it2 = begin_b_iter(root); - uint32_t saved[40]; - for (int i = 0; i < 40; i++) - saved[i] = 0; - std::string pattern = "[A-Z][a-z]+"; - Inst *program = compile_regex(pattern); - print_program(program); - bool result; - int prolen = proglen(program); - ThreadList *clist = (ThreadList *)malloc(sizeof(ThreadList)); - clist->t = (Thread *)malloc(+sizeof(Thread) * prolen); - clist->n = 0; - ThreadList *nlist = (ThreadList *)malloc(sizeof(ThreadList)); - nlist->t = (Thread *)malloc(+sizeof(Thread) * prolen); - nlist->n = 0; - int count = 0; - start = std::chrono::high_resolution_clock::now(); - while ((result = next_match(program, it2, saved, clist, nlist))) { - count++; - } - end = std::chrono::high_resolution_clock::now(); - printf("Search Time: %.6f s\n", - std::chrono::duration(end - start).count()); - printf("Found2 %d matches\n", count); - - free_program(program); - 
free(it2->it); - free(it2); - free(clist->t); - free(nlist->t); - free(clist); - free(nlist); - - free_rope(root); - } - - printf("Testing std::string...\n"); - - { - std::ifstream file("random.bin", std::ios::binary | std::ios::ate); - if (!file) { - perror("ifstream"); - return 1; - } - size_t len = file.tellg(); - file.seekg(0); - std::string data(len, '\0'); - file.read(data.data(), len); - - std::string s = data; - - auto start = std::chrono::high_resolution_clock::now(); - // READ: middle 1 KB - std::string read_chunk = s.substr(len / 2, 1024); - auto end = std::chrono::high_resolution_clock::now(); - printf("std::string read 1 KB from middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // INSERT: middle 1 KB - std::string insert_data(1024, 'X'); - start = std::chrono::high_resolution_clock::now(); - s.insert(len / 2, insert_data); - end = std::chrono::high_resolution_clock::now(); - printf("std::string insert 1 KB in middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // ERASE: middle 1 KB - start = std::chrono::high_resolution_clock::now(); - s.erase(len / 2, 1024); - end = std::chrono::high_resolution_clock::now(); - printf("std::string erase 1 KB in middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // SPLIT: middle - start = std::chrono::high_resolution_clock::now(); - std::string left = s.substr(0, len / 2); - std::string right = s.substr(len / 2); - end = std::chrono::high_resolution_clock::now(); - printf("std::string split at middle: %.6f s\n", - std::chrono::duration(end - start).count()); - - // CONCAT - start = std::chrono::high_resolution_clock::now(); - s = left + right; - end = std::chrono::high_resolution_clock::now(); - printf("std::string concat: %.6f s\n", - std::chrono::duration(end - start).count()); - } -} diff --git a/tests/benchmark.cpp b/tests/benchmark.cpp new file mode 100644 index 0000000..f81184d --- /dev/null +++ b/tests/benchmark.cpp @@ -0,0 +1,342 @@ +#include 
"../api/rope.hpp" +#include +#include +#include +#include +#include +#include +#include + +// Include the user's header + +// --- Timer Helper --- +class Timer { + using Clock = std::chrono::high_resolution_clock; + std::chrono::time_point start_time; + +public: + Timer() { reset(); } + void reset() { start_time = Clock::now(); } + double elapsed_ms() { + auto end_time = Clock::now(); + return std::chrono::duration(end_time - start_time) + .count(); + } +}; + +// --- Formatting Helper --- +void print_result(const std::string &test_name, double rope_ms, double str_ms) { + std::cout << std::left << std::setw(25) << test_name + << " | Rope: " << std::setw(10) << std::fixed + << std::setprecision(3) << rope_ms << " ms" + << " | String: " << std::setw(10) << str_ms << " ms" + << " | Ratio (Str/Rope): " << std::setprecision(2) << std::setw(10) + << (str_ms / rope_ms) << "x" << " | " << std::fixed << " So " + << ((str_ms - rope_ms) <= 0 ? "string" : "rope ") + << " is faster by " << std::fabs(str_ms - rope_ms) << " ms" + << std::endl; +} + +int main() { + // 1. DATA GENERATION + std::cout << "Generating ~1GiB dataset..." << std::endl; + const std::string pattern = "The quick brown fox jumps over the lzy dog.\n"; + // Target ~100 MiB (100 * 1024 * 1024 bytes) + const size_t target_size = 1024 * 1024 * 1024; + std::string source_data; + source_data.reserve(target_size + pattern.size()); + + while (source_data.size() < target_size) + source_data.append(pattern); + + uint32_t total_len = static_cast(source_data.size()); + std::cout << "Dataset generated. 
Size: " << total_len << " bytes.\n" + << std::endl; + + Timer t; + double rope_time, str_time; + + // ========================================== + // TEST 1: LOAD / CREATION + // ========================================== + + // Rope Load + t.reset(); + uint32_t chunk_size = optimal_chunk_size(total_len); + // Note: Cast to char* because header asks for char*, usually strings are + // const char* + Knot *root = + load(const_cast(source_data.c_str()), total_len, chunk_size); + rope_time = t.elapsed_ms(); + + // String Load (Copy) + t.reset(); + std::string str_copy = source_data; + str_time = t.elapsed_ms(); + + print_result("Load / Create", rope_time, str_time); + + // ========================================== + // TEST 2: INSERT (Middle) + // ========================================== + std::string insert_pattern = " [INSERTED TEXT] "; + uint32_t insert_pos = total_len / 2; + + // Rope Insert + t.reset(); + root = insert(root, insert_pos, const_cast(insert_pattern.c_str()), + (uint32_t)insert_pattern.size()); + rope_time = t.elapsed_ms(); + + // String Insert + t.reset(); + str_copy.insert(insert_pos, insert_pattern); + str_time = t.elapsed_ms(); + + print_result("Insert (Middle)", rope_time, str_time); + + // ========================================== + // TEST 3: READ / SUBSTR + // ========================================== + uint32_t read_len = 1024; + uint32_t read_pos = total_len / 2; // Read from where we just inserted + + // Rope Read + t.reset(); + char *rope_read_res = read(root, read_pos, read_len); + rope_time = t.elapsed_ms(); + free(rope_read_res); // Free result as per header + + // String Substr + t.reset(); + std::string str_read_res = str_copy.substr(read_pos, read_len); + str_time = t.elapsed_ms(); + + print_result("Read / Substr (1KiB)", rope_time, str_time); + + // ========================================== + // TEST 4: CONCATENATION + // ========================================== + // Create a temporary rope to append + Knot *suffix_rope = 
load(const_cast(pattern.c_str()), + (uint32_t)pattern.size(), chunk_size); + + // Rope Concat + t.reset(); + root = concat(root, suffix_rope); + rope_time = t.elapsed_ms(); + + // String Append + t.reset(); + str_copy += pattern; + str_time = t.elapsed_ms(); + + print_result("Concat (Append small)", rope_time, str_time); + + Knot *large_rope = + load(const_cast(source_data.c_str()), total_len, chunk_size); + + // Rope Concat + t.reset(); + root = concat(root, large_rope); + rope_time = t.elapsed_ms(); + + Knot *L = nullptr; + Knot *R = nullptr; + split(root, total_len, &L, &R); + root = L; + free_rope(R); + + // String Append + t.reset(); + str_copy += source_data; + str_time = t.elapsed_ms(); + + print_result("Concat (Append large)", rope_time, str_time); + + // ========================================== + // TEST 5: ERASE + // ========================================== + uint32_t erase_len = 5000; // Erase 5KB + uint32_t erase_pos = total_len / 4; + + // Rope Erase + t.reset(); + root = erase(root, erase_pos, erase_len); + rope_time = t.elapsed_ms(); + + // String Erase + t.reset(); + str_copy.erase(erase_pos, erase_len); + str_time = t.elapsed_ms(); + + print_result("Erase (5KB)", rope_time, str_time); + + // ========================================== + // TEST 6: LINE TO BYTE (Indexing) + // ========================================== + // Pick a line number deep in the file + uint32_t target_line = 100000; + uint32_t out_len = 0; + + // Rope Line Lookup + t.reset(); + volatile uint32_t r_offset = line_to_byte(root, target_line, &out_len); + rope_time = t.elapsed_ms(); + + // String Line Lookup (Simulated: Must scan for newlines) + t.reset(); + size_t current_line = 0; + size_t s_offset = 0; + // Manual scan is the standard way for std::string + for (size_t i = 0; i < str_copy.size(); ++i) { + if (str_copy[i] == '\n') { + current_line++; + if (current_line == target_line) { + s_offset = i + 1; // Start of next line + break; + } + } + } + str_time = 
t.elapsed_ms(); + + print_result("Line -> Byte Offset", rope_time, str_time); + + // ========================================== + // TEST 7: BYTE TO LINE + // ========================================== + uint32_t target_offset = total_len / 2; + + // Rope Byte Lookup + t.reset(); + volatile uint32_t r_line = byte_to_line(root, target_offset); + rope_time = t.elapsed_ms(); + + // String Byte Lookup (Simulated scan backwards or from start) + t.reset(); + size_t s_line = 0; + for (size_t i = 0; i < target_offset && i < str_copy.size(); ++i) { + if (str_copy[i] == '\n') + s_line++; + } + str_time = t.elapsed_ms(); + + print_result("Byte Offset -> Line", rope_time, str_time); + + // ========================================== + // TEST 8: LINE ITERATION (Next 1000 lines) + // ========================================== + int lines_to_read = 1000; + uint32_t start_iter_line = 50000; + + // Rope Iteration + t.reset(); + LineIterator *lit = begin_l_iter(root, start_iter_line); + for (int i = 0; i < lines_to_read; ++i) { + char *line = next_line(lit); + if (line) + free(line); // Must free per header + else + break; + } + // Note: Assuming `free(lit)` or similar is needed, + // though header says "returned iterator must be freed". + // I will assume standard `delete` or `free` works on the struct pointer. 
+ free(lit); + rope_time = t.elapsed_ms(); + + // String Iteration + // To be fair, we find the starting offset, then read lines + t.reset(); + size_t iter_offset = 0; + size_t cur_ln = 0; + // Fast forward (cost of finding start) + while (cur_ln < start_iter_line && iter_offset < str_copy.size()) { + if (str_copy[iter_offset++] == '\n') + cur_ln++; + } + // Read loop + for (int i = 0; i < lines_to_read && iter_offset < str_copy.size(); ++i) { + size_t next_nl = str_copy.find('\n', iter_offset); + if (next_nl == std::string::npos) + break; + // Simulate extracting the string + volatile std::string temp = + str_copy.substr(iter_offset, next_nl - iter_offset); + iter_offset = next_nl + 1; + } + str_time = t.elapsed_ms(); + + print_result("Iterate 1000 Lines", rope_time, str_time); + + // ========================================== + // TEST 9: SEARCH (Regex) + // ========================================== + // Search for a specific pattern that occurs + const char *search_pattern = "brown fox"; + + // Rope Search (DFA/PCRE as per header) + t.reset(); + auto rope_matches = search_rope(root, search_pattern); + rope_time = t.elapsed_ms(); + + t.reset(); + try { + std::regex re(search_pattern); + auto words_begin = + std::sregex_iterator(str_copy.begin(), str_copy.end(), re); + auto words_end = std::sregex_iterator(); + size_t count = 0; + for (std::sregex_iterator i = words_begin; i != words_end; ++i) { + count++; + // Don't iterate millions of times for the benchmark if it takes forever + if (count > 1000) + break; + } + } catch (...) 
{ + } + str_time = t.elapsed_ms(); + + print_result("Search (Regex)", rope_time, str_time); + + // ========================================== + // TEST 10: SPLIT + // ========================================== + uint32_t split_point = total_len / 2; + Knot *left_side = nullptr; + Knot *right_side = nullptr; + + // Rope Split + t.reset(); + // split consumes 'root', so root is invalid after this + split(root, split_point, &left_side, &right_side); + rope_time = t.elapsed_ms(); + + // String Split (Simulated via substr copies) + t.reset(); + std::string s_left = str_copy.substr(0, split_point); + std::string s_right = str_copy.substr(split_point); + str_time = t.elapsed_ms(); + + print_result("Split (Half)", rope_time, str_time); + + // ========================================== + // CLEANUP + // ========================================== + t.reset(); + free_rope(left_side); + free_rope(right_side); + rope_time = t.elapsed_ms(); + + // std::string cleans up automatically, but let's time the destruction + t.reset(); + { + std::string temp1 = std::move(s_left); + std::string temp2 = std::move(s_right); + } // destructors run here + str_time = t.elapsed_ms(); + + print_result("Free / Destruct", rope_time, str_time); + + return 0; +}