343 lines
9.8 KiB
C++
343 lines
9.8 KiB
C++
#include "../api/rope.hpp"
|
|
#include <chrono>
|
|
#include <cmath>
|
|
#include <cstring>
|
|
#include <iomanip>
|
|
#include <iostream>
|
|
#include <regex>
|
|
#include <string>
|
|
|
|
// The rope API under test comes from ../api/rope.hpp, included at the top.
|
|
|
|
// --- Timer Helper ---
|
|
// Minimal stopwatch used to time each benchmark step.
//
// Uses std::chrono::steady_clock: it is guaranteed monotonic, so a measured
// interval cannot be distorted (or go negative) if the system wall clock is
// adjusted while a step is running.
class Timer {
  using Clock = std::chrono::steady_clock;
  Clock::time_point start_time;

public:
  // Starts timing immediately on construction.
  Timer() : start_time(Clock::now()) {}

  // Restarts the stopwatch from the current instant.
  void reset() { start_time = Clock::now(); }

  // Returns the time elapsed since construction or the last reset(), in
  // (fractional) milliseconds. Does not stop or restart the stopwatch.
  double elapsed_ms() const {
    const std::chrono::duration<double, std::milli> elapsed =
        Clock::now() - start_time;
    return elapsed.count();
  }
};
|
|
|
|
// --- Formatting Helper ---
|
|
// Prints one benchmark row to std::cout: the test name, both timings, the
// String/Rope time ratio, and which implementation was faster and by how much.
//
// test_name  label for the row (left-aligned, padded to 25 columns)
// rope_ms    time taken by the rope implementation, in milliseconds
// str_ms     time taken by the std::string implementation, in milliseconds
//
// The ratio column shows "n/a" when rope_ms is not positive (a division would
// only yield inf/nan), and a tie is reported as "neither" rather than being
// attributed to one side. std::cout's format flags and precision are restored
// before returning so the row formatting does not leak into later output.
void print_result(const std::string &test_name, double rope_ms, double str_ms) {
  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  const std::streamsize saved_precision = std::cout.precision();

  // Positive delta: the string was slower, i.e. the rope wins.
  const double delta_ms = str_ms - rope_ms;
  const char *winner = "neither";
  if (delta_ms < 0)
    winner = "string";
  else if (delta_ms > 0)
    winner = "rope "; // trailing space keeps the column width of "string"-ish rows

  std::cout << std::left << std::fixed << std::setw(25) << test_name
            << " | Rope: " << std::setw(10) << std::setprecision(3) << rope_ms
            << " ms"
            << " | String: " << std::setw(10) << str_ms << " ms"
            << " | Ratio (Str/Rope): " << std::setprecision(2)
            << std::setw(10);
  if (rope_ms > 0.0)
    std::cout << (str_ms / rope_ms);
  else
    std::cout << "n/a";
  std::cout << "x" << " | " << " So " << winner << " is faster by "
            << std::fabs(delta_ms) << " ms" << std::endl;

  std::cout.flags(saved_flags);
  std::cout.precision(saved_precision);
}
|
|
|
|
int main() {
|
|
// 1. DATA GENERATION
|
|
std::cout << "Generating ~1GiB dataset..." << std::endl;
|
|
const std::string pattern = "The quick brown fox jumps over the lzy dog.\n";
|
|
// Target ~100 MiB (100 * 1024 * 1024 bytes)
|
|
const size_t target_size = 1024 * 1024 * 1024;
|
|
std::string source_data;
|
|
source_data.reserve(target_size + pattern.size());
|
|
|
|
while (source_data.size() < target_size)
|
|
source_data.append(pattern);
|
|
|
|
uint32_t total_len = static_cast<uint32_t>(source_data.size());
|
|
std::cout << "Dataset generated. Size: " << total_len << " bytes.\n"
|
|
<< std::endl;
|
|
|
|
Timer t;
|
|
double rope_time, str_time;
|
|
|
|
// ==========================================
|
|
// TEST 1: LOAD / CREATION
|
|
// ==========================================
|
|
|
|
// Rope Load
|
|
t.reset();
|
|
uint32_t chunk_size = optimal_chunk_size(total_len);
|
|
// Note: Cast to char* because header asks for char*, usually strings are
|
|
// const char*
|
|
Knot *root =
|
|
load(const_cast<char *>(source_data.c_str()), total_len, chunk_size);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Load (Copy)
|
|
t.reset();
|
|
std::string str_copy = source_data;
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Load / Create", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 2: INSERT (Middle)
|
|
// ==========================================
|
|
std::string insert_pattern = " [INSERTED TEXT] ";
|
|
uint32_t insert_pos = total_len / 2;
|
|
|
|
// Rope Insert
|
|
t.reset();
|
|
root = insert(root, insert_pos, const_cast<char *>(insert_pattern.c_str()),
|
|
(uint32_t)insert_pattern.size());
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Insert
|
|
t.reset();
|
|
str_copy.insert(insert_pos, insert_pattern);
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Insert (Middle)", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 3: READ / SUBSTR
|
|
// ==========================================
|
|
uint32_t read_len = 1024;
|
|
uint32_t read_pos = total_len / 2; // Read from where we just inserted
|
|
|
|
// Rope Read
|
|
t.reset();
|
|
char *rope_read_res = read(root, read_pos, read_len);
|
|
rope_time = t.elapsed_ms();
|
|
free(rope_read_res); // Free result as per header
|
|
|
|
// String Substr
|
|
t.reset();
|
|
std::string str_read_res = str_copy.substr(read_pos, read_len);
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Read / Substr (1KiB)", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 4: CONCATENATION
|
|
// ==========================================
|
|
// Create a temporary rope to append
|
|
Knot *suffix_rope = load(const_cast<char *>(pattern.c_str()),
|
|
(uint32_t)pattern.size(), chunk_size);
|
|
|
|
// Rope Concat
|
|
t.reset();
|
|
root = concat(root, suffix_rope);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Append
|
|
t.reset();
|
|
str_copy += pattern;
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Concat (Append small)", rope_time, str_time);
|
|
|
|
Knot *large_rope =
|
|
load(const_cast<char *>(source_data.c_str()), total_len, chunk_size);
|
|
|
|
// Rope Concat
|
|
t.reset();
|
|
root = concat(root, large_rope);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
Knot *L = nullptr;
|
|
Knot *R = nullptr;
|
|
split(root, total_len, &L, &R);
|
|
root = L;
|
|
free_rope(R);
|
|
|
|
// String Append
|
|
t.reset();
|
|
str_copy += source_data;
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Concat (Append large)", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 5: ERASE
|
|
// ==========================================
|
|
uint32_t erase_len = 5000; // Erase 5KB
|
|
uint32_t erase_pos = total_len / 4;
|
|
|
|
// Rope Erase
|
|
t.reset();
|
|
root = erase(root, erase_pos, erase_len);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Erase
|
|
t.reset();
|
|
str_copy.erase(erase_pos, erase_len);
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Erase (5KB)", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 6: LINE TO BYTE (Indexing)
|
|
// ==========================================
|
|
// Pick a line number deep in the file
|
|
uint32_t target_line = 100000;
|
|
uint32_t out_len = 0;
|
|
|
|
// Rope Line Lookup
|
|
t.reset();
|
|
volatile uint32_t r_offset = line_to_byte(root, target_line, &out_len);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Line Lookup (Simulated: Must scan for newlines)
|
|
t.reset();
|
|
size_t current_line = 0;
|
|
size_t s_offset = 0;
|
|
// Manual scan is the standard way for std::string
|
|
for (size_t i = 0; i < str_copy.size(); ++i) {
|
|
if (str_copy[i] == '\n') {
|
|
current_line++;
|
|
if (current_line == target_line) {
|
|
s_offset = i + 1; // Start of next line
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Line -> Byte Offset", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 7: BYTE TO LINE
|
|
// ==========================================
|
|
uint32_t target_offset = total_len / 2;
|
|
|
|
// Rope Byte Lookup
|
|
t.reset();
|
|
volatile uint32_t r_line = byte_to_line(root, target_offset);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Byte Lookup (Simulated scan backwards or from start)
|
|
t.reset();
|
|
size_t s_line = 0;
|
|
for (size_t i = 0; i < target_offset && i < str_copy.size(); ++i) {
|
|
if (str_copy[i] == '\n')
|
|
s_line++;
|
|
}
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Byte Offset -> Line", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 8: LINE ITERATION (Next 1000 lines)
|
|
// ==========================================
|
|
int lines_to_read = 1000;
|
|
uint32_t start_iter_line = 50000;
|
|
|
|
// Rope Iteration
|
|
t.reset();
|
|
LineIterator *lit = begin_l_iter(root, start_iter_line);
|
|
for (int i = 0; i < lines_to_read; ++i) {
|
|
char *line = next_line(lit);
|
|
if (line)
|
|
free(line); // Must free per header
|
|
else
|
|
break;
|
|
}
|
|
// Note: Assuming `free(lit)` or similar is needed,
|
|
// though header says "returned iterator must be freed".
|
|
// I will assume standard `delete` or `free` works on the struct pointer.
|
|
free(lit);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Iteration
|
|
// To be fair, we find the starting offset, then read lines
|
|
t.reset();
|
|
size_t iter_offset = 0;
|
|
size_t cur_ln = 0;
|
|
// Fast forward (cost of finding start)
|
|
while (cur_ln < start_iter_line && iter_offset < str_copy.size()) {
|
|
if (str_copy[iter_offset++] == '\n')
|
|
cur_ln++;
|
|
}
|
|
// Read loop
|
|
for (int i = 0; i < lines_to_read && iter_offset < str_copy.size(); ++i) {
|
|
size_t next_nl = str_copy.find('\n', iter_offset);
|
|
if (next_nl == std::string::npos)
|
|
break;
|
|
// Simulate extracting the string
|
|
volatile std::string temp =
|
|
str_copy.substr(iter_offset, next_nl - iter_offset);
|
|
iter_offset = next_nl + 1;
|
|
}
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Iterate 1000 Lines", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 9: SEARCH (Regex)
|
|
// ==========================================
|
|
// Search for a specific pattern that occurs
|
|
const char *search_pattern = "brown fox";
|
|
|
|
// Rope Search (DFA/PCRE as per header)
|
|
t.reset();
|
|
auto rope_matches = search_rope(root, search_pattern);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
t.reset();
|
|
try {
|
|
std::regex re(search_pattern);
|
|
auto words_begin =
|
|
std::sregex_iterator(str_copy.begin(), str_copy.end(), re);
|
|
auto words_end = std::sregex_iterator();
|
|
size_t count = 0;
|
|
for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
|
|
count++;
|
|
// Don't iterate millions of times for the benchmark if it takes forever
|
|
if (count > 1000)
|
|
break;
|
|
}
|
|
} catch (...) {
|
|
}
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Search (Regex)", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// TEST 10: SPLIT
|
|
// ==========================================
|
|
uint32_t split_point = total_len / 2;
|
|
Knot *left_side = nullptr;
|
|
Knot *right_side = nullptr;
|
|
|
|
// Rope Split
|
|
t.reset();
|
|
// split consumes 'root', so root is invalid after this
|
|
split(root, split_point, &left_side, &right_side);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// String Split (Simulated via substr copies)
|
|
t.reset();
|
|
std::string s_left = str_copy.substr(0, split_point);
|
|
std::string s_right = str_copy.substr(split_point);
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Split (Half)", rope_time, str_time);
|
|
|
|
// ==========================================
|
|
// CLEANUP
|
|
// ==========================================
|
|
t.reset();
|
|
free_rope(left_side);
|
|
free_rope(right_side);
|
|
rope_time = t.elapsed_ms();
|
|
|
|
// std::string cleans up automatically, but let's time the destruction
|
|
t.reset();
|
|
{
|
|
std::string temp1 = std::move(s_left);
|
|
std::string temp2 = std::move(s_right);
|
|
} // destructors run here
|
|
str_time = t.elapsed_ms();
|
|
|
|
print_result("Free / Destruct", rope_time, str_time);
|
|
|
|
return 0;
|
|
}
|