From dad7d844ca150e64a725be39a9da38a020898865 Mon Sep 17 00:00:00 2001
From: Syed Daanish
Date: Sat, 13 Dec 2025 00:23:59 +0000
Subject: [PATCH] Add unicode input support

read_input() now returns a heap-allocated buffer holding either one escape
sequence or one grapheme cluster of UTF-8 text (segmented with libgrapheme),
and KeyEvent carries that buffer as `c` together with its byte length `len`.

Review fixes folded into this patch:
- read_input(): allocate with calloc() so the buffer is NUL-terminated on the
  early-return paths, check the allocation, keep the old block when realloc()
  fails, and reset `buf` to nullptr whenever it is freed.
- read_key(): free the buffer for events that do not carry it, initialise
  c/len for every event, and take the CSI modifier digit from buf[4] (the
  switch used buf[3], which is always ';' in that branch).

Note: nothing in this patch frees KeyEvent::c after a KEY_CHAR event has been
handled.
---
 README.md    |   1 +
 include/ui.h |   6 +-
 src/input.cc | 249 ++++++++++++++++++++++++++++++++++++++-------------
 src/main.cc  |  81 +++++++++-------
 src/ts.cc    |   1 +
 5 files changed, 239 insertions(+), 99 deletions(-)

diff --git a/README.md b/README.md
index 6bfe620..8c4ddbd 100644
--- a/README.md
+++ b/README.md
@@ -26,3 +26,4 @@ A TUI IDE.
 - [ ] Add `.scm` files for all the supported languages. (2/14) Done.
 - [ ] Add support for LSP & autocomplete / snippets.
 - [ ] Add codeium/copilot support.
+- [ ] Add git stuff.
diff --git a/include/ui.h b/include/ui.h
index f2e259d..b8ed3ef 100644
--- a/include/ui.h
+++ b/include/ui.h
@@ -63,7 +63,10 @@ struct ScreenCell {
 struct KeyEvent {
   uint8_t key_type;
 
-  char c;
+  // KEY_CHAR only: heap buffer with the UTF-8 bytes of one grapheme cluster and
+  // its byte length; read_key() transfers ownership of `c` to the event.
+  char *c;
+  uint32_t len;
 
   uint8_t special_key;
   uint8_t special_modifier;
@@ -89,7 +92,6 @@ void set_cursor(int row, int col, int show_cursor_param);
 void render();
 Coord get_size();
 
-int read_input(char *buf, size_t buflen);
 KeyEvent read_key();
 
 #endif
diff --git a/src/input.cc b/src/input.cc
index 962b24f..582040a 100644
--- a/src/input.cc
+++ b/src/input.cc
@@ -1,22 +1,139 @@
+extern "C" {
+#include "../libs/libgrapheme/grapheme.h"
+}
 #include "../include/ui.h"
+#include
+#include
+#include
+#include
+#include
 
-int read_input(char *buf, size_t buflen) {
-  size_t i = 0;
-  int n;
-  n = read(STDIN_FILENO, &buf[i], 1);
-  if (n <= 0)
-    return -1;
-  i++;
-  if (buf[0] == '\x1b') {
-    while (i < buflen - 1) {
-      n = read(STDIN_FILENO, &buf[i], 1);
-      if (n <= 0)
+// Bytes already read from stdin but not yet consumed by read_input().
+static Queue<char> input_queue;
+
+// Returns the byte length of the UTF-8 sequence introduced by `byte`: 2, 3 or
+// 4 for multi-byte lead bytes and 1 for everything else (ASCII, continuation
+// bytes and invalid lead bytes).
+int get_utf8_seq_len(uint8_t byte) {
+  if ((byte & 0x80) == 0x00)
+    return 1;
+  if ((byte & 0xE0) == 0xC0)
+    return 2;
+  if ((byte & 0xF0) == 0xE0)
+    return 3;
+  if ((byte & 0xF8) == 0xF0)
+    return 4;
+  return 1;
+}
+
+// Stores the next input byte in *out, taking it from input_queue when one is
+// pending and from stdin otherwise. Returns 1 on success, 0 if none was read.
+int get_next_byte(char *out) {
+  if (!input_queue.empty()) {
+    input_queue.pop(*out);
+    return 1;
+  }
+  int n = read(STDIN_FILENO, out, 1);
+  return (n > 0) ? 1 : 0;
+}
+
+// Appends `len` bytes to input_queue so later get_next_byte() calls return them.
+void enqueue_bytes(const char *bytes, int len) {
+  for (int i = 0; i < len; i++)
+    input_queue.push(bytes[i]);
+}
+
+// Reads one input unit into a newly allocated buffer: either an escape sequence
+// (ESC plus up to 5 following bytes) or one grapheme cluster of UTF-8 text.
+// On success returns the byte count and sets `buf` to a NUL-terminated buffer
+// the caller must free(). Returns 0 with `buf` set to nullptr when no complete
+// unit is available; bytes of a partial sequence are queued for the next call.
+int read_input(char *&buf) {
+  size_t cap = 32;
+  // Zero-filled so every early return below leaves `buf` NUL-terminated;
+  // read_key() inspects buf[1..5] even when fewer bytes were read.
+  buf = (char *)calloc(cap, 1);
+  if (!buf)
+    return 0;
+  size_t len = 0;
+  char header;
+  if (!get_next_byte(&header)) {
+    free(buf);
+    buf = nullptr;
+    return 0;
+  }
+  if (header == '\x1b') {
+    buf[len++] = header;
+    while (len < 6) {
+      char next_c;
+      if (!get_next_byte(&next_c))
         break;
-      i++;
+      buf[len++] = next_c;
+    }
+    return len;
+  }
+  int seq_len = get_utf8_seq_len((uint8_t)header);
+  buf[len++] = header;
+  if (seq_len == 1)
+    return len;
+  for (int i = 1; i < seq_len; i++) {
+    char next_c;
+    if (!get_next_byte(&next_c)) {
+      enqueue_bytes(buf, len);
+      free(buf);
+      buf = nullptr;
+      return 0;
+    }
+    buf[len++] = next_c;
+  }
+  uint_least32_t current_cp, prev_cp;
+  grapheme_decode_utf8(buf, len, &prev_cp);
+  uint_least16_t state = 0;
+  // Keep appending code points until libgrapheme reports a cluster boundary.
+  while (true) {
+    char next_header;
+    if (!get_next_byte(&next_header))
+      break;
+    int next_seq_len = get_utf8_seq_len((uint8_t)next_header);
+    char temp_seq[5];
+    temp_seq[0] = next_header;
+    int temp_len = 1;
+    bool complete_seq = true;
+    for (int i = 1; i < next_seq_len; i++) {
+      char c;
+      if (!get_next_byte(&c)) {
+        complete_seq = false;
+        break;
+      }
+      temp_seq[temp_len++] = c;
+    }
+    if (!complete_seq) {
+      enqueue_bytes(temp_seq, temp_len);
+      break;
+    }
+    grapheme_decode_utf8(temp_seq, temp_len, &current_cp);
+    if (grapheme_is_character_break(prev_cp, current_cp, &state)) {
+      enqueue_bytes(temp_seq, temp_len);
+      break;
+    } else {
+      if (len + temp_len + 1 >= cap) {
+        cap *= 2;
+        // Keep the old block valid if realloc() fails: the code point just
+        // read is queued again and the cluster collected so far is returned.
+        char *grown = (char *)realloc(buf, cap);
+        if (!grown) {
+          enqueue_bytes(temp_seq, temp_len);
+          break;
+        }
+        buf = grown;
+      }
+      memcpy(buf + len, temp_seq, temp_len);
+      len += temp_len;
+      prev_cp = current_cp;
     }
   }
-  buf[i] = '\0';
-  return i;
+  buf[len] = '\0';
+  return len;
 }
 
 void capture_mouse(char *buf, KeyEvent *ret) {
@@ -48,68 +165,76 @@ void capture_mouse(char *buf, KeyEvent *ret) {
 
+// Reads one input unit and classifies it as a mouse report, a CSI special key
+// or character data. Character events hand the read_input() buffer to the
+// caller through ret.c / ret.len; for all other events the buffer is freed.
 KeyEvent read_key() {
   KeyEvent ret;
-  char buf[7];
-  int n = read_input(buf, sizeof(buf));
+  ret.c = nullptr;
+  ret.len = 0;
+  char *buf;
+  int n = read_input(buf);
   if (n <= 0) {
     ret.key_type = KEY_NONE;
-    ret.c = '\0';
     return ret;
   }
-  if (n == 1) {
-    ret.key_type = KEY_CHAR;
-    ret.c = buf[0];
-  } else if (buf[0] == '\x1b' && buf[1] == '[' && buf[2] == 'M') {
+  if (buf[0] == '\x1b' && buf[1] == '[' && buf[2] == 'M') {
     ret.key_type = KEY_MOUSE;
     capture_mouse(buf, &ret);
-  } else {
+  } else if (buf[0] == '\x1b' && buf[1] == '[') {
     ret.key_type = KEY_SPECIAL;
-    if (buf[0] == '\x1b' && buf[1] == '[') {
-      int using_modifiers = buf[3] == ';';
-      int pos;
-      if (!using_modifiers) {
-        pos = 2;
-        ret.special_modifier = 0;
-      } else {
-        pos = 4;
-        switch (buf[3]) {
-        case '2':
-          ret.special_modifier = SHIFT;
-          break;
-        case '3':
-          ret.special_modifier = ALT;
-          break;
-        case '5':
-          ret.special_modifier = CNTRL;
-          break;
-        case '7':
-          ret.special_modifier = CNTRL_ALT;
-          break;
-        default:
-          ret.special_modifier = 0;
-          break;
-        }
-      }
-      switch (buf[pos]) {
-      case 'A':
-        ret.special_key = KEY_UP;
-        break;
-      case 'B':
-        ret.special_key = KEY_DOWN;
-        break;
-      case 'C':
-        ret.special_key = KEY_RIGHT;
-        break;
-      case 'D':
-        ret.special_key = KEY_LEFT;
+    int using_modifiers = buf[3] == ';';
+    int pos;
+    if (!using_modifiers) {
+      pos = 2;
+      ret.special_modifier = 0;
+    } else {
+      // Modified keys arrive as ESC [ 1 ; <mod> <final>: the modifier digit is
+      // buf[4] and the final byte is buf[5].
+      pos = 5;
+      switch (buf[4]) {
+      case '2':
+        ret.special_modifier = SHIFT;
         break;
       case '3':
-        ret.special_key = KEY_DELETE;
+        ret.special_modifier = ALT;
+        break;
+      case '5':
+        ret.special_modifier = CNTRL;
+        break;
+      case '7':
+        ret.special_modifier = CNTRL_ALT;
         break;
       default:
-        ret.special_key = 99;
+        ret.special_modifier = 0;
         break;
       }
     }
+    switch (buf[pos]) {
+    case 'A':
+      ret.special_key = KEY_UP;
+      break;
+    case 'B':
+      ret.special_key = KEY_DOWN;
+      break;
+    case 'C':
+      ret.special_key = KEY_RIGHT;
+      break;
+    case 'D':
+      ret.special_key = KEY_LEFT;
+      break;
+    case '3':
+      ret.special_key = KEY_DELETE;
+      break;
+    default:
+      ret.special_key = 99;
+      break;
+    }
+  } else if (n > 0) {
+    ret.key_type = KEY_CHAR;
+    ret.c = buf;
+    ret.len = n;
   }
+  // Only KEY_CHAR events keep `buf` (as ret.c); release it for all others.
+  if (ret.key_type != KEY_CHAR)
+    free(buf);
   return ret;
 }
diff --git a/src/main.cc b/src/main.cc
index 0b6beaf..3fbc51d 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -10,7 +10,7 @@
 
 std::atomic<bool> running{true};
 Queue<KeyEvent> event_queue;
-char m = NORMAL;
+uint8_t mode = INSERT;
 
 void background_worker(Editor *editor) {
   while (running) {
@@ -24,7 +24,7 @@ void input_listener() {
     KeyEvent event = read_key();
     if (event.key_type == KEY_NONE)
       continue;
-    if (event.key_type == KEY_CHAR && event.c == CTRL('q'))
+    if (event.key_type == KEY_CHAR && *event.c == CTRL('q'))
       running = false;
     event_queue.push(event);
     std::this_thread::sleep_for(std::chrono::microseconds(100));
@@ -32,41 +32,52 @@ void input_listener() {
 }
 
+// Applies one key event to `editor`: arrow keys move the cursor in every mode;
+// in INSERT mode KEY_CHAR events are inserted (tab, newline/carriage return and
+// 0x7F get dedicated handling) and KEY_DELETE erases at the cursor.
 void handle_editor_event(Editor *editor, KeyEvent event) {
-  if (event.key_type == KEY_SPECIAL && event.special_key == KEY_DOWN)
-    cursor_down(editor, 1);
-  if (event.key_type == KEY_SPECIAL && event.special_key == KEY_UP)
-    cursor_up(editor, 1);
-  if (event.key_type == KEY_SPECIAL && event.special_key == KEY_LEFT)
-    cursor_left(editor, 1);
-  if (event.key_type == KEY_SPECIAL && event.special_key == KEY_RIGHT)
-    cursor_right(editor, 1);
-  if (event.key_type == KEY_CHAR &&
-      ((event.c >= 'a' && event.c <= 'z') ||
-       (event.c >= 'A' && event.c <= 'Z') ||
-       (event.c >= '0' && event.c <= '9') || event.c == ' ' || event.c == '!' ||
-       event.c == '@' || event.c == '#' || event.c == '$' || event.c == '%' ||
-       event.c == '^' || event.c == '&' || event.c == '*' || event.c == '(' ||
-       event.c == ')' || event.c == '-' || event.c == '_' || event.c == '=' ||
-       event.c == '+' || event.c == '[' || event.c == ']' || event.c == '{' ||
-       event.c == '}' || event.c == '\\' || event.c == '|' || event.c == ';' ||
-       event.c == ':' || event.c == '\'' || event.c == '"' || event.c == ',' ||
-       event.c == '.' || event.c == '<' || event.c == '>' || event.c == '/' ||
-       event.c == '?' || event.c == '`' || event.c == '~')) {
-    edit_insert(editor, editor->cursor, &event.c, 1);
-    cursor_right(editor, 1);
+  if (event.key_type == KEY_SPECIAL) {
+    switch (event.special_key) {
+    case KEY_DOWN:
+      cursor_down(editor, 1);
+      break;
+    case KEY_UP:
+      cursor_up(editor, 1);
+      break;
+    case KEY_LEFT:
+      cursor_left(editor, 1);
+      break;
+    case KEY_RIGHT:
+      cursor_right(editor, 1);
+      break;
+    }
   }
-  if (event.key_type == KEY_CHAR && event.c == '\t') {
-    edit_insert(editor, editor->cursor, (char *)"\t", 1);
-    cursor_right(editor, 2);
+  switch (mode) {
+  case NORMAL:
+    break;
+  case INSERT:
+    if (event.key_type == KEY_CHAR) {
+      if (event.len == 1) {
+        if (event.c[0] == '\t') {
+          edit_insert(editor, editor->cursor, (char *)" ", 1);
+          cursor_right(editor, 2);
+        } else if (event.c[0] == '\n' || event.c[0] == '\r') {
+          edit_insert(editor, editor->cursor, (char *)"\n", 1);
+          cursor_right(editor, 1);
+        } else if (event.c[0] == 0x7F) {
+          edit_erase(editor, editor->cursor, -1);
+        } else if (isprint((unsigned char)(event.c[0]))) {
+          edit_insert(editor, editor->cursor, event.c, 1);
+          cursor_right(editor, 1);
+        }
+      } else if (event.len > 1) {
+        edit_insert(editor, editor->cursor, event.c, event.len);
+        cursor_right(editor, 1);
+      }
+    }
+    if (event.key_type == KEY_SPECIAL && event.special_key == KEY_DELETE)
+      edit_erase(editor, editor->cursor, 1);
+    break;
   }
-  if (event.key_type == KEY_CHAR && (event.c == '\n' || event.c == '\r')) {
-    edit_insert(editor, editor->cursor, (char *)"\n", 1);
-    cursor_right(editor, 1);
-  }
-  if (event.key_type == KEY_CHAR && event.c == 0x7F)
-    edit_erase(editor, editor->cursor, -1);
-  if (event.key_type == KEY_SPECIAL && event.special_key == KEY_DELETE)
-    edit_erase(editor, editor->cursor, 1);
   ensure_scroll(editor);
 }
 
diff --git a/src/ts.cc b/src/ts.cc
index 24c390d..a9dae70 100644
--- a/src/ts.cc
+++ b/src/ts.cc
@@ -1,6 +1,7 @@
 #include "../include/ts.h"
 #include "../include/editor.h"
 #include "../include/knot.h"
+#include "../include/main.h"
 #include
 #include
 #include