/* Generated by unicode-width generator.
 *
 * Unicode 16.0.0 data.
 * For terminal width calculation.
 *
 * Copyright 2025 Dair Aidarkhanov
 * SPDX-License-Identifier: 0BSD
 */
|
|
|
|
#ifndef UNICODE_WIDTH_H
|
|
#define UNICODE_WIDTH_H
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* Version of the Unicode data, split into major.minor.patch components. */
#define UNICODE_WIDTH_VERSION_MAJOR 16
#define UNICODE_WIDTH_VERSION_MINOR 0
#define UNICODE_WIDTH_VERSION_PATCH 0
|
|
|
|
/* Width state values.
 *
 * States of the width-calculation state machine; the current one is stored
 * in unicode_width_state_t.state. Values are explicit and contiguous (0..10).
 *
 * NOTE(review): the per-state meanings are only implied by the identifiers
 * (line feed, regional indicators, emoji presentation, ZWJ sequences, ...);
 * confirm against the implementation before relying on them.
 */
typedef enum {
  WIDTH_STATE_DEFAULT = 0,
  WIDTH_STATE_LINE_FEED = 1,
  WIDTH_STATE_EMOJI_MODIFIER = 2,
  WIDTH_STATE_REGIONAL_INDICATOR = 3,
  WIDTH_STATE_SEVERAL_REGIONAL_INDICATOR = 4,
  WIDTH_STATE_EMOJI_PRESENTATION = 5,
  WIDTH_STATE_ZWJ_EMOJI_PRESENTATION = 6,
  WIDTH_STATE_KEYCAP_ZWJ_EMOJI_PRESENTATION = 7,
  WIDTH_STATE_REGIONAL_INDICATOR_ZWJ_PRESENTATION = 8,
  WIDTH_STATE_TAG_END_ZWJ_EMOJI_PRESENTATION = 9,
  WIDTH_STATE_ZWJ_SEQUENCE_MEMBER = 10,
} width_state_t;
|
|
|
|
/* State for the width calculation state machine.
|
|
*/
|
|
typedef struct {
|
|
width_state_t state;
|
|
uint_least32_t previous_codepoint;
|
|
} unicode_width_state_t;
|
|
|
|
/* Initialize a unicode width state.
|
|
* Must be called before any other function.
|
|
*
|
|
* @param state Pointer to state to initialize
|
|
*/
|
|
void unicode_width_init(unicode_width_state_t *state);
|
|
|
|
/* Process a Unicode codepoint and return its width.
|
|
* Width is 0, 1, 2, or 3, or -1 for control characters.
|
|
*
|
|
* Control characters (0x00-0x1F except newlines, 0x7F, and 0x80-0x9F) return
|
|
* -1, allowing the caller to decide how to display them. For readline-like
|
|
* applications, control characters are typically displayed using caret notation
|
|
* (^X) with width 2. Use unicode_width_control_char() to get this width.
|
|
*
|
|
* Newlines (LF, CR, CRLF) return width 0 as they don't consume horizontal space
|
|
* in terminal displays.
|
|
*
|
|
* Note that the width of a codepoint may depend on context (preceding/following
|
|
* codepoints), so this function keeps track of context in the provided state.
|
|
*
|
|
* @param state Pointer to state
|
|
* @param codepoint Unicode codepoint to process
|
|
* @return Width of the codepoint in columns, or -1 for control characters
|
|
*/
|
|
int unicode_width_process(unicode_width_state_t *state,
|
|
uint_least32_t codepoint);
|
|
|
|
/* Get the display width of a control character in caret notation (e.g., ^A).
 * This is useful for applications like readline that display control chars.
 * Unlike the other functions in this header, it takes no state argument.
 *
 * @param codepoint The Unicode codepoint to check
 * @return The display width (usually 2 for ^X notation), or -1 if the
 *         codepoint is not a control character
 */
int unicode_width_control_char(uint_least32_t codepoint);
|
|
|
|
/* Reset a unicode width state to its initial state.
|
|
*
|
|
* @param state Pointer to state to reset
|
|
*/
|
|
void unicode_width_reset(unicode_width_state_t *state);
|
|
|
|
#ifdef __cplusplus
|
|
} /* extern "C" */
|
|
#endif
|
|
|
|
#endif /* UNICODE_WIDTH_H */
|