Initial Commit

2025-08-30 16:07:19 +01:00
commit d86c15e30c
169 changed files with 121377 additions and 0 deletions
--- a/libs/libgrapheme-2.0.2/src/case.c
+++ b/libs/libgrapheme-2.0.2/src/case.c
@@ -0,0 +1,470 @@
+/* See LICENSE file for copyright and license details. */
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../grapheme.h"
+#include "../gen/case.h"
+#include "util.h"
+
+/*
+ * Look up the case property of a codepoint in the two-stage table made
+ * up of case_major and case_minor. Values above U+10FFFF are reported
+ * as CASE_PROP_OTHER.
+ */
+static inline enum case_property
+get_case_property(uint_least32_t cp)
+{
+	if (unlikely(cp > UINT32_C(0x10FFFF))) {
+		/* outside of the range the tables are indexed with */
+		return CASE_PROP_OTHER;
+	}
+
+	return (enum case_property)
+	       case_minor[case_major[cp >> 8] + (cp & 0xFF)];
+}
+
+/*
+ * Fetch the entry for a codepoint from the two-stage case-mapping table
+ * given by major and minor. Values above U+10FFFF yield 0.
+ *
+ * The entry might be larger than or equal to 0x110000, which marks a
+ * special-case-mapping. Callers need to handle this separately.
+ */
+static inline int_least32_t
+get_case_offset(uint_least32_t cp, const uint_least16_t *major,
+                const int_least32_t *minor)
+{
+	return likely(cp <= UINT32_C(0x10FFFF)) ?
+	       minor[major[cp >> 8] + (cp & 0xFF)] :
+	       0;
+}
+
+/*
+ * Map every codepoint that can be read from r with the case-mapping
+ * tables major/minor/sc and hand the result to w.
+ *
+ * final_sigma_level is the initial state of the Final_Sigma tracking,
+ * which is only evaluated when sc is lower_special:
+ *
+ *  0: we are not in a sequence
+ *  1: the previous codepoint was cased
+ *  2: a cased codepoint was followed by one or more case-ignorable
+ *     codepoints
+ *
+ * Before returning, herodotus_writer_nul_terminate() is called on w.
+ * The return value is herodotus_writer_number_written(w).
+ */
+static inline size_t
+to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w,
+        uint_least8_t final_sigma_level, const uint_least16_t *major,
+        const int_least32_t *minor, const struct special_case *sc)
+{
+	HERODOTUS_READER tmp;
+	enum case_property prop;
+	enum herodotus_status s;
+	size_t off, i;
+	uint_least32_t cp, tmp_cp;
+	int_least32_t map;
+
+	/* consume the input codepoint by codepoint until reading fails */
+	for (; herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCESS;) {
+		if (sc == lower_special) {
+			/*
+			 * For the special Final_Sigma-rule (see SpecialCasing.txt),
+			 * which is the only non-localized case-dependent rule,
+			 * we apply a different mapping when a sigma is at the
+			 * end of a word.
+			 *
+			 * Before: cased case-ignorable*
+			 * After: not(case-ignorable* cased)
+			 *
+			 * We check the after-condition on demand, but the before-
+			 * condition is best checked using the "level"-heuristic
+			 * also used in the sentence and line breaking-implementations.
+			 */
+			if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER SIGMA */
+			    (final_sigma_level == 1 ||
+			     final_sigma_level == 2)) {
+				/*
+				 * check succeeding characters by first skipping
+				 * all case-ignorable characters and then checking
+				 * if the succeeding character is cased, invalidating
+				 * the after-condition
+				 *
+				 * The look-ahead runs on a copy of the reader, so
+				 * r itself does not move. prop starts out as
+				 * NUM_CASE_PROPS, a value that is none of the
+				 * properties compared against below.
+				 */
+				herodotus_reader_copy(r, &tmp);
+				for (prop = NUM_CASE_PROPS;
+				     (s = herodotus_read_codepoint(&tmp, true, &tmp_cp)) ==
+				     HERODOTUS_STATUS_SUCCESS; ) {
+					prop = get_case_property(tmp_cp);
+
+					if (prop != CASE_PROP_CASE_IGNORABLE &&
+					    prop != CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
+					    	break;
+					}
+				}
+
+				/*
+				 * Now prop is something other than case-ignorable or
+				 * the source-string ended.
+				 * If it is something other than cased, we know
+				 * that the after-condition holds
+				 */
+				if (s != HERODOTUS_STATUS_SUCCESS ||
+				    (prop != CASE_PROP_CASED &&
+				     prop != CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
+					/*
+					 * write GREEK SMALL LETTER FINAL SIGMA to
+					 * destination
+					 */
+					herodotus_write_codepoint(w, UINT32_C(0x03C2));
+					
+					/* reset Final_Sigma-state and continue */
+					final_sigma_level = 0;
+					continue;
+				}
+			}
+
+			/* update state */
+			prop = get_case_property(cp);
+			if ((final_sigma_level == 0 ||
+			     final_sigma_level == 1) &&
+			    (prop == CASE_PROP_CASED ||
+			     prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
+				/* sequence has begun */
+				final_sigma_level = 1;
+			} else if ((final_sigma_level == 1 ||
+			            final_sigma_level == 2) &&
+			           (prop == CASE_PROP_CASE_IGNORABLE ||
+			            prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
+				/* case-ignorable sequence begins or continued */
+				final_sigma_level = 2;
+			} else {
+				/* sequence broke */
+				final_sigma_level = 0;
+			}
+		}
+
+		/* get and handle case mapping */
+		if (unlikely((map = get_case_offset(cp, major, minor)) >=
+		             INT32_C(0x110000))) {
+			/*
+			 * we have a special case and the offset in the sc-array
+			 * is the difference to 0x110000; all of its cplen
+			 * codepoints are written in place of the single input
+			 * codepoint
+			 */
+			off = (uint_least32_t)map - UINT32_C(0x110000);
+
+			for (i = 0; i < sc[off].cplen; i++) {
+				herodotus_write_codepoint(w, sc[off].cp[i]);
+			}
+		} else {
+			/* we have a simple mapping: a signed offset added to cp */
+			herodotus_write_codepoint(w, (uint_least32_t)
+			                          ((int_least32_t)cp + map));
+		}
+	}
+
+	herodotus_writer_nul_terminate(w);
+
+	return herodotus_writer_number_written(w);
+}
+
+/*
+ * Determine the next word break as seen from a copy of the reader made
+ * with herodotus_reader_copy(), using the codepoint or the UTF-8 variant
+ * of grapheme_next_word_break depending on the reader type. The reader
+ * passed in is left untouched.
+ */
+static size_t
+herodotus_next_word_break(const HERODOTUS_READER *r)
+{
+	HERODOTUS_READER snapshot;
+
+	herodotus_reader_copy(r, &snapshot);
+
+	/* anything but HERODOTUS_TYPE_CODEPOINT is handled as UTF-8 */
+	return (r->type == HERODOTUS_TYPE_CODEPOINT) ?
+	       grapheme_next_word_break(snapshot.src, snapshot.srclen) :
+	       grapheme_next_word_break_utf8(snapshot.src, snapshot.srclen);
+}
+
+/*
+ * Titlecase the input readable from r and hand the result to w.
+ *
+ * The input is processed in segments as reported by
+ * herodotus_next_word_break(). Within each segment, the codepoints
+ * preceding the first cased one are written verbatim, the first cased
+ * codepoint is mapped with the title-tables and everything after it
+ * with the lower-tables.
+ *
+ * Before returning, herodotus_writer_nul_terminate() is called on w.
+ * The return value is herodotus_writer_number_written(w).
+ */
+static inline size_t
+to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
+{
+	enum case_property prop;
+	enum herodotus_status s;
+	uint_least32_t cp;
+	size_t nwb;
+
+	for (; (nwb = herodotus_next_word_break(r)) > 0;) {
+		/* confine the reader to the current word */
+		herodotus_reader_push_advance_limit(r, nwb);
+		for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) {
+			/* check if we have a cased character */
+			prop = get_case_property(cp);
+			if (prop == CASE_PROP_CASED ||
+			    prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
+				break;
+			} else {
+				/* write the data to the output verbatim, if it permits */
+				herodotus_write_codepoint(w, cp);
+
+				/* increment reader */
+				herodotus_read_codepoint(r, true, &cp);
+			}
+		}
+
+		if (s == HERODOTUS_STATUS_END_OF_BUFFER) {
+			/* we are done */
+			herodotus_reader_pop_limit(r);
+			break;
+		} else if (s == HERODOTUS_STATUS_SOFT_LIMIT_REACHED) {
+			/*
+			 * we did not encounter any cased character
+			 * up to the word break
+			 */
+			herodotus_reader_pop_limit(r);
+			continue;
+		} else {
+			/*
+			 * we encountered a cased character before the word
+			 * break, convert it to titlecase
+			 *
+			 * A second limit, taken from
+			 * herodotus_reader_next_codepoint_break(), is pushed so
+			 * that to_case() stops after the cased character
+			 * (presumably exactly one codepoint; confirm against
+			 * the helper's definition).
+			 */
+			herodotus_reader_push_advance_limit(r,
+				herodotus_reader_next_codepoint_break(r));
+			to_case(r, w, 0, title_major, title_minor, title_special);
+			herodotus_reader_pop_limit(r);
+		}
+
+		/*
+		 * cast the rest of the codepoints in the word to lowercase;
+		 * the Final_Sigma-level starts at 1, given the codepoint
+		 * just handled was cased
+		 */
+		to_case(r, w, 1, lower_major, lower_minor, lower_special);
+
+		/* remove the limit on the word before the next iteration */
+		herodotus_reader_pop_limit(r);
+	}
+
+	herodotus_writer_nul_terminate(w);
+
+	return herodotus_writer_number_written(w);
+}
+
+/*
+ * Convert the codepoint input described by src and srclen to uppercase
+ * and write it to the codepoint output described by dest and destlen.
+ * Returns the value reported by to_case().
+ */
+size_t
+grapheme_to_uppercase(const uint_least32_t *src, size_t srclen,
+                      uint_least32_t *dest, size_t destlen)
+{
+	HERODOTUS_READER reader;
+	HERODOTUS_WRITER writer;
+
+	herodotus_writer_init(&writer, HERODOTUS_TYPE_CODEPOINT, dest, destlen);
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return to_case(&reader, &writer, 0, upper_major, upper_minor,
+	               upper_special);
+}
+
+/*
+ * Convert the codepoint input described by src and srclen to lowercase
+ * and write it to the codepoint output described by dest and destlen.
+ * The Final_Sigma tracking of to_case() starts out at level 0.
+ * Returns the value reported by to_case().
+ */
+size_t
+grapheme_to_lowercase(const uint_least32_t *src, size_t srclen,
+                      uint_least32_t *dest, size_t destlen)
+{
+	HERODOTUS_READER reader;
+	HERODOTUS_WRITER writer;
+
+	herodotus_writer_init(&writer, HERODOTUS_TYPE_CODEPOINT, dest, destlen);
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return to_case(&reader, &writer, 0, lower_major, lower_minor,
+	               lower_special);
+}
+
+/*
+ * Convert the codepoint input described by src and srclen to titlecase
+ * and write it to the codepoint output described by dest and destlen.
+ * Returns the value reported by to_titlecase().
+ */
+size_t
+grapheme_to_titlecase(const uint_least32_t *src, size_t srclen,
+                      uint_least32_t *dest, size_t destlen)
+{
+	HERODOTUS_READER reader;
+	HERODOTUS_WRITER writer;
+
+	herodotus_writer_init(&writer, HERODOTUS_TYPE_CODEPOINT, dest, destlen);
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return to_titlecase(&reader, &writer);
+}
+
+/*
+ * Convert the UTF-8 input described by src and srclen to uppercase and
+ * write it to the UTF-8 output described by dest and destlen.
+ * Returns the value reported by to_case().
+ */
+size_t
+grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest,
+                           size_t destlen)
+{
+	HERODOTUS_READER reader;
+	HERODOTUS_WRITER writer;
+
+	herodotus_writer_init(&writer, HERODOTUS_TYPE_UTF8, dest, destlen);
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return to_case(&reader, &writer, 0, upper_major, upper_minor,
+	               upper_special);
+}
+
+/*
+ * Convert the UTF-8 input described by src and srclen to lowercase and
+ * write it to the UTF-8 output described by dest and destlen.
+ * The Final_Sigma tracking of to_case() starts out at level 0.
+ * Returns the value reported by to_case().
+ */
+size_t
+grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest,
+                           size_t destlen)
+{
+	HERODOTUS_READER reader;
+	HERODOTUS_WRITER writer;
+
+	herodotus_writer_init(&writer, HERODOTUS_TYPE_UTF8, dest, destlen);
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return to_case(&reader, &writer, 0, lower_major, lower_minor,
+	               lower_special);
+}
+
+/*
+ * Convert the UTF-8 input described by src and srclen to titlecase and
+ * write it to the UTF-8 output described by dest and destlen.
+ * Returns the value reported by to_titlecase().
+ */
+size_t
+grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest,
+                           size_t destlen)
+{
+	HERODOTUS_READER reader;
+	HERODOTUS_WRITER writer;
+
+	herodotus_writer_init(&writer, HERODOTUS_TYPE_UTF8, dest, destlen);
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return to_titlecase(&reader, &writer);
+}
+
+/*
+ * Check whether the input readable from r is already in the case
+ * described by the mapping tables major/minor/sc.
+ *
+ * Returns false as soon as a codepoint differs from what its mapping
+ * yields and true otherwise. If output is not NULL, it is set to
+ * herodotus_reader_number_read(r) as of the point where the check
+ * stopped.
+ */
+static inline bool
+is_case(HERODOTUS_READER *r, const uint_least16_t *major,
+        const int_least32_t *minor, const struct special_case *sc,
+        size_t *output)
+{
+	const struct special_case *special;
+	uint_least32_t cp;
+	int_least32_t map;
+	size_t i;
+	bool ret = true;
+
+	/* only peek at the codepoint, it is consumed once it has matched */
+	while (herodotus_read_codepoint(r, false, &cp) ==
+	       HERODOTUS_STATUS_SUCCESS) {
+		map = get_case_offset(cp, major, minor);
+
+		if (likely(map < INT32_C(0x110000))) {
+			/*
+			 * simple mapping: the codepoint is fine if
+			 * applying the offset does not change it
+			 */
+			if (cp != (uint_least32_t)((int_least32_t)cp + map)) {
+				ret = false;
+				goto done;
+			}
+
+			/* move forward */
+			herodotus_read_codepoint(r, true, &cp);
+			continue;
+		}
+
+		/*
+		 * special case: the index into the sc-array is the
+		 * difference to 0x110000, and the input is compared
+		 * against the mapped sequence codepoint by codepoint
+		 */
+		special = &sc[(uint_least32_t)map - UINT32_C(0x110000)];
+
+		for (i = 0; i < special->cplen; i++) {
+			if (herodotus_read_codepoint(r, false, &cp) !=
+			    HERODOTUS_STATUS_SUCCESS) {
+				/*
+				 * the input ended before any difference
+				 * showed up, which counts as a match
+				 */
+				ret = true;
+				goto done;
+			}
+
+			if (cp != special->cp[i]) {
+				ret = false;
+				goto done;
+			}
+
+			/* move forward */
+			herodotus_read_codepoint(r, true, &cp);
+		}
+	}
+
+done:
+	if (output != NULL) {
+		*output = herodotus_reader_number_read(r);
+	}
+
+	return ret;
+}
+
+/*
+ * Check whether the input readable from r is in titlecase.
+ *
+ * The input is processed in segments as reported by
+ * herodotus_next_word_break(). Within each segment, the codepoints
+ * preceding the first cased one are skipped, the first cased codepoint
+ * is checked against the title-tables and everything after it against
+ * the lower-tables using is_case().
+ *
+ * Returns false as soon as one of the is_case()-checks fails and true
+ * otherwise. If output is not NULL, it is set to
+ * herodotus_reader_number_read(r) as of the point where the check
+ * stopped.
+ */
+static inline bool
+is_titlecase(HERODOTUS_READER *r, size_t *output)
+{
+	enum case_property prop;
+	enum herodotus_status s;
+	bool ret = true;
+	uint_least32_t cp;
+	size_t nwb;
+
+	for (; (nwb = herodotus_next_word_break(r)) > 0;) {
+		/* confine the reader to the current word */
+		herodotus_reader_push_advance_limit(r, nwb);
+		for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) {
+			/* check if we have a cased character */
+			prop = get_case_property(cp);
+			if (prop == CASE_PROP_CASED ||
+			    prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
+				break;
+			} else {
+				/* increment reader */
+				herodotus_read_codepoint(r, true, &cp);
+			}
+		}
+
+		if (s == HERODOTUS_STATUS_END_OF_BUFFER) {
+			/*
+			 * we are done
+			 *
+			 * NOTE(review): unlike the same branch in
+			 * to_titlecase(), the limit is not popped here;
+			 * this looks harmless given r is not used for
+			 * further reading afterwards, but confirm
+			 */
+			break;
+		} else if (s == HERODOTUS_STATUS_SOFT_LIMIT_REACHED) {
+			/*
+			 * we did not encounter any cased character
+			 * up to the word break
+			 */
+			herodotus_reader_pop_limit(r);
+			continue;
+		} else {
+			/*
+			 * we encountered a cased character before the word
+			 * break, check if it's titlecase
+			 *
+			 * A second limit, taken from
+			 * herodotus_reader_next_codepoint_break(), is pushed so
+			 * that is_case() stops after the cased character
+			 * (presumably exactly one codepoint; confirm against
+			 * the helper's definition).
+			 */
+			herodotus_reader_push_advance_limit(r,
+				herodotus_reader_next_codepoint_break(r));
+			if (!is_case(r, title_major, title_minor, title_special, NULL)) {
+				ret = false;
+				goto done;
+			}
+			herodotus_reader_pop_limit(r);
+		}
+
+		/* check if the rest of the codepoints in the word are lowercase */
+		if (!is_case(r, lower_major, lower_minor, lower_special, NULL)) {
+			ret = false;
+			goto done;
+		}
+
+		/* remove the limit on the word before the next iteration */
+		herodotus_reader_pop_limit(r);
+	}
+done:
+	if (output) {
+		*output = herodotus_reader_number_read(r);
+	}
+	return ret;
+}
+
+/*
+ * Check whether the codepoint input described by src and srclen is
+ * uppercase by running is_case() with the upper-tables. caselen is
+ * handed to is_case() as its output parameter and may be NULL.
+ */
+bool
+grapheme_is_uppercase(const uint_least32_t *src, size_t srclen,
+                      size_t *caselen)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return is_case(&reader, upper_major, upper_minor, upper_special,
+	               caselen);
+}
+
+/*
+ * Check whether the codepoint input described by src and srclen is
+ * lowercase by running is_case() with the lower-tables. caselen is
+ * handed to is_case() as its output parameter and may be NULL.
+ */
+bool
+grapheme_is_lowercase(const uint_least32_t *src, size_t srclen,
+                      size_t *caselen)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return is_case(&reader, lower_major, lower_minor, lower_special,
+	               caselen);
+}
+
+/*
+ * Check whether the codepoint input described by src and srclen is
+ * titlecase by running is_titlecase(). caselen is handed to
+ * is_titlecase() as its output parameter and may be NULL.
+ */
+bool
+grapheme_is_titlecase(const uint_least32_t *src, size_t srclen,
+                      size_t *caselen)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return is_titlecase(&reader, caselen);
+}
+
+/*
+ * Check whether the UTF-8 input described by src and srclen is
+ * uppercase by running is_case() with the upper-tables. caselen is
+ * handed to is_case() as its output parameter and may be NULL.
+ */
+bool
+grapheme_is_uppercase_utf8(const char *src, size_t srclen,
+                           size_t *caselen)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return is_case(&reader, upper_major, upper_minor, upper_special,
+	               caselen);
+}
+
+/*
+ * Check whether the UTF-8 input described by src and srclen is
+ * lowercase by running is_case() with the lower-tables. caselen is
+ * handed to is_case() as its output parameter and may be NULL.
+ */
+bool
+grapheme_is_lowercase_utf8(const char *src, size_t srclen,
+                           size_t *caselen)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return is_case(&reader, lower_major, lower_minor, lower_special,
+	               caselen);
+}
+
+/*
+ * Check whether the UTF-8 input described by src and srclen is
+ * titlecase by running is_titlecase(). caselen is handed to
+ * is_titlecase() as its output parameter and may be NULL.
+ */
+bool
+grapheme_is_titlecase_utf8(const char *src, size_t srclen,
+                           size_t *caselen)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return is_titlecase(&reader, caselen);
+}
--- a/libs/libgrapheme-2.0.2/src/case.o
+++ b/libs/libgrapheme-2.0.2/src/case.o
--- a/libs/libgrapheme-2.0.2/src/character.c
+++ b/libs/libgrapheme-2.0.2/src/character.c
@@ -0,0 +1,243 @@
+/* See LICENSE file for copyright and license details. */
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "../gen/character.h"
+#include "../grapheme.h"
+#include "util.h"
+
+/*
+ * Unpacked form of the 16-bit state that grapheme_is_character_break()
+ * keeps between calls; state_serialize() and state_deserialize() convert
+ * between the two representations.
+ */
+struct character_break_state {
+	uint_least8_t prop;  /* break property of the last right codepoint */
+	bool prop_set;       /* whether prop has been filled in yet */
+	bool gb11_flag;      /* sequence state for rule GB11 */
+	bool gb12_13_flag;   /* sequence state for rules GB12 and GB13 */
+};
+
+/*
+ * Stateless "do not break" rules. The table is indexed by the break
+ * property of the left codepoint; each entry is a bitmask in which bit n
+ * is set if there is no break before a right codepoint with property n.
+ * Properties without an entry are zero, i.e. they always allow a break
+ * as far as this table is concerned. The trailing comments name the
+ * UAX #29 rule each bit stems from.
+ */
+static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
+	[CHAR_BREAK_PROP_OTHER] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_CR] =
+		UINT16_C(1) << CHAR_BREAK_PROP_LF,            /* GB3  */
+	[CHAR_BREAK_PROP_EXTEND] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_HANGUL_L] =
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L     | /* GB6  */
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB6  */
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV    | /* GB6  */
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT   | /* GB6  */
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_HANGUL_V] =
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB7  */
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB7  */
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_HANGUL_T] =
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB8  */
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_HANGUL_LV] =
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB7  */
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB7  */
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_HANGUL_LVT] =
+		UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB8  */
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_PREPEND] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK  | /* GB9a */
+		(UINT16_C(0xFFFF) &
+		 ~(UINT16_C(1) << CHAR_BREAK_PROP_CR      |
+		   UINT16_C(1) << CHAR_BREAK_PROP_LF      |
+		   UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
+		  )
+		),                                           /* GB9b */
+	[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_SPACINGMARK] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+	[CHAR_BREAK_PROP_ZWJ] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+		UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
+};
+/*
+ * Transition table for the GB11-flag. It is indexed by the property of
+ * the left codepoint, plus NUM_CHAR_BREAK_PROPS if the flag is currently
+ * set. The flag is set afterwards if the bit of the right codepoint's
+ * property is set in the entry and cleared otherwise.
+ */
+static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
+	[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ                   |
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
+	[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
+	[CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND                |
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
+	[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
+		UINT16_C(1) << CHAR_BREAK_PROP_ZWJ                   |
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
+};
+/*
+ * "Do not break" rule GB11. It is indexed by the property of the left
+ * codepoint, plus NUM_CHAR_BREAK_PROPS if the GB11-flag is set. The only
+ * entry forbids a break between a ZWJ and an Extended_Pictographic
+ * while the flag is set.
+ */
+static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
+	[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
+		UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
+};
+/*
+ * Transition table for the GB12/GB13-flag. It is indexed by the property
+ * of the left codepoint, plus NUM_CHAR_BREAK_PROPS if the flag is
+ * currently set. The only entry sets the flag for a pair of regional
+ * indicators while the flag is unset; with the flag set there is no
+ * entry, so it is cleared again.
+ */
+static const uint_least16_t flag_update_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
+	[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
+		UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
+};
+/*
+ * "Do not break" rules GB12 and GB13. The table is indexed by the
+ * property of the left codepoint, plus NUM_CHAR_BREAK_PROPS if the
+ * GB12/GB13-flag is set. The only entry forbids a break between two
+ * regional indicators while the flag is set.
+ */
+static const uint_least16_t dont_break_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
+	[CHAR_BREAK_PROP_REGIONAL_INDICATOR + NUM_CHAR_BREAK_PROPS] =
+		UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
+};
+
+/*
+ * Look up the character break property of a codepoint in the two-stage
+ * table made up of char_break_major and char_break_minor. Values above
+ * U+10FFFF are reported as CHAR_BREAK_PROP_OTHER.
+ */
+static inline enum char_break_property
+get_break_prop(uint_least32_t cp)
+{
+	return likely(cp <= UINT32_C(0x10FFFF)) ?
+	       (enum char_break_property)
+	       char_break_minor[char_break_major[cp >> 8] + (cp & 0xFF)] :
+	       CHAR_BREAK_PROP_OTHER;
+}
+
+/*
+ * Pack a character_break_state into 16 bits: the property occupies the
+ * lowest 8 bits, followed by prop_set (bit 8), gb11_flag (bit 9) and
+ * gb12_13_flag (bit 10).
+ */
+static inline void
+state_serialize(const struct character_break_state *in, uint_least16_t *out)
+{
+	uint_least16_t packed;
+
+	packed = (uint_least16_t)(in->prop & UINT8_C(0xFF));
+	packed |= (uint_least16_t)(((uint_least16_t)(in->prop_set)) << 8);
+	packed |= (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) << 9);
+	packed |= (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) << 10);
+
+	*out = packed;
+}
+
+/*
+ * Unpack the 16-bit representation written by state_serialize(): the
+ * lowest 8 bits hold the property, bits 8, 9 and 10 hold prop_set,
+ * gb11_flag and gb12_13_flag respectively.
+ */
+static inline void
+state_deserialize(uint_least16_t in, struct character_break_state *out)
+{
+	out->prop = (uint_least8_t)(in & UINT8_C(0xFF));
+
+	/* each flag is true exactly if its bit is set */
+	out->prop_set = (in & (UINT16_C(1) << 8)) != 0;
+	out->gb11_flag = (in & (UINT16_C(1) << 9)) != 0;
+	out->gb12_13_flag = (in & (UINT16_C(1) << 10)) != 0;
+}
+
+/*
+ * Determine whether there is a grapheme cluster break between the
+ * codepoints cp0 (left) and cp1 (right).
+ *
+ * s optionally points to a state that is read at the start of the call
+ * and written back at the end of it. It stores the property of cp1 and
+ * the flags needed for the rules GB11, GB12 and GB13. Once a property
+ * is stored in the state, it is used in place of looking up cp0.
+ * next_character_break() starts out with a state of 0.
+ * If s is NULL, only the stateless part of the rules is applied.
+ *
+ * NOTE(review): the property taken from *s is used as a table index
+ * without a range check, so a state that was not produced by this
+ * function (or zero-initialised) may index out of bounds; confirm that
+ * all callers honour this.
+ *
+ * Returns true if there is a break between cp0 and cp1.
+ */
+bool
+grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least16_t *s)
+{
+	struct character_break_state state;
+	enum char_break_property cp0_prop, cp1_prop;
+	bool notbreak = false;
+
+	if (likely(s)) {
+		state_deserialize(*s, &state);
+
+		/* reuse the property remembered from the previous call */
+		if (likely(state.prop_set)) {
+			cp0_prop = state.prop;
+		} else {
+			cp0_prop = get_break_prop(cp0);
+		}
+		cp1_prop = get_break_prop(cp1);
+
+		/* preserve prop of right codepoint for next iteration */
+		state.prop = (uint_least8_t)cp1_prop;
+		state.prop_set = true;
+
+		/*
+		 * update flags; the old flag value selects the lower or
+		 * upper half of the respective table
+		 */
+		state.gb11_flag =
+			flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
+			                 state.gb11_flag] &
+			UINT16_C(1) << cp1_prop;
+		state.gb12_13_flag =
+			flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
+		                            state.gb12_13_flag] &
+		        UINT16_C(1) << cp1_prop;
+
+		/*
+		 * Apply grapheme cluster breaking algorithm (UAX #29), see
+		 * http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
+		 *
+		 * Note that the flag-dependent lookups use the flags as
+		 * updated right above
+		 */
+		notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
+		           (dont_break_gb11[cp0_prop + state.gb11_flag *
+		                            NUM_CHAR_BREAK_PROPS] &
+		            (UINT16_C(1) << cp1_prop)) ||
+		           (dont_break_gb12_13[cp0_prop + state.gb12_13_flag *
+		                               NUM_CHAR_BREAK_PROPS] &
+		            (UINT16_C(1) << cp1_prop));
+
+		/* update or reset flags (when we have a break) */
+		if (likely(!notbreak)) {
+			state.gb11_flag = state.gb12_13_flag = false;
+		}
+
+		state_serialize(&state, s);
+	} else {
+		cp0_prop = get_break_prop(cp0);
+		cp1_prop = get_break_prop(cp1);
+
+		/*
+		 * Apply grapheme cluster breaking algorithm (UAX #29), see
+		 * http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
+		 *
+		 * Given we have no state, this behaves as if the state-booleans
+		 * were all set to false
+		 */
+		notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
+		           (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
+		           (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1_prop));
+	}
+
+	return !notbreak;
+}
+
+/*
+ * Advance r up to the next grapheme cluster break, or until no further
+ * codepoint can be read, and return herodotus_reader_number_read(r).
+ */
+static size_t
+next_character_break(HERODOTUS_READER *r)
+{
+	uint_least32_t left = 0, right = 0;
+	uint_least16_t break_state = 0;
+
+	/* consume the first codepoint, it becomes the left-hand side */
+	herodotus_read_codepoint(r, true, &left);
+
+	/*
+	 * peek at the right-hand side and only consume it when there is
+	 * no break in front of it
+	 */
+	while (herodotus_read_codepoint(r, false, &right) ==
+	       HERODOTUS_STATUS_SUCCESS &&
+	       !grapheme_is_character_break(left, right, &break_state)) {
+		herodotus_read_codepoint(r, true, &left);
+	}
+
+	return herodotus_reader_number_read(r);
+}
+
+/*
+ * Determine the next grapheme cluster break in the codepoint input
+ * described by str and len. Returns the value reported by
+ * next_character_break().
+ */
+size_t
+grapheme_next_character_break(const uint_least32_t *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, str, len);
+
+	return next_character_break(&reader);
+}
+
+/*
+ * Determine the next grapheme cluster break in the UTF-8 input
+ * described by str and len. Returns the value reported by
+ * next_character_break().
+ */
+size_t
+grapheme_next_character_break_utf8(const char *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, str, len);
+
+	return next_character_break(&reader);
+}
--- a/libs/libgrapheme-2.0.2/src/character.o
+++ b/libs/libgrapheme-2.0.2/src/character.o
--- a/libs/libgrapheme-2.0.2/src/line.c
+++ b/libs/libgrapheme-2.0.2/src/line.c
@@ -0,0 +1,510 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "../gen/line.h"
+#include "../grapheme.h"
+#include "util.h"
+
+/*
+ * Look up the line break property of a codepoint in the two-stage table
+ * made up of line_break_major and line_break_minor. Values above
+ * U+10FFFF are reported as LINE_BREAK_PROP_AL.
+ */
+static inline enum line_break_property
+get_break_prop(uint_least32_t cp)
+{
+	return likely(cp <= UINT32_C(0x10FFFF)) ?
+	       (enum line_break_property)
+	       line_break_minor[line_break_major[cp >> 8] + (cp & 0xff)] :
+	       LINE_BREAK_PROP_AL;
+}
+
+static size_t
+next_line_break(HERODOTUS_READER *r)
+{
+	HERODOTUS_READER tmp;
+	enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop,
+	                         last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
+	uint_least32_t cp;
+	uint_least8_t lb25_level = 0;
+	bool lb21a_flag = false, ri_even = true;
+
+	/*
+	 * Apply line breaking algorithm (UAX #14), see
+	 * https://unicode.org/reports/tr14/#Algorithm and tailoring
+	 * https://unicode.org/reports/tr14/#Examples (example 7),
+	 * given the automatic test-cases implement this example for
+	 * better number handling.
+	 *
+	 */
+
+	/*
+	 * Initialize the different properties such that we have
+	 * a good state after the state-update in the loop
+	 */
+	last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */
+	last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS;
+
+	for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop(cp);
+	     herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCESS;
+	     herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) {
+		/* get property of the right codepoint */
+		cp1_prop = get_break_prop(cp);
+
+		/* update retention-states */
+
+		/*
+		 * store the last observed non-CM-or-ZWJ-property for
+		 * LB9 and following.
+		 */
+		if (cp0_prop != LINE_BREAK_PROP_CM &&
+		    cp0_prop != LINE_BREAK_PROP_ZWJ) {
+			/*
+			 * check if the property we are overwriting now is an
+			 * HL. If so, we set the LB21a-flag which depends on this
+			 * knowledge.
+			 */
+			lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL);
+
+			/* check regional indicator state */
+			if (cp0_prop == LINE_BREAK_PROP_RI) {
+				/*
+				 * The property we just shifted in is
+				 * a regional indicator, increasing the
+				 * number of consecutive RIs on the left
+				 * side of the breakpoint by one, changing
+				 * the oddness.
+				 *
+				 */
+				ri_even = !ri_even;
+			} else {
+				/*
+				 * We saw no regional indicator, so the
+				 * number of consecutive RIs on the left
+				 * side of the breakpoint is zero, which
+				 * is an even number.
+				 *
+				 */
+				ri_even = true;
+			}
+
+			/*
+			 * Here comes a bit of magic. The tailored rule
+			 * LB25 (using example 7) has a very complicated
+			 * left-hand-side-rule of the form
+			 *
+			 *  NU (NU | SY | IS)* (CL | CP)?
+			 *
+			 * but instead of backtracking, we keep the state
+			 * as some kind of "power level" in the variable
+			 *
+			 *  lb25_level
+			 *
+			 * that goes from 0 to 3
+			 *
+			 *  0: we are not in the sequence
+			 *  1: we have one NU to the left of the middle
+			 *     spot
+			 *  2: we have one NU and one or more (NU | SY | IS)
+			 *     to the left of the middle spot
+			 *  3: we have one NU, zero or more (NU | SY | IS)
+			 *     and one (CL | CP) to the left of the middle
+			 *     spot
+			 */
+			if ((lb25_level == 0 ||
+			     lb25_level == 1) &&
+			    cp0_prop == LINE_BREAK_PROP_NU) {
+				/* sequence has begun */
+				lb25_level = 1;
+			} else if ((lb25_level == 1 || lb25_level == 2) &&
+			           (cp0_prop == LINE_BREAK_PROP_NU ||
+			            cp0_prop == LINE_BREAK_PROP_SY ||
+			            cp0_prop == LINE_BREAK_PROP_IS)) {
+				/* (NU | SY | IS) sequence begins or continued */
+				lb25_level = 2;
+			} else if ((lb25_level == 1 || lb25_level == 2) &&
+			           (cp0_prop == LINE_BREAK_PROP_CL                 ||
+				    cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+				    cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
+				/* CL or CP at the end of the sequence */
+				lb25_level = 3;
+			} else {
+				/* sequence broke */
+				lb25_level = 0;
+			}
+
+			last_non_cm_or_zwj_prop = cp0_prop;
+		}
+
+		/*
+		 * store the last observed non-SP-property for LB8, LB14,
+		 * LB15, LB16 and LB17. LB8 gets its own unskipped property,
+		 * whereas the others build on top of the CM-ZWJ-skipped
+		 * properties as they come after LB9
+		 */
+		if (cp0_prop != LINE_BREAK_PROP_SP) {
+			last_non_sp_prop = cp0_prop;
+		}
+		if (last_non_cm_or_zwj_prop != LINE_BREAK_PROP_SP) {
+			last_non_sp_cm_or_zwj_prop = last_non_cm_or_zwj_prop;
+		}
+
+		/* apply the algorithm */
+
+		/* LB4 */
+		if (cp0_prop == LINE_BREAK_PROP_BK) {
+			break;
+		}
+
+		/* LB5 */
+		if (cp0_prop == LINE_BREAK_PROP_CR &&
+		    cp1_prop == LINE_BREAK_PROP_LF) {
+			continue;
+		}
+		if (cp0_prop == LINE_BREAK_PROP_CR ||
+		    cp0_prop == LINE_BREAK_PROP_LF ||
+		    cp0_prop == LINE_BREAK_PROP_NL) {
+			break;
+		}
+
+		/* LB6 */
+		if (cp1_prop == LINE_BREAK_PROP_BK ||
+		    cp1_prop == LINE_BREAK_PROP_CR ||
+		    cp1_prop == LINE_BREAK_PROP_LF ||
+		    cp1_prop == LINE_BREAK_PROP_NL) {
+			continue;
+		}
+
+		/* LB7 */
+		if (cp1_prop == LINE_BREAK_PROP_SP ||
+		    cp1_prop == LINE_BREAK_PROP_ZW) {
+			continue;
+		}
+
+		/* LB8 */
+		if (last_non_sp_prop == LINE_BREAK_PROP_ZW) {
+			break;
+		}
+
+		/* LB8a */
+		if (cp0_prop == LINE_BREAK_PROP_ZWJ) {
+			continue;
+		}
+
+		/* LB9 */
+		if ((cp0_prop != LINE_BREAK_PROP_BK &&
+		     cp0_prop != LINE_BREAK_PROP_CR &&
+		     cp0_prop != LINE_BREAK_PROP_LF &&
+		     cp0_prop != LINE_BREAK_PROP_NL &&
+		     cp0_prop != LINE_BREAK_PROP_SP &&
+		     cp0_prop != LINE_BREAK_PROP_ZW) &&
+		    (cp1_prop == LINE_BREAK_PROP_CM ||
+		     cp1_prop == LINE_BREAK_PROP_ZWJ)) {
+			/*
+			 * given we skip them, we don't break in such
+			 * a sequence
+			 */
+			continue;
+		}
+
+		/* LB10 is baked into the following rules */
+
+		/* LB11 */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_WJ ||
+		    cp1_prop == LINE_BREAK_PROP_WJ) {
+			continue;
+		}
+
+		/* LB12 */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_GL) {
+			continue;
+		}
+
+		/* LB12a */
+		if ((last_non_cm_or_zwj_prop != LINE_BREAK_PROP_SP &&
+		     last_non_cm_or_zwj_prop != LINE_BREAK_PROP_BA &&
+		     last_non_cm_or_zwj_prop != LINE_BREAK_PROP_HY) &&
+		    cp1_prop == LINE_BREAK_PROP_GL) {
+			continue;
+		}
+
+		/* LB13 (affected by tailoring for LB25, see example 7) */
+		if (cp1_prop == LINE_BREAK_PROP_EX ||
+		    (last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU &&
+		     (cp1_prop == LINE_BREAK_PROP_CL                 ||
+		      cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+		      cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF    ||
+		      cp1_prop == LINE_BREAK_PROP_IS                 ||
+		      cp1_prop == LINE_BREAK_PROP_SY))) {
+			continue;
+		}
+
+		/* LB14 */
+		if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+		    last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF) {
+			continue;
+		}
+
+		/* LB15 */
+		if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_QU &&
+		    (cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+		     cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF)) {
+			continue;
+		}
+
+		/* LB16 */
+		if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL                 ||
+		     last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+		     last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF) &&
+		    cp1_prop == LINE_BREAK_PROP_NS) {
+			continue;
+		}
+
+		/* LB17 */
+		if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_B2 &&
+		    cp1_prop == LINE_BREAK_PROP_B2) {
+			continue;
+		}
+
+		/* LB18 */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_SP) {
+			break;
+		}
+
+		/* LB19 */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_QU ||
+		    cp1_prop == LINE_BREAK_PROP_QU) {
+			continue;
+		}
+
+		/* LB20 */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CB ||
+		    cp1_prop == LINE_BREAK_PROP_CB) {
+			break;
+		}
+
+		/* LB21 */
+		if (cp1_prop == LINE_BREAK_PROP_BA ||
+		    cp1_prop == LINE_BREAK_PROP_HY ||
+		    cp1_prop == LINE_BREAK_PROP_NS ||
+		    last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BB) {
+			continue;
+		}
+
+		/* LB21a */
+		if (lb21a_flag &&
+		    (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BA)) {
+			continue;
+		}
+
+		/* LB21b */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_SY &&
+		    cp1_prop == LINE_BREAK_PROP_HL) {
+			continue;
+		}
+
+		/* LB22 */
+		if (cp1_prop == LINE_BREAK_PROP_IN) {
+			continue;
+		}
+
+		/* LB23 */
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
+		    cp1_prop == LINE_BREAK_PROP_NU) {
+			continue;
+		}
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU &&
+		    (cp1_prop == LINE_BREAK_PROP_AL ||
+		     cp1_prop == LINE_BREAK_PROP_HL)) {
+			continue;
+		}
+
+		/* LB23a */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR &&
+		    (cp1_prop == LINE_BREAK_PROP_ID ||
+		     cp1_prop == LINE_BREAK_PROP_EB ||
+		     cp1_prop == LINE_BREAK_PROP_EM)) {
+			continue;
+		}
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_ID ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EB ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EM) &&
+		    cp1_prop == LINE_BREAK_PROP_PO) {
+			continue;
+		}
+
+		/* LB24 */
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) &&
+		    (cp1_prop == LINE_BREAK_PROP_AL  ||
+		     cp1_prop == LINE_BREAK_PROP_HL)) {
+			continue;
+		}
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
+		    (cp1_prop == LINE_BREAK_PROP_PR ||
+		     cp1_prop == LINE_BREAK_PROP_PO)) {
+			continue;
+		}
+
+		/* LB25 (tailored with example 7) */
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO)) {
+			if (cp1_prop == LINE_BREAK_PROP_NU) {
+				continue;
+			}
+
+			/* this stupid rule is the reason why we cannot
+			 * simply have a stateful break-detection between
+			 * two adjacent codepoints as we have it with
+			 * characters.
+			 */
+			herodotus_reader_copy(r, &tmp);
+			herodotus_read_codepoint(&tmp, true, &cp);
+			if (herodotus_read_codepoint(&tmp, true, &cp) ==
+			    HERODOTUS_STATUS_SUCCESS &&
+			    (cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+			     cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF    ||
+			     cp1_prop == LINE_BREAK_PROP_HY)) {
+				if (get_break_prop(cp) == LINE_BREAK_PROP_NU) {
+					continue;
+				}
+			}
+		}
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF    ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) &&
+		    cp1_prop == LINE_BREAK_PROP_NU) {
+			continue;
+		}
+		if (lb25_level == 1 &&
+		    (cp1_prop == LINE_BREAK_PROP_NU ||
+		     cp1_prop == LINE_BREAK_PROP_SY ||
+		     cp1_prop == LINE_BREAK_PROP_IS)) {
+			continue;
+		}
+		if ((lb25_level == 1 || lb25_level == 2) &&
+		    (cp1_prop == LINE_BREAK_PROP_NU                 ||
+		     cp1_prop == LINE_BREAK_PROP_SY                 ||
+		     cp1_prop == LINE_BREAK_PROP_IS                 ||
+		     cp1_prop == LINE_BREAK_PROP_CL                 ||
+		     cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+		     cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
+			continue;
+		}
+		if ((lb25_level == 1 || lb25_level == 2 || lb25_level == 3) &&
+		    (cp1_prop == LINE_BREAK_PROP_PO ||
+		     cp1_prop == LINE_BREAK_PROP_PR)) {
+			continue;
+		}
+
+		/* LB26 */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JL &&
+		    (cp1_prop == LINE_BREAK_PROP_JL ||
+		     cp1_prop == LINE_BREAK_PROP_JV ||
+		     cp1_prop == LINE_BREAK_PROP_H2 ||
+		     cp1_prop == LINE_BREAK_PROP_H3)) {
+			continue;
+		}
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JV ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H2) &&
+		    (cp1_prop == LINE_BREAK_PROP_JV ||
+		     cp1_prop == LINE_BREAK_PROP_JT)) {
+			continue;
+		}
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JT ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H3) &&
+		    cp1_prop == LINE_BREAK_PROP_JT) {
+			continue;
+		}
+
+		/* LB27 */
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JL ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JV ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JT ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H2 ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H3) &&
+		    cp1_prop == LINE_BREAK_PROP_PO) {
+			continue;
+		}
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR &&
+		    (cp1_prop == LINE_BREAK_PROP_JL ||
+		     cp1_prop == LINE_BREAK_PROP_JV ||
+		     cp1_prop == LINE_BREAK_PROP_JT ||
+		     cp1_prop == LINE_BREAK_PROP_H2 ||
+		     cp1_prop == LINE_BREAK_PROP_H3)) {
+			continue;
+		}
+
+		/* LB28 */
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
+		    (cp1_prop == LINE_BREAK_PROP_AL  ||
+		     cp1_prop == LINE_BREAK_PROP_HL)) {
+			continue;
+		}
+
+		/* LB29 */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS &&
+		    (cp1_prop == LINE_BREAK_PROP_AL  ||
+		     cp1_prop == LINE_BREAK_PROP_HL)) {
+			continue;
+		}
+
+		/* LB30 */
+		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL  ||
+		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) &&
+		    cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) {
+			continue;
+		}
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF &&
+		    (cp1_prop == LINE_BREAK_PROP_AL  ||
+		     cp1_prop == LINE_BREAK_PROP_HL  ||
+		     cp1_prop == LINE_BREAK_PROP_NU)) {
+			continue;
+		}
+
+		/* LB30a */
+		if (!ri_even &&
+		    last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI &&
+		    cp1_prop == LINE_BREAK_PROP_RI) {
+			continue;
+		}
+
+		/* LB30b */
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EB &&
+		    cp1_prop == LINE_BREAK_PROP_EM) {
+			continue;
+		}
+		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT &&
+		    cp1_prop == LINE_BREAK_PROP_EM) {
+			continue;
+		}
+
+		/* LB31 */
+		break;
+	}
+
+	return herodotus_reader_number_read(r);
+}
+
+/*
+ * Wrap the codepoint array str (len elements) in a reader and return
+ * what next_line_break() reports as the offset of the next line break.
+ */
+size_t
+grapheme_next_line_break(const uint_least32_t *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, str, len);
+	return next_line_break(&reader);
+}
+
+/*
+ * Wrap the UTF-8 buffer str (len bytes) in a reader and return what
+ * next_line_break() reports as the offset of the next line break.
+ */
+size_t
+grapheme_next_line_break_utf8(const char *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, str, len);
+	return next_line_break(&reader);
+}
--- a/libs/libgrapheme-2.0.2/src/line.o
+++ b/libs/libgrapheme-2.0.2/src/line.o
--- a/libs/libgrapheme-2.0.2/src/sentence.c
+++ b/libs/libgrapheme-2.0.2/src/sentence.c
@@ -0,0 +1,282 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "../gen/sentence.h"
+#include "../grapheme.h"
+#include "util.h"
+
+/*
+ * Running state of the sentence-break detection. Both members are
+ * "levels" maintained by sentence_skip_shift_callback() that encode
+ * how far we are into a terminator sequence to the left of the
+ * current position (0 meaning "not in such a sequence").
+ */
+struct sentence_break_state
+{
+	/* progress within "ATerm Close* Sp*" (0 to 3) */
+	uint_least8_t aterm_close_sp_level;
+	/* progress within "SATerm Close* Sp* ParaSep?" (0 to 4) */
+	uint_least8_t saterm_close_sp_parasep_level;
+};
+
+/*
+ * Look up the sentence-break property of cp in the two-stage table
+ * (the major table is indexed by cp >> 8, the minor table by the
+ * major entry plus the low byte of cp). Values above U+10FFFF are
+ * mapped to SENTENCE_BREAK_PROP_OTHER.
+ */
+static inline uint_least8_t
+get_sentence_break_prop(uint_least32_t cp)
+{
+	return likely(cp <= UINT32_C(0x10FFFF)) ?
+	       (uint_least8_t)sentence_break_minor
+	               [sentence_break_major[cp >> 8] + (cp & 0xff)] :
+	       SENTENCE_BREAK_PROP_OTHER;
+}
+
+/*
+ * Tell whether prop is one of the properties (Extend, Format) that
+ * the skip-view of the sentence-break detection passes over.
+ */
+static bool
+is_skippable_sentence_prop(uint_least8_t prop)
+{
+	switch (prop) {
+	case SENTENCE_BREAK_PROP_EXTEND:
+	case SENTENCE_BREAK_PROP_FORMAT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Skip-shift callback of the sentence-break detection. It is invoked
+ * with the property that has just moved past the skip-viewing-line
+ * and with the opaque state pointer, which must point to a
+ * struct sentence_break_state. It updates both levels stored there.
+ */
+static void
+sentence_skip_shift_callback(uint_least8_t prop, void *s)
+{
+	struct sentence_break_state *state = (struct sentence_break_state *)s;
+
+	/*
+	 * The rules SB8, SB8a, SB9, SB10 and SB11 have complicated
+	 * left-hand-sides of the form
+	 *
+	 *  ATerm Close* Sp*
+	 *  SATerm Close*
+	 *  SATerm Close* Sp*
+	 *  SATerm Close* Sp* ParaSep?
+	 *
+	 * Instead of backtracking, we track the progress within these
+	 * sequences as a "level" in the two state-variables
+	 *
+	 *  aterm_close_sp_level
+	 *  saterm_close_sp_parasep_level
+	 *
+	 * that go from 0 to 3/4:
+	 *
+	 *  0: we are not in the sequence
+	 *  1: we have one ATerm/SATerm to the left of
+	 *     the middle spot
+	 *  2: we have one ATerm/SATerm and one or more
+	 *     Close to the left of the middle spot
+	 *  3: we have one ATerm/SATerm, zero or more
+	 *     Close and one or more Sp to the left of
+	 *     the middle spot.
+	 *  4: we have one SATerm, zero or more Close,
+	 *     zero or more Sp and one ParaSep to the
+	 *     left of the middle spot.
+	 *
+	 * A terminator always (re)starts its sequence at level 1, no
+	 * matter which level we were at before: in e.g.
+	 * "ATerm Close ATerm" or "ATerm Sp ATerm" the text to the left
+	 * of the middle spot ends in a fresh one-element sequence.
+	 * Dropping to level 0 there would hide the new terminator from
+	 * the rules SB8 to SB11.
+	 */
+	if (prop == SENTENCE_BREAK_PROP_ATERM) {
+		/* sequence has begun (or begins anew) */
+		state->aterm_close_sp_level = 1;
+	} else if ((state->aterm_close_sp_level == 1 ||
+	            state->aterm_close_sp_level == 2) &&
+	           prop == SENTENCE_BREAK_PROP_CLOSE) {
+		/* close-sequence begins or continues */
+		state->aterm_close_sp_level = 2;
+	} else if ((state->aterm_close_sp_level == 1 ||
+	            state->aterm_close_sp_level == 2 ||
+	            state->aterm_close_sp_level == 3) &&
+	           prop == SENTENCE_BREAK_PROP_SP) {
+		/* sp-sequence begins or continues */
+		state->aterm_close_sp_level = 3;
+	} else {
+		/* sequence broke */
+		state->aterm_close_sp_level = 0;
+	}
+
+	if (prop == SENTENCE_BREAK_PROP_STERM ||
+	    prop == SENTENCE_BREAK_PROP_ATERM) {
+		/* sequence has begun (or begins anew) */
+		state->saterm_close_sp_parasep_level = 1;
+	} else if ((state->saterm_close_sp_parasep_level == 1 ||
+	            state->saterm_close_sp_parasep_level == 2) &&
+	           prop == SENTENCE_BREAK_PROP_CLOSE) {
+		/* close-sequence begins or continues */
+		state->saterm_close_sp_parasep_level = 2;
+	} else if ((state->saterm_close_sp_parasep_level == 1 ||
+	            state->saterm_close_sp_parasep_level == 2 ||
+	            state->saterm_close_sp_parasep_level == 3) &&
+	           prop == SENTENCE_BREAK_PROP_SP) {
+		/* sp-sequence begins or continues */
+		state->saterm_close_sp_parasep_level = 3;
+	} else if ((state->saterm_close_sp_parasep_level == 1 ||
+	            state->saterm_close_sp_parasep_level == 2 ||
+	            state->saterm_close_sp_parasep_level == 3) &&
+	           (prop == SENTENCE_BREAK_PROP_SEP ||
+	            prop == SENTENCE_BREAK_PROP_CR  ||
+	            prop == SENTENCE_BREAK_PROP_LF)) {
+		/* ParaSep at the end of the sequence */
+		state->saterm_close_sp_parasep_level = 4;
+	} else {
+		/* sequence broke */
+		state->saterm_close_sp_parasep_level = 0;
+	}
+}
+
+/*
+ * Determine the next sentence break in the data behind the reader r.
+ *
+ * The rules are evaluated in order for every position the proper
+ * advances to; "continue" means "no break here, move on" and "break"
+ * means "break here". The two levels in state are updated by
+ * sentence_skip_shift_callback() from within proper_advance().
+ *
+ * Returns the offset of the mid-reader at the break (or at the end
+ * of the input), in the unit of the reader (codepoints or bytes).
+ */
+static size_t
+next_sentence_break(HERODOTUS_READER *r)
+{
+	HERODOTUS_READER tmp;
+	enum sentence_break_property prop;
+	struct proper p;
+	struct sentence_break_state state = { 0 };
+	uint_least32_t cp;
+
+	/*
+	 * Apply sentence breaking algorithm (UAX #29), see
+	 * https://unicode.org/reports/tr29/#Sentence_Boundary_Rules
+	 */
+	proper_init(r, &state, NUM_SENTENCE_BREAK_PROPS,
+	            get_sentence_break_prop, is_skippable_sentence_prop,
+	            sentence_skip_shift_callback, &p);
+
+	while (!proper_advance(&p)) {
+		/* SB3: do not break between CR and LF */
+		if (p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR &&
+		    p.raw.next_prop[0] == SENTENCE_BREAK_PROP_LF) {
+			continue;
+		}
+
+		/* SB4: break after a paragraph separator */
+		if (p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_SEP ||
+		    p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR  ||
+		    p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_LF) {
+			break;
+		}
+
+		/* SB5: do not break before Extend or Format */
+		if (p.raw.next_prop[0] == SENTENCE_BREAK_PROP_EXTEND ||
+		    p.raw.next_prop[0] == SENTENCE_BREAK_PROP_FORMAT) {
+			continue;
+		}
+
+		/* SB6: do not break between ATerm and Numeric */
+		if (p.skip.prev_prop[0] == SENTENCE_BREAK_PROP_ATERM &&
+		    p.skip.next_prop[0] == SENTENCE_BREAK_PROP_NUMERIC) {
+			continue;
+		}
+
+		/* SB7: do not break in (Upper | Lower) ATerm x Upper */
+		if ((p.skip.prev_prop[1] == SENTENCE_BREAK_PROP_UPPER ||
+		     p.skip.prev_prop[1] == SENTENCE_BREAK_PROP_LOWER) &&
+		    p.skip.prev_prop[0] == SENTENCE_BREAK_PROP_ATERM &&
+		    p.skip.next_prop[0] == SENTENCE_BREAK_PROP_UPPER) {
+			continue;
+		}
+
+		/* SB8 */
+		if (state.aterm_close_sp_level == 1 ||
+		    state.aterm_close_sp_level == 2 ||
+		    state.aterm_close_sp_level == 3) {
+			/*
+			 * This is the most complicated rule, requiring
+			 * the right-hand-side to satisfy the regular expression
+			 *
+			 *  ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower
+			 *
+			 * which we simply check "manually" given LUT-lookups
+			 * are very cheap by starting at the mid_reader.
+			 *
+			 */
+			herodotus_reader_copy(&(p.mid_reader), &tmp);
+
+			/*
+			 * prop keeps this "no property" value if nothing
+			 * can be read, so the check below fails then
+			 */
+			prop = NUM_SENTENCE_BREAK_PROPS;
+			while (herodotus_read_codepoint(&tmp, true, &cp) ==
+			       HERODOTUS_STATUS_SUCCESS) {
+				prop = get_sentence_break_prop(cp);
+
+				/*
+				 * the skippable properties are ignored
+				 * automatically here given they do not
+				 * match the following condition
+				 */
+				if (prop == SENTENCE_BREAK_PROP_OLETTER ||
+				    prop == SENTENCE_BREAK_PROP_UPPER   ||
+				    prop == SENTENCE_BREAK_PROP_LOWER   ||
+				    prop == SENTENCE_BREAK_PROP_SEP     ||
+				    prop == SENTENCE_BREAK_PROP_CR      ||
+				    prop == SENTENCE_BREAK_PROP_LF      ||
+				    prop == SENTENCE_BREAK_PROP_STERM   ||
+				    prop == SENTENCE_BREAK_PROP_ATERM) {
+					break;
+				}
+			}
+
+			/* the scan stopped at a Lower: no break */
+			if (prop == SENTENCE_BREAK_PROP_LOWER) {
+				continue;
+			}
+		}
+
+		/* SB8a: SATerm Close* Sp* x (SContinue | SATerm) */
+		if ((state.saterm_close_sp_parasep_level == 1 ||
+		     state.saterm_close_sp_parasep_level == 2 ||
+		     state.saterm_close_sp_parasep_level == 3) &&
+		    (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SCONTINUE ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM     ||
+                     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) {
+			continue;
+		}
+
+		/* SB9: SATerm Close* x (Close | Sp | ParaSep) */
+		if ((state.saterm_close_sp_parasep_level == 1 ||
+		     state.saterm_close_sp_parasep_level == 2) &&
+		    (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CLOSE ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP    ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP   ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR    ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
+			continue;
+		}
+
+		/* SB10: SATerm Close* Sp* x (Sp | ParaSep) */
+		if ((state.saterm_close_sp_parasep_level == 1 ||
+		     state.saterm_close_sp_parasep_level == 2 ||
+		     state.saterm_close_sp_parasep_level == 3) &&
+		    (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP  ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR  ||
+		     p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
+			continue;
+		}
+
+		/* SB11: break after SATerm Close* Sp* ParaSep? */
+		if (state.saterm_close_sp_parasep_level == 1 ||
+		    state.saterm_close_sp_parasep_level == 2 ||
+		    state.saterm_close_sp_parasep_level == 3 ||
+		    state.saterm_close_sp_parasep_level == 4) {
+			break;
+		}
+
+		/* SB998: otherwise, do not break */
+		continue;
+	}
+
+	return herodotus_reader_number_read(&(p.mid_reader));
+}
+
+/*
+ * Wrap the codepoint array str (len elements) in a reader and return
+ * what next_sentence_break() reports as the offset of the next
+ * sentence break.
+ */
+size_t
+grapheme_next_sentence_break(const uint_least32_t *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, str, len);
+	return next_sentence_break(&reader);
+}
+
+/*
+ * Wrap the UTF-8 buffer str (len bytes) in a reader and return what
+ * next_sentence_break() reports as the offset of the next sentence
+ * break.
+ */
+size_t
+grapheme_next_sentence_break_utf8(const char *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, str, len);
+	return next_sentence_break(&reader);
+}
--- a/libs/libgrapheme-2.0.2/src/sentence.o
+++ b/libs/libgrapheme-2.0.2/src/sentence.o
--- a/libs/libgrapheme-2.0.2/src/utf8.c
+++ b/libs/libgrapheme-2.0.2/src/utf8.c
@@ -0,0 +1,219 @@
+/* See LICENSE file for copyright and license details. */
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../grapheme.h"
+#include "util.h"
+
+/*
+ * Inclusive range check (l <= c <= u). The argument c is evaluated
+ * twice, so it must be free of side effects.
+ */
+#define BETWEEN(c, l, u) ((c) >= (l) && (c) <= (u))
+
+/*
+ * lookup-table for the types of sequence first bytes
+ *
+ * Besides bounding the first byte, 'lower' doubles as the fixed
+ * bit-pattern of the first byte: the decoder subtracts it to obtain
+ * the payload bits and the encoder ORs the payload bits into it.
+ */
+static const struct {
+	uint_least8_t  lower; /* lower bound of sequence first byte */
+	uint_least8_t  upper; /* upper bound of sequence first byte */
+	uint_least32_t mincp; /* smallest non-overlong encoded codepoint */
+	uint_least32_t maxcp; /* largest encodable codepoint */
+	/*
+	 * implicit: table-offset represents the number of following
+	 * bytes of the form 10xxxxxx (6 bits capacity each)
+	 */
+} lut[] = {
+	[0] = {
+		/* 0xxxxxxx */
+		.lower = 0x00, /* 00000000 */
+		.upper = 0x7F, /* 01111111 */
+		.mincp = (uint_least32_t)0,
+		.maxcp = ((uint_least32_t)1 << 7) - 1, /* 7 bits capacity */
+	},
+	[1] = {
+		/* 110xxxxx */
+		.lower = 0xC0, /* 11000000 */
+		.upper = 0xDF, /* 11011111 */
+		.mincp = (uint_least32_t)1 << 7,
+		.maxcp = ((uint_least32_t)1 << 11) - 1, /* 5+6=11 bits capacity */
+	},
+	[2] = {
+		/* 1110xxxx */
+		.lower = 0xE0, /* 11100000 */
+		.upper = 0xEF, /* 11101111 */
+		.mincp = (uint_least32_t)1 << 11,
+		.maxcp = ((uint_least32_t)1 << 16) - 1, /* 4+6+6=16 bits capacity */
+	},
+	[3] = {
+		/* 11110xxx */
+		.lower = 0xF0, /* 11110000 */
+		.upper = 0xF7, /* 11110111 */
+		.mincp = (uint_least32_t)1 << 16,
+		.maxcp = ((uint_least32_t)1 << 21) - 1, /* 3+6+6+6=21 bits capacity */
+	},
+};
+
+/*
+ * Decode one codepoint from the UTF-8 buffer str of at most len bytes.
+ *
+ * The decoded codepoint is stored in *cp (cp may be NULL if only the
+ * length is of interest); GRAPHEME_INVALID_CODEPOINT is stored when
+ * the sequence is malformed, truncated, overlong, a surrogate or
+ * larger than U+10FFFF.
+ *
+ * Returns the number of bytes that make up the (possibly invalid)
+ * sequence. This is 0 for an empty or NULL buffer and may exceed len
+ * when the buffer ends in the middle of a sequence.
+ */
+size_t
+grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp)
+{
+	const unsigned char *bytes = (const unsigned char *)str;
+	uint_least32_t scratch;
+	size_t ntrail, pos;
+
+	/*
+	 * a NULL cp is redirected to a scratch variable once, so the
+	 * decoder itself never has to check for it
+	 */
+	if (cp == NULL) {
+		cp = &scratch;
+	}
+
+	/* a sequence is at least one byte long */
+	if (str == NULL || len == 0) {
+		*cp = GRAPHEME_INVALID_CODEPOINT;
+		return 0;
+	}
+
+	/*
+	 * classify the first byte; the matching table index is the
+	 * number of continuation bytes that have to follow
+	 */
+	for (ntrail = 0; ntrail < LEN(lut); ntrail++) {
+		if (BETWEEN(bytes[0], lut[ntrail].lower,
+		            lut[ntrail].upper)) {
+			break;
+		}
+	}
+	if (ntrail == LEN(lut)) {
+		/*
+		 * no sequence type starts with this byte (this also
+		 * covers bits above the 8th being set on systems with
+		 * CHAR_BIT > 8); consume just this one byte
+		 */
+		*cp = GRAPHEME_INVALID_CODEPOINT;
+		return 1;
+	}
+
+	if (1 + ntrail > len) {
+		/* the buffer is too short to hold the whole sequence */
+		*cp = GRAPHEME_INVALID_CODEPOINT;
+
+		/*
+		 * walk over the continuation bytes that are present,
+		 * but over nothing else, so a sequence starter right
+		 * in front of e.g. a NUL-byte does not swallow it
+		 */
+		for (pos = 1; pos < len; pos++) {
+			if (!BETWEEN(bytes[pos], 0x80, 0xBF)) {
+				break;
+			}
+		}
+
+		/*
+		 * stopped early: report the incomplete sequence length;
+		 * otherwise report the number of bytes we expected,
+		 * which is larger than len
+		 */
+		return (pos < len) ? pos : (1 + ntrail);
+	}
+
+	/*
+	 * start with the payload bits of the first byte (obtained by
+	 * subtracting the fixed high bits) ...
+	 */
+	*cp = bytes[0] - lut[ntrail].lower;
+
+	/*
+	 * ... and append the 6 payload bits of each continuation
+	 * byte, which has the form 10xxxxxx (0x80 to 0xBF)
+	 */
+	for (pos = 1; pos <= ntrail; pos++) {
+		if (!BETWEEN(bytes[pos], 0x80, 0xBF)) {
+			/*
+			 * not a continuation byte: return the number
+			 * of bytes processed excluding the unexpected
+			 * one, as recommended since Unicode 6
+			 * (chapter 3)
+			 */
+			*cp = GRAPHEME_INVALID_CODEPOINT;
+			return pos;
+		}
+		*cp = (*cp << 6) | (bytes[pos] & 0x3F);
+	}
+
+	/*
+	 * reject overlong encodings, UTF-16 surrogate halves
+	 * (0xD800..0xDFFF) and values beyond 0x10FFFF (RFC-3629
+	 * specifies the latter two conditions)
+	 */
+	if (*cp > UINT32_C(0x10FFFF) ||
+	    BETWEEN(*cp, UINT32_C(0xD800), UINT32_C(0xDFFF)) ||
+	    *cp < lut[ntrail].mincp) {
+		*cp = GRAPHEME_INVALID_CODEPOINT;
+	}
+
+	return 1 + ntrail;
+}
+
+/*
+ * Encode the codepoint cp as UTF-8 into the buffer str of len bytes.
+ *
+ * Surrogate halves and values beyond U+10FFFF are replaced by
+ * GRAPHEME_INVALID_CODEPOINT before encoding.
+ *
+ * Returns the number of bytes the encoding needs. Nothing is written
+ * when str is NULL, len is 0 or the buffer is too small, which allows
+ * callers to query the needed size first.
+ */
+size_t
+grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len)
+{
+	unsigned char *out = (unsigned char *)str;
+	size_t ntrail, pos, shift;
+
+	/*
+	 * RFC-3629 deems UTF-16 surrogate halves (0xD800..0xDFFF) and
+	 * anything not representable in UTF-16 (>0x10FFFF) invalid
+	 */
+	if (cp > UINT32_C(0x10FFFF) ||
+	    BETWEEN(cp, UINT32_C(0xD800), UINT32_C(0xDFFF))) {
+		cp = GRAPHEME_INVALID_CODEPOINT;
+	}
+
+	/* find the shortest sequence type able to hold cp */
+	ntrail = 0;
+	while (ntrail < LEN(lut) && cp > lut[ntrail].maxcp) {
+		ntrail++;
+	}
+
+	/*
+	 * without a usable buffer (missing, empty or too small) only
+	 * report how many bytes the sequence would take
+	 */
+	if (str == NULL || len == 0 || 1 + ntrail > len) {
+		return 1 + ntrail;
+	}
+
+	/*
+	 * The first byte is the fixed bit-pattern lut[ntrail].lower
+	 * combined with the topmost bits of cp, which remain after
+	 * shifting away the 6 bits stored in each following byte.
+	 * No masking is needed as the sequence type was chosen such
+	 * that no bits higher than the pattern allows are set.
+	 */
+	shift = 6 * ntrail;
+	out[0] = lut[ntrail].lower | (uint_least8_t)(cp >> shift);
+
+	/*
+	 * every following byte is 10000000 (0x80) plus the next lower
+	 * group of 6 bits of cp, masked with 00111111 (0x3F)
+	 */
+	for (pos = 1; pos <= ntrail; pos++) {
+		shift -= 6;
+		out[pos] = 0x80 | ((cp >> shift) & 0x3F);
+	}
+
+	return 1 + ntrail;
+}
--- a/libs/libgrapheme-2.0.2/src/utf8.o
+++ b/libs/libgrapheme-2.0.2/src/utf8.o
--- a/libs/libgrapheme-2.0.2/src/util.c
+++ b/libs/libgrapheme-2.0.2/src/util.c
@@ -0,0 +1,417 @@
+/* See LICENSE file for copyright and license details. */
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../gen/types.h"
+#include "../grapheme.h"
+#include "util.h"
+
+/*
+ * Set up the reader r to read data of the given type from src, which
+ * is srclen units long. The reader starts at offset 0, is not
+ * NUL-terminated yet and has no soft limit in place.
+ */
+void
+herodotus_reader_init(HERODOTUS_READER *r, enum herodotus_type type,
+                      const void *src, size_t srclen)
+{
+	size_t slot = LEN(r->soft_limit);
+
+	r->type = type;
+	r->src = src;
+	r->srclen = srclen;
+	r->off = 0;
+	r->terminated_by_null = false;
+
+	/* SIZE_MAX marks a soft-limit slot as unused */
+	while (slot-- > 0) {
+		r->soft_limit[slot] = SIZE_MAX;
+	}
+}
+
+/*
+ * Make dest a "fresh" reader that starts where src currently stands:
+ * the source pointer is moved forward by src->off, dest->off is 0 and
+ * the length and all soft limits are rebased accordingly. SIZE_MAX
+ * values (unbounded length, unused limit) are carried over unchanged.
+ */
+void
+herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest)
+{
+	size_t i, limit;
+
+	dest->type = src->type;
+
+	/* advance the source pointer in units of the underlying type */
+	if (src->src == NULL) {
+		dest->src = NULL;
+	} else if (src->type == HERODOTUS_TYPE_CODEPOINT) {
+		dest->src = ((const uint_least32_t *)(src->src)) + src->off;
+	} else { /* src->type == HERODOTUS_TYPE_UTF8 */
+		dest->src = ((const char *)(src->src)) + src->off;
+	}
+
+	/* an offset at or past the end leaves a length of zero */
+	dest->srclen = (src->srclen == SIZE_MAX) ? SIZE_MAX :
+	               (src->off < src->srclen) ? src->srclen - src->off : 0;
+
+	dest->off = 0;
+	dest->terminated_by_null = src->terminated_by_null;
+
+	for (i = 0; i < LEN(src->soft_limit); i++) {
+		limit = src->soft_limit[i];
+
+		if (limit != SIZE_MAX) {
+			/*
+			 * In the degenerate case of the offset lying
+			 * beyond the soft limit we cannot decide to
+			 * release the limit here; it is clamped to
+			 * zero instead, preventing any further reads.
+			 */
+			limit = (src->off < limit) ? limit - src->off : 0;
+		}
+		dest->soft_limit[i] = limit;
+	}
+}
+
+/*
+ * Push a new soft limit that lies count units past the current offset
+ * of r. The existing limits move one slot down the stack; the one in
+ * the last slot is dropped.
+ */
+void
+herodotus_reader_push_advance_limit(HERODOTUS_READER *r, size_t count)
+{
+	size_t slot = LEN(r->soft_limit) - 1;
+
+	while (slot > 0) {
+		r->soft_limit[slot] = r->soft_limit[slot - 1];
+		slot--;
+	}
+	r->soft_limit[0] = r->off + count;
+}
+
+/*
+ * Drop the active soft limit of r. The remaining limits move one slot
+ * up the stack and the freed last slot is marked unused (SIZE_MAX).
+ */
+void
+herodotus_reader_pop_limit(HERODOTUS_READER *r)
+{
+	const size_t last = LEN(r->soft_limit) - 1;
+	size_t slot = 0;
+
+	while (slot < last) {
+		r->soft_limit[slot] = r->soft_limit[slot + 1];
+		slot++;
+	}
+	r->soft_limit[last] = SIZE_MAX;
+}
+
+/*
+ * Return the distance from the current offset of r to the next word
+ * break, as determined by grapheme_next_word_break() or
+ * grapheme_next_word_break_utf8() (depending on the reader type) on
+ * the data that is still readable. The reader itself is not modified.
+ */
+size_t
+herodotus_reader_next_word_break(const HERODOTUS_READER *r)
+{
+	const size_t limit = MIN(r->srclen, r->soft_limit[0]);
+	size_t remaining;
+
+	if (limit == SIZE_MAX) {
+		/*
+		 * The data is NUL-terminated (a length of SIZE_MAX) and
+		 * no soft limit is active. We must hand the SIZE_MAX
+		 * sentinel on as is: subtracting the offset would turn
+		 * it into an ordinary (huge) length, so the callee would
+		 * not stop at the NUL any more and read past it.
+		 */
+		remaining = SIZE_MAX;
+	} else {
+		/* never let an offset beyond the limit wrap around */
+		remaining = (r->off < limit) ? limit - r->off : 0;
+	}
+
+	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
+		return grapheme_next_word_break(
+			(const uint_least32_t *)(r->src) + r->off,
+			remaining);
+	} else { /* r->type == HERODOTUS_TYPE_UTF8 */
+		return grapheme_next_word_break_utf8(
+			(const char *)(r->src) + r->off,
+			remaining);
+	}
+}
+
+/*
+ * Return the number of units the next codepoint of r occupies: for
+ * codepoint arrays 1 (or 0 when the offset has reached the effective
+ * limit), for UTF-8 whatever grapheme_decode_utf8() reports for the
+ * remaining data. The reader itself is not modified.
+ */
+size_t
+herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
+{
+	/* the effective end is the smaller of hard and soft limit */
+	const size_t limit = MIN(r->srclen, r->soft_limit[0]);
+
+	if (r->type != HERODOTUS_TYPE_CODEPOINT) {
+		/* r->type == HERODOTUS_TYPE_UTF8 */
+		return grapheme_decode_utf8((const char *)(r->src) + r->off,
+		                            limit - r->off, NULL);
+	}
+
+	return (r->off < limit) ? 1 : 0;
+}
+
+/*
+ * Return the current offset of r, i.e. the number of units (elements
+ * for codepoint arrays, bytes for UTF-8) consumed so far.
+ */
+size_t
+herodotus_reader_number_read(const HERODOTUS_READER *r)
+{
+	return r->off;
+}
+
+/*
+ * Read the codepoint at the current offset of r into *cp and, if
+ * advance is set, move the offset past it.
+ *
+ * Returns HERODOTUS_STATUS_SUCCESS when a codepoint was read,
+ * HERODOTUS_STATUS_SOFT_LIMIT_REACHED when the offset has reached the
+ * active soft limit and HERODOTUS_STATUS_END_OF_BUFFER when there is
+ * no more data: the reader has no source, the hard limit is reached,
+ * a NUL was found in NUL-terminated data or the sequence would run
+ * over the effective limit.
+ */
+enum herodotus_status
+herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
+{
+	size_t consumed, limit;
+
+	if (r->src == NULL || r->terminated_by_null ||
+	    r->off >= r->srclen) {
+		*cp = GRAPHEME_INVALID_CODEPOINT;
+		return HERODOTUS_STATUS_END_OF_BUFFER;
+	}
+
+	if (r->off >= r->soft_limit[0]) {
+		*cp = GRAPHEME_INVALID_CODEPOINT;
+		return HERODOTUS_STATUS_SOFT_LIMIT_REACHED;
+	}
+
+	/* the effective end is the smaller of hard and soft limit */
+	limit = MIN(r->srclen, r->soft_limit[0]);
+
+	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
+		consumed = 1;
+		*cp = ((const uint_least32_t *)(r->src))[r->off];
+	} else { /* r->type == HERODOTUS_TYPE_UTF8 */
+		consumed = grapheme_decode_utf8((const char *)r->src + r->off,
+		                                limit - r->off, cp);
+	}
+
+	if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
+		/*
+		 * A null-codepoint in NUL-terminated data: leave the
+		 * offset alone and behave as if the buffer had ended
+		 * here all along, now and for all later reads.
+		 */
+		r->terminated_by_null = true;
+		return HERODOTUS_STATUS_END_OF_BUFFER;
+	}
+
+	if (r->off + consumed > limit) {
+		/*
+		 * the sequence wants more than we have; terminate
+		 * here instead of returning garbage
+		 */
+		return HERODOTUS_STATUS_END_OF_BUFFER;
+	}
+
+	/* the advance, if requested, cannot surpass the limits now */
+	if (advance) {
+		r->off += consumed;
+	}
+
+	return HERODOTUS_STATUS_SUCCESS;
+}
+
+/*
+ * Set up the writer w to write data of the given type into dest,
+ * which has room for destlen units. Writing starts at offset 0 and no
+ * unwritable offset has been recorded yet (SIZE_MAX).
+ */
+void
+herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
+                      void *dest, size_t destlen)
+{
+	*w = (HERODOTUS_WRITER){
+		.type = type,
+		.dest = dest,
+		.destlen = destlen,
+		.off = 0,
+		.first_unwritable_offset = SIZE_MAX,
+	};
+}
+
+/*
+ * NUL-terminate the destination of w, if there is one and it is not
+ * empty. The terminator goes to the first of these positions that
+ * lies within the buffer: the current offset, the first offset a
+ * write did not fit at, or the very last element (truncating the
+ * data). The offset of w is not incremented in any case.
+ */
+void
+herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
+{
+	size_t pos;
+
+	if (w->dest == NULL) {
+		return;
+	}
+
+	if (w->off < w->destlen) {
+		/* there is still space in the buffer, simply use it */
+		pos = w->off;
+	} else if (w->first_unwritable_offset < w->destlen) {
+		/*
+		 * The buffer is exhausted, but we noted down the first
+		 * offset we could not write to and it lies within the
+		 * buffer. Writing was thus given up when a
+		 * multibyte-codepoint did not fit, so the data ends
+		 * before that offset and the terminator belongs there.
+		 */
+		pos = w->first_unwritable_offset;
+	} else if (w->destlen > 0) {
+		/*
+		 * The buffer is exhausted and no usable offset was
+		 * noted down; we are forced to overwrite the last
+		 * element.
+		 */
+		pos = w->destlen - 1;
+	} else {
+		/* zero-sized buffer, nothing can be written at all */
+		return;
+	}
+
+	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
+		((uint_least32_t *)(w->dest))[pos] = 0;
+	} else { /* w->type == HERODOTUS_TYPE_UTF8 */
+		((char *)(w->dest))[pos] = '\0';
+	}
+}
+
+/*
+ * Return the current offset of w. As herodotus_write_codepoint()
+ * advances the offset even when nothing could be stored, this may be
+ * larger than the destination length, which indicates truncation.
+ */
+size_t
+herodotus_writer_number_written(const HERODOTUS_WRITER *w)
+{
+	return w->off;
+}
+
+/*
+ * Write the codepoint cp via the writer w.
+ *
+ * The offset of w always advances by the number of units cp takes,
+ * even when the destination is missing or full, so that truncation
+ * can be detected afterwards. For UTF-8 the first offset a codepoint
+ * did not fit at is remembered in w->first_unwritable_offset.
+ */
+void
+herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp)
+{
+	size_t need;
+
+	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
+		/* one element per codepoint, stored only if it fits */
+		if (w->dest != NULL && w->off < w->destlen) {
+			((uint_least32_t *)(w->dest))[w->off] = cp;
+		}
+		w->off += 1;
+		return;
+	}
+
+	/* w->type == HERODOTUS_TYPE_UTF8 */
+
+	/* ask the encoder how many bytes the codepoint takes */
+	need = grapheme_encode_utf8(cp, NULL, 0);
+
+	if (w->dest == NULL || w->off + need >= w->destlen) {
+		/*
+		 * the codepoint cannot be stored; note down the
+		 * offset if this happens for the first time
+		 */
+		if (w->first_unwritable_offset == SIZE_MAX) {
+			w->first_unwritable_offset = w->off;
+		}
+	} else {
+		/* there is enough room left in the buffer */
+		grapheme_encode_utf8(cp, (char *)(w->dest) + w->off,
+		                     w->destlen - w->off);
+	}
+
+	w->off += need;
+}
+
+/*
+ * Initialize the proper p on top of the reader r.
+ *
+ * state is the opaque pointer later passed to skip_shift_callback,
+ * no_prop is the value standing for "no property" and get_break_prop
+ * and is_skippable_prop are the property-lookup and -classification
+ * functions. After the call the previous properties are all no_prop
+ * and the (up to) two next properties of both the raw and the skip
+ * view are filled in; what cannot be read stays no_prop.
+ */
+void
+proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop,
+            uint_least8_t (*get_break_prop)(uint_least32_t),
+            bool (*is_skippable_prop)(uint_least8_t),
+            void (*skip_shift_callback)(uint_least8_t, void *),
+            struct proper *p)
+{
+	uint_least32_t cp;
+	uint_least8_t prop;
+	size_t filled;
+
+	/* remember the configuration */
+	p->state = state;
+	p->no_prop = no_prop;
+	p->get_break_prop = get_break_prop;
+	p->is_skippable_prop = is_skippable_prop;
+	p->skip_shift_callback = skip_shift_callback;
+
+	/*
+	 * the mid-reader only reflects the current position of the
+	 * viewing-line
+	 */
+	herodotus_reader_copy(r, &(p->mid_reader));
+
+	/* at the start of the buffer nothing lies behind us */
+	p->raw.prev_prop[1] = p->raw.prev_prop[0] = p->no_prop;
+	p->skip.prev_prop[1] = p->skip.prev_prop[0] = p->no_prop;
+
+	/* raw view: the next two properties, whatever they are */
+	herodotus_reader_copy(r, &(p->raw_reader));
+	p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop;
+	filled = 0;
+	while (filled < 2 &&
+	       herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
+	       HERODOTUS_STATUS_SUCCESS) {
+		p->raw.next_prop[filled] = p->get_break_prop(cp);
+		filled++;
+	}
+
+	/* skip view: the next two properties that are not skippable */
+	herodotus_reader_copy(r, &(p->skip_reader));
+	p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop;
+	filled = 0;
+	while (filled < 2 &&
+	       herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
+	       HERODOTUS_STATUS_SUCCESS) {
+		prop = p->get_break_prop(cp);
+		if (p->is_skippable_prop(prop)) {
+			continue;
+		}
+		p->skip.next_prop[filled] = prop;
+		filled++;
+	}
+}
+
+/*
+ * Move the viewing-line of the proper p one codepoint forward.
+ *
+ * The raw properties are shifted on every call; the skip properties
+ * are shifted (and the skip-shift-callback is invoked) only when the
+ * property that passed the raw-viewing-line is not skippable.
+ *
+ * Returns 1 without shifting anything when the end of the buffer has
+ * been reached (the next raw property is a no_prop) and 0 otherwise.
+ */
+int
+proper_advance(struct proper *p)
+{
+	uint_least8_t prop;
+	uint_least32_t cp;
+
+	/* read in next "raw" property */
+	if (herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
+	    HERODOTUS_STATUS_SUCCESS) {
+		prop = p->get_break_prop(cp);
+	} else {
+		prop = p->no_prop;
+	}
+
+	/*
+	 * do a shift-in, unless we find that the property that is to
+	 * be moved past the "raw-viewing-line" (this property is stored
+	 * in p->raw.next_prop[0]) is a no_prop, indicating that
+	 * we are at the end of the buffer.
+	 */
+	if (p->raw.next_prop[0] == p->no_prop) {
+		return 1;
+	}
+
+	/* shift in the properties */
+	p->raw.prev_prop[1] = p->raw.prev_prop[0];
+	p->raw.prev_prop[0] = p->raw.next_prop[0];
+	p->raw.next_prop[0] = p->raw.next_prop[1];
+	p->raw.next_prop[1] = prop;
+
+	/* advance the middle reader viewing-line */
+	(void)herodotus_read_codepoint(&(p->mid_reader), true, &cp);
+
+	/* check skippability-property */
+	if (!p->is_skippable_prop(p->raw.prev_prop[0])) {
+		/*
+		 * the property that has moved past the "raw-viewing-line"
+		 * (this property is now (after the raw-shift) stored in
+		 * p->raw.prev_prop[0] and guaranteed not to be a no-prop,
+		 * guaranteeing that we won't shift a no-prop past the
+		 * "viewing-line" in the skip-properties) is not a skippable
+		 * property, thus we need to shift the skip property as well.
+		 */
+		p->skip.prev_prop[1] = p->skip.prev_prop[0];
+		p->skip.prev_prop[0] = p->skip.next_prop[0];
+		p->skip.next_prop[0] = p->skip.next_prop[1];
+
+		/*
+		 * call the skip-shift-callback on the property that
+		 * passed the skip-viewing-line (this property is now
+		 * stored in p->skip.prev_prop[0]).
+		 */
+		p->skip_shift_callback(p->skip.prev_prop[0], p->state);
+
+		/*
+		 * determine the next shift property: the first
+		 * non-skippable one the skip-reader yields, or
+		 * no_prop if the data ends before one is found
+		 */
+		p->skip.next_prop[1] = p->no_prop;
+		while (herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
+		       HERODOTUS_STATUS_SUCCESS) {
+			prop = p->get_break_prop(cp);
+			if (!p->is_skippable_prop(prop)) {
+				p->skip.next_prop[1] = prop;
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
--- a/libs/libgrapheme-2.0.2/src/util.h
+++ b/libs/libgrapheme-2.0.2/src/util.h
@@ -0,0 +1,116 @@
+/* See LICENSE file for copyright and license details. */
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../gen/types.h"
+#include "../grapheme.h"
+
+#undef MIN
+#define MIN(x,y)  ((x) < (y) ? (x) : (y)) /* NOTE: evaluates one argument twice */
+#undef LEN
+#define LEN(x) (sizeof(x) / sizeof(*(x))) /* element count; for true arrays only */
+
+/* branch-prediction hints, degrading to the bare expression */
+#undef likely
+#undef unlikely
+#ifdef __has_builtin
+	#if __has_builtin(__builtin_expect)
+		#define likely(expr) __builtin_expect(!!(expr), 1)
+		#define unlikely(expr) __builtin_expect(!!(expr), 0)
+	#endif
+#endif
+#ifndef likely
+	/* no __builtin_expect available: pass the expression through */
+	#define likely(expr) (expr)
+	#define unlikely(expr) (expr)
+#endif
+
+/*
+ * Herodotus, the ancient Greek historian and geographer,
+ * was criticized for including legends and other fantastic
+ * accounts into his works, among others by his contemporary
+ * Thucydides.
+ *
+ * The Herodotus readers and writers are tailored towards the needs
+ * of the library interface, doing all the dirty work behind the
+ * scenes. While the reader is relatively faithful in his accounts,
+ * the Herodotus writer will never fail and always claim to write the
+ * data. Internally, it only writes as much as it can, and will simply
+ * keep account of the rest. This way, we can properly signal truncation.
+ *
+ * In this sense, explaining the naming, the writer is always a bit
+ * inaccurate in his accounts.
+ *
+ */
+enum herodotus_status { /* result type of herodotus_read_codepoint() */
+	HERODOTUS_STATUS_SUCCESS,            /* a codepoint was read */
+	HERODOTUS_STATUS_END_OF_BUFFER,      /* presumably: source exhausted -- confirm in util.c */
+	HERODOTUS_STATUS_SOFT_LIMIT_REACHED, /* presumably: stopped at a pushed limit -- confirm */
+};
+
+enum herodotus_type { /* kind of buffer behind a reader or writer */
+	HERODOTUS_TYPE_CODEPOINT, /* used for uint_least32_t codepoint arrays */
+	HERODOTUS_TYPE_UTF8,      /* used for UTF-8 char strings */
+};
+
+typedef struct herodotus_reader {
+	enum herodotus_type type; /* how src is to be interpreted */
+	const void *src;          /* source buffer */
+	size_t srclen;            /* length of src */
+	size_t off;               /* presumably the current read offset -- confirm in util.c */
+	bool terminated_by_null;  /* NOTE(review): semantics not visible here, see util.c */
+	size_t soft_limit[10];    /* presumably filled by push_advance_limit() -- confirm */
+} HERODOTUS_READER;
+
+typedef struct herodotus_writer {
+	enum herodotus_type type; /* how dest is to be interpreted */
+	void *dest;               /* destination buffer */
+	size_t destlen;           /* length of dest */
+	size_t off;               /* presumably the current write offset -- confirm in util.c */
+	size_t first_unwritable_offset; /* NOTE(review): semantics not visible here */
+} HERODOTUS_WRITER;
+
+struct proper {
+	/*
+	 * prev_prop[1] prev_prop[0] | next_prop[0] next_prop[1]
+	 */
+	struct {
+		uint_least8_t prev_prop[2]; /* properties left of the viewing-line */
+		uint_least8_t next_prop[2]; /* properties right of the viewing-line */
+	} raw, skip; /* raw: every property; skip: skippable properties left out */
+	HERODOTUS_READER mid_reader, raw_reader, skip_reader;
+	void *state; /* handed to skip_shift_callback() as its second argument */
+	uint_least8_t no_prop; /* sentinel property, used e.g. at the end of the buffer */
+	uint_least8_t (*get_break_prop)(uint_least32_t); /* codepoint -> property */
+	bool (*is_skippable_prop)(uint_least8_t); /* decides what the skip-view omits */
+	void (*skip_shift_callback)(uint_least8_t, void *); /* run on each skip-shift */
+};
+
+void herodotus_reader_init(HERODOTUS_READER *, enum herodotus_type,
+                           const void *, size_t); /* reader, type, source, length */
+void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *);
+void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t);
+void herodotus_reader_pop_limit(HERODOTUS_READER *);
+size_t herodotus_reader_number_read(const HERODOTUS_READER *);
+size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
+size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
+enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_least32_t *);
+
+void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
+                           size_t);
+void herodotus_writer_nul_terminate(HERODOTUS_WRITER *);
+size_t herodotus_writer_number_written(const HERODOTUS_WRITER *);
+void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t);
+
+void proper_init(const HERODOTUS_READER *, void *, uint_least8_t,
+                 uint_least8_t (*get_break_prop)(uint_least32_t),
+                 bool (*is_skippable_prop)(uint_least8_t),
+                 void (*skip_shift_callback)(uint_least8_t, void *),
+                 struct proper *); /* the last argument is the object set up */
+int proper_advance(struct proper *); /* 0 after a shift, 1 at the end of the buffer */
+
+#endif /* UTIL_H */
--- a/libs/libgrapheme-2.0.2/src/util.o
+++ b/libs/libgrapheme-2.0.2/src/util.o
--- a/libs/libgrapheme-2.0.2/src/word.c
+++ b/libs/libgrapheme-2.0.2/src/word.c
@@ -0,0 +1,268 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "../gen/word.h"
+#include "../grapheme.h"
+#include "util.h"
+
+struct word_break_state
+{
+	bool ri_even; /* even number of consecutive RIs left of the breakpoint? */
+};
+
+/* word-break property of cp (two-stage lookup); OTHER if out of range */
+static inline uint_least8_t
+get_word_break_prop(uint_least32_t cp)
+{
+	if (!likely(cp <= 0x10FFFF)) {
+		return WORD_BREAK_PROP_OTHER;
+	}
+	return (uint_least8_t)
+	       word_break_minor[word_break_major[cp >> 8] + (cp & 0xff)];
+}
+
+/* properties that WB4 lets the later rules look through */
+static bool
+is_skippable_word_prop(uint_least8_t prop)
+{
+	return !(prop != WORD_BREAK_PROP_EXTEND &&
+	         prop != WORD_BREAK_PROP_FORMAT && prop != WORD_BREAK_PROP_ZWJ);
+}
+
+/*
+ * Skip-shift callback: receives the property that was just shifted
+ * past the skip-viewing-line and the state pointer. It keeps track of
+ * the parity of the run of regional indicators (RIs) on the left side
+ * of the breakpoint, which is consulted for WB15 and WB16.
+ */
+static void
+word_skip_shift_callback(uint_least8_t prop, void *s)
+{
+	struct word_break_state *wbs = s;
+
+	if (prop != WORD_BREAK_PROP_REGIONAL_INDICATOR) {
+		/*
+		 * Anything but an RI ends the run, leaving zero
+		 * consecutive RIs on the left side, which is even.
+		 */
+		wbs->ri_even = true;
+		return;
+	}
+
+	/*
+	 * One more RI joined the run on the left side of the
+	 * breakpoint, so its parity flips.
+	 */
+	wbs->ri_even = !wbs->ri_even;
+}
+
+static size_t
+next_word_break(HERODOTUS_READER *r)
+{
+	struct proper p;
+	struct word_break_state state = { .ri_even = true };
+
+	/*
+	 * UAX #29 word breaking ("x": continue, no break; "/": break), see
+	 * https://unicode.org/reports/tr29/#Word_Boundary_Rules
+	 */
+	proper_init(r, &state, NUM_WORD_BREAK_PROPS, get_word_break_prop,
+	            is_skippable_word_prop, word_skip_shift_callback, &p);
+
+	while (!proper_advance(&p)) { /* until the end of the buffer */
+		/* WB3: CR x LF */
+		if (p.raw.prev_prop[0] == WORD_BREAK_PROP_CR &&
+		    p.raw.next_prop[0] == WORD_BREAK_PROP_LF) {
+			continue;
+		}
+
+		/* WB3a: (Newline | CR | LF) / */
+		if (p.raw.prev_prop[0] == WORD_BREAK_PROP_NEWLINE ||
+		    p.raw.prev_prop[0] == WORD_BREAK_PROP_CR      ||
+		    p.raw.prev_prop[0] == WORD_BREAK_PROP_LF) {
+			break;
+		}
+
+		/* WB3b: / (Newline | CR | LF) */
+		if (p.raw.next_prop[0] == WORD_BREAK_PROP_NEWLINE ||
+		    p.raw.next_prop[0] == WORD_BREAK_PROP_CR      ||
+		    p.raw.next_prop[0] == WORD_BREAK_PROP_LF) {
+			break;
+		}
+
+		/* WB3c: ZWJ x Extended_Pictographic */
+		if (p.raw.prev_prop[0] == WORD_BREAK_PROP_ZWJ &&
+		    (p.raw.next_prop[0] == WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC ||
+		     p.raw.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) {
+			continue;
+		}
+
+		/* WB3d: WSegSpace x WSegSpace */
+		if (p.raw.prev_prop[0] == WORD_BREAK_PROP_WSEGSPACE &&
+		    p.raw.next_prop[0] == WORD_BREAK_PROP_WSEGSPACE) {
+			continue;
+		}
+
+		/* WB4: x (Extend | Format | ZWJ) */
+		if (p.raw.next_prop[0] == WORD_BREAK_PROP_EXTEND ||
+		    p.raw.next_prop[0] == WORD_BREAK_PROP_FORMAT ||
+		    p.raw.next_prop[0] == WORD_BREAK_PROP_ZWJ) {
+			continue;
+		}
+
+		/* WB5: AHLetter x AHLetter (rules from here on use p.skip) */
+		if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
+		    (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
+			continue;
+		}
+
+		/* WB6: AHLetter x (MidLetter | MidNumLetQ) AHLetter */
+		if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
+		    (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER    ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET    ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
+		    (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.next_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.next_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
+			continue;
+		}
+
+		/* WB7: AHLetter (MidLetter | MidNumLetQ) x AHLetter */
+		if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER    ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET    ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
+		    (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
+		    (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.prev_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.prev_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
+			continue;
+		}
+
+		/* WB7a: Hebrew_Letter x Single_Quote */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) {
+			continue;
+		}
+
+		/* WB7b: Hebrew_Letter x Double_Quote Hebrew_Letter */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_DOUBLE_QUOTE &&
+		    p.skip.next_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER) {
+			continue;
+		}
+
+		/* WB7c: Hebrew_Letter Double_Quote x Hebrew_Letter */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_DOUBLE_QUOTE &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER &&
+		    p.skip.prev_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER) {
+			continue;
+		}
+
+		/* WB8: Numeric x Numeric */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC) {
+			continue;
+		}
+
+		/* WB9: AHLetter x Numeric */
+		if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC) {
+			continue;
+		}
+
+		/* WB10: Numeric x AHLetter */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
+		    (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
+			continue;
+		}
+
+		/* WB11: Numeric (MidNum | MidNumLetQ) x Numeric */
+		if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM       ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET    ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC &&
+		    p.skip.prev_prop[1] == WORD_BREAK_PROP_NUMERIC) {
+			continue;
+		}
+
+		/* WB12: Numeric x (MidNum | MidNumLetQ) Numeric */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
+		    (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM       ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET    ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
+		    p.skip.next_prop[1] == WORD_BREAK_PROP_NUMERIC) {
+			continue;
+		}
+
+		/* WB13: Katakana x Katakana */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_KATAKANA) {
+			continue;
+		}
+
+		/* WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) x ExtendNumLet */
+		if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER        ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC              ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA             ||
+		     p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) {
+			continue;
+		}
+
+		/* WB13b: ExtendNumLet x (AHLetter | Numeric | Katakana) */
+		if (p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET &&
+		    (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER              ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER        ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC              ||
+		     p.skip.next_prop[0] == WORD_BREAK_PROP_KATAKANA)) {
+			continue;
+		}
+
+		/* WB15 and WB16: odd run of Regional_Indicator x Regional_Indicator */
+		if (!state.ri_even &&
+		    p.skip.next_prop[0] == WORD_BREAK_PROP_REGIONAL_INDICATOR) {
+			continue;
+		}
+
+		/* WB999: / everywhere else */
+		break;
+	}
+
+	return herodotus_reader_number_read(&(p.mid_reader));
+}
+
+/* run next_word_break() on the codepoint array str with length len */
+size_t
+grapheme_next_word_break(const uint_least32_t *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_CODEPOINT, str, len);
+	return next_word_break(&reader);
+}
+
+/* run next_word_break() on the UTF-8 string str with length len */
+size_t
+grapheme_next_word_break_utf8(const char *str, size_t len)
+{
+	HERODOTUS_READER reader;
+
+	herodotus_reader_init(&reader, HERODOTUS_TYPE_UTF8, str, len);
+	return next_word_break(&reader);
+}
--- a/libs/libgrapheme-2.0.2/src/word.o
+++ b/libs/libgrapheme-2.0.2/src/word.o