Initial Commit

This commit is contained in:
2025-08-30 16:07:19 +01:00
commit d86c15e30c
169 changed files with 121377 additions and 0 deletions

View File

@@ -0,0 +1,67 @@
# Template for the grapheme_is_<CASE>[_utf8](3) manual page.
#
# Reads the shell variables ENCODING, CASE, MAN_DATE and UNICODE_VERSION,
# which must be set by the caller, and writes mdoc(7) source to stdout.
# ENCODING="utf8" selects the UTF-8 string (char) variant; any other
# value selects the codepoint array (uint_least32_t) variant.
if [ "$ENCODING" = "utf8" ]; then
	UNIT="byte"
	ARRAYTYPE="UTF-8-encoded string"
	SUFFIX="_utf8"
	ANTISUFFIX=""
	DATATYPE="char"
else
	UNIT="codepoint"
	ARRAYTYPE="codepoint array"
	SUFFIX=""
	ANTISUFFIX="_utf8"
	DATATYPE="uint_least32_t"
fi
# The character classes passed to tr are quoted so the shell cannot
# treat them as pathname patterns.
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_IS_$(printf "%s%s" "$CASE" "$SUFFIX" | tr "[:lower:]" "[:upper:]") 3
.Os suckless.org
.Sh NAME
.Nm grapheme_is_${CASE}${SUFFIX}
.Nd check if ${ARRAYTYPE} is ${CASE}
.Sh SYNOPSIS
.In grapheme.h
.Ft bool
.Fn grapheme_is_${CASE}${SUFFIX} "const ${DATATYPE} *str" "size_t len" "size_t *caselen"
.Sh DESCRIPTION
The
.Fn grapheme_is_${CASE}${SUFFIX}
function checks if the ${ARRAYTYPE}
.Va str
is ${CASE} and writes the length of the matching ${CASE}-sequence to the integer pointed to by
.Va caselen ,
unless
.Va caselen
is set to
.Dv NULL .
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the ${ARRAYTYPE}
.Va str
is interpreted to be NUL-terminated and processing stops when a
$(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
.Pp
For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input data
.Xr grapheme_is_${CASE}${ANTISUFFIX} 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_is_${CASE}${SUFFIX}
function returns
.Dv true
if the ${ARRAYTYPE}
.Va str
is ${CASE}, otherwise
.Dv false .
.Sh SEE ALSO
.Xr grapheme_is_${CASE}${ANTISUFFIX} 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_is_${CASE}${SUFFIX}
is compliant with the Unicode ${UNICODE_VERSION} specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF

View File

@@ -0,0 +1,112 @@
# Template for the grapheme_next_<TYPE>_break[_utf8](3) manual page.
#
# Reads the shell variables ENCODING, TYPE, REALTYPE, MAN_DATE and
# UNICODE_VERSION, which must be set by the caller, and writes mdoc(7)
# source to stdout. ENCODING="utf8" selects the UTF-8 string variant
# (which additionally gets an EXAMPLES section); any other value selects
# the codepoint array variant.
if [ "$ENCODING" = "utf8" ]; then
	UNIT="byte"
	SUFFIX="_utf8"
	ANTISUFFIX=""
else
	UNIT="codepoint"
	SUFFIX=""
	ANTISUFFIX="_utf8"
fi
# The character classes passed to tr are quoted so the shell cannot
# treat them as pathname patterns, and REALTYPE is handed to printf as
# an argument rather than being spliced into the format string.
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr "[:lower:]" "[:upper:]") 3
.Os suckless.org
.Sh NAME
.Nm grapheme_next_${TYPE}_break${SUFFIX}
.Nd determine ${UNIT}-offset to next ${REALTYPE} break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function computes the offset (in ${UNIT}s) to the next ${REALTYPE}
break (see
.Xr libgrapheme 7 )
in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
.Va str
of length
.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a %s begins at\n.Va str\nthis offset is equal to the length of said %s." "$REALTYPE" "$REALTYPE"; fi)
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the string
.Va str
is interpreted to be NUL-terminated and processing stops when
a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
.Pp
For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function returns the offset (in ${UNIT}s) to the next ${REALTYPE}
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
EOF
# The usage example calls the _utf8 function, so it is only emitted for
# the UTF-8 variant of the page.
if [ "$ENCODING" = "utf8" ]; then
	cat << EOF
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>
int
main(void)
{
/* UTF-8 encoded input */
char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
"\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
"\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
"\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
size_t ret, len, off;
printf("Input: \\\\"%s\\\\"\\\\n", s);
/* print each ${REALTYPE} with byte-length */
printf("${REALTYPE}s in NUL-delimited input:\\\\n");
for (off = 0; s[off] != '\\\\0'; off += ret) {
ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
}
printf("\\\\n");
/* do the same, but this time string is length-delimited */
len = 17;
printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
for (off = 0; off < len; off += ret) {
ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
}
return 0;
}
.Ed
EOF
fi
cat << EOF
.Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_${TYPE}_break${SUFFIX}
is compliant with the Unicode ${UNICODE_VERSION} specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF

View File

@@ -0,0 +1,72 @@
# Template for the grapheme_to_<CASE>[_utf8](3) manual page.
#
# Reads the shell variables ENCODING, CASE, MAN_DATE and UNICODE_VERSION,
# which must be set by the caller, and writes mdoc(7) source to stdout.
# ENCODING="utf8" selects the UTF-8 string (char) variant; any other
# value selects the codepoint array (uint_least32_t) variant.
if [ "$ENCODING" = "utf8" ]; then
	UNIT="byte"
	ARRAYTYPE="UTF-8-encoded string"
	SUFFIX="_utf8"
	ANTISUFFIX=""
	DATATYPE="char"
else
	UNIT="codepoint"
	ARRAYTYPE="codepoint array"
	SUFFIX=""
	ANTISUFFIX="_utf8"
	DATATYPE="uint_least32_t"
fi
# The character classes passed to tr are quoted so the shell cannot
# treat them as pathname patterns.
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_TO_$(printf "%s%s" "$CASE" "$SUFFIX" | tr "[:lower:]" "[:upper:]") 3
.Os suckless.org
.Sh NAME
.Nm grapheme_to_${CASE}${SUFFIX}
.Nd convert ${ARRAYTYPE} to ${CASE}
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_to_${CASE}${SUFFIX} "const ${DATATYPE} *src" "size_t srclen" "${DATATYPE} *dest" "size_t destlen"
.Sh DESCRIPTION
The
.Fn grapheme_to_${CASE}${SUFFIX}
function converts the ${ARRAYTYPE}
.Va src
to ${CASE} and writes the result to
.Va dest
up to
.Va destlen ,
unless
.Va dest
is set to
.Dv NULL .
.Pp
If
.Va srclen
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the ${ARRAYTYPE}
.Va src
is interpreted to be NUL-terminated and processing stops when a
$(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
.Pp
For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input data
.Xr grapheme_to_${CASE}${ANTISUFFIX} 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_to_${CASE}${SUFFIX}
function returns the number of ${UNIT}s in the array resulting
from converting
.Va src
to ${CASE}, even if
.Va destlen
is not large enough or
.Va dest
is
.Dv NULL .
.Sh SEE ALSO
.Xr grapheme_to_${CASE}${ANTISUFFIX} 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_to_${CASE}${SUFFIX}
is compliant with the Unicode ${UNICODE_VERSION} specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF