Initial Commit
This commit is contained in:
100
libs/libgrapheme-2.0.2/man/grapheme_decode_utf8.3
Normal file
100
libs/libgrapheme-2.0.2/man/grapheme_decode_utf8.3
Normal file
@@ -0,0 +1,100 @@
|
||||
.Dd 2022-10-06
|
||||
.Dt GRAPHEME_DECODE_UTF8 3
|
||||
.Os suckless.org
|
||||
.Sh NAME
|
||||
.Nm grapheme_decode_utf8
|
||||
.Nd decode first codepoint in UTF-8-encoded string
|
||||
.Sh SYNOPSIS
|
||||
.In grapheme.h
|
||||
.Ft size_t
|
||||
.Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn grapheme_decode_utf8
|
||||
function decodes the first codepoint in the UTF-8-encoded string
|
||||
.Va str
|
||||
of length
|
||||
.Va len .
|
||||
If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
|
||||
string ends unexpectedly, empty string, etc.) the decoding is stopped
|
||||
at the last processed byte and the decoded codepoint set to
|
||||
.Dv GRAPHEME_INVALID_CODEPOINT .
|
||||
.Pp
|
||||
If
|
||||
.Va cp
|
||||
is not
|
||||
.Dv NULL
|
||||
the decoded codepoint is stored in the memory pointed to by
|
||||
.Va cp .
|
||||
.Pp
|
||||
Given NUL has a unique 1 byte representation, it is safe to operate on
|
||||
NUL-terminated strings by setting
|
||||
.Va len
|
||||
to
|
||||
.Dv SIZE_MAX
|
||||
(stdint.h is already included by grapheme.h) and terminating when
|
||||
.Va cp
|
||||
is 0 (see
|
||||
.Sx EXAMPLES
|
||||
for an example).
|
||||
.Sh RETURN VALUES
|
||||
The
|
||||
.Fn grapheme_decode_utf8
|
||||
function returns the number of processed bytes and 0 if
|
||||
.Va str
|
||||
is
|
||||
.Dv NULL
|
||||
or
|
||||
.Va len
|
||||
is 0.
|
||||
If the string ends unexpectedly in a multibyte sequence, the desired
|
||||
length (that is larger than
|
||||
.Va len )
|
||||
is returned.
|
||||
.Sh EXAMPLES
|
||||
.Bd -literal
|
||||
/* cc (-static) -o example example.c -lgrapheme */
|
||||
#include <grapheme.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void
|
||||
print_cps(const char *str, size_t len)
|
||||
{
|
||||
size_t ret, off;
|
||||
uint_least32_t cp;
|
||||
|
||||
for (off = 0; off < len; off += ret) {
|
||||
if ((ret = grapheme_decode_utf8(str + off,
|
||||
len - off, &cp)) > (len - off)) {
|
||||
/*
|
||||
* string ended unexpectedly in the middle of a
|
||||
* multibyte sequence and we have the choice
|
||||
* here to possibly expand str by ret - len + off
|
||||
* bytes to get a full sequence, but we just
|
||||
* bail out in this case.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
printf("%"PRIxLEAST32"\\n", cp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
print_cps_nul_terminated(const char *str)
|
||||
{
|
||||
size_t ret, off;
|
||||
uint_least32_t cp;
|
||||
|
||||
for (off = 0; (ret = grapheme_decode_utf8(str + off,
|
||||
SIZE_MAX, &cp)) > 0 &&
|
||||
cp != 0; off += ret) {
|
||||
printf("%"PRIxLEAST32"\\n", cp);
|
||||
}
|
||||
}
|
||||
.Ed
|
||||
.Sh SEE ALSO
|
||||
.Xr grapheme_encode_utf8 3 ,
|
||||
.Xr libgrapheme 7
|
||||
.Sh AUTHORS
|
||||
.An Laslo Hunhold Aq Mt dev@frign.de
|
Reference in New Issue
Block a user