Initial Commit
This commit is contained in:
580
libs/libgrapheme-2.0.2/test/case.c
Normal file
580
libs/libgrapheme-2.0.2/test/case.c
Normal file
@@ -0,0 +1,580 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../grapheme.h"
|
||||
#include "util.h"
|
||||
|
||||
/*
 * One test vector for the grapheme_is_*case_utf8() checks in this file.
 */
struct unit_test_is_case_utf8 {
	const char *description; /* label printed when the test fails */

	/* arguments handed to the function under test */
	struct {
		const char *src; /* UTF-8 input bytes */
		size_t srclen;   /* byte count; the "NUL-terminated" vectors pass SIZE_MAX */
	} input;

	/* values the function is expected to produce */
	struct {
		bool ret;        /* expected return value */
		size_t caselen;  /* expected length stored via the out-parameter */
	} output;
};
|
||||
|
||||
/*
 * One test vector for the grapheme_to_*case_utf8() conversions in this file.
 */
struct unit_test_to_case_utf8 {
	const char *description; /* label printed when the test fails */

	/* arguments handed to the function under test */
	struct {
		const char *src; /* UTF-8 input bytes */
		size_t srclen;   /* byte count; the "NUL-terminated" vectors pass SIZE_MAX */
		size_t destlen;  /* size of the destination buffer offered */
	} input;

	/* values the function is expected to produce */
	struct {
		const char *dest; /* expected destination contents */
		size_t ret;       /* expected return value */
	} output;
};
|
||||
|
||||
/*
 * Vectors for grapheme_is_lowercase_utf8(). "\xC3\x9F" is the UTF-8
 * encoding of U+00DF; it is kept as a separate literal so the hex
 * escape does not swallow the following letters.
 */
static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
	{ .description = "empty input",
	  .input  = { .src = "", .srclen = 0 },
	  .output = { .ret = true, .caselen = 0 } },
	{ .description = "one character, violation",
	  .input  = { .src = "A", .srclen = 1 },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one character, confirmation",
	  .input  = { .src = "\xC3\x9F", .srclen = 2 },
	  .output = { .ret = true, .caselen = 2 } },
	{ .description = "one character, violation, NUL-terminated",
	  .input  = { .src = "A", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one character, confirmation, NUL-terminated",
	  .input  = { .src = "\xC3\x9F", .srclen = SIZE_MAX },
	  .output = { .ret = true, .caselen = 2 } },
	{ .description = "one word, violation",
	  .input  = { .src = "Hello", .srclen = 5 },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one word, partial confirmation",
	  .input  = { .src = "gru" "\xC3\x9F" "fOrmel", .srclen = 11 },
	  .output = { .ret = false, .caselen = 6 } },
	{ .description = "one word, full confirmation",
	  .input  = { .src = "gru" "\xC3\x9F" "formel", .srclen = 11 },
	  .output = { .ret = true, .caselen = 11 } },
	{ .description = "one word, violation, NUL-terminated",
	  .input  = { .src = "Hello", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one word, partial confirmation, NUL-terminated",
	  .input  = { .src = "gru" "\xC3\x9F" "fOrmel", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 6 } },
	{ .description = "one word, full confirmation, NUL-terminated",
	  .input  = { .src = "gru" "\xC3\x9F" "formel", .srclen = SIZE_MAX },
	  .output = { .ret = true, .caselen = 11 } },
};
|
||||
|
||||
/*
 * Vectors for grapheme_is_uppercase_utf8(). "\xC3\x9F" is the UTF-8
 * encoding of U+00DF; it is kept as a separate literal so the hex
 * escape does not swallow the following letters.
 */
static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
	{ .description = "empty input",
	  .input  = { .src = "", .srclen = 0 },
	  .output = { .ret = true, .caselen = 0 } },
	{ .description = "one character, violation",
	  .input  = { .src = "\xC3\x9F", .srclen = 2 },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one character, confirmation",
	  .input  = { .src = "A", .srclen = 1 },
	  .output = { .ret = true, .caselen = 1 } },
	{ .description = "one character, violation, NUL-terminated",
	  .input  = { .src = "\xC3\x9F", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one character, confirmation, NUL-terminated",
	  .input  = { .src = "A", .srclen = SIZE_MAX },
	  .output = { .ret = true, .caselen = 1 } },
	{ .description = "one word, violation",
	  .input  = { .src = "hello", .srclen = 5 },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one word, partial confirmation",
	  .input  = { .src = "GRU" "\xC3\x9F" "formel", .srclen = 11 },
	  .output = { .ret = false, .caselen = 3 } },
	{ .description = "one word, full confirmation",
	  .input  = { .src = "HELLO", .srclen = 5 },
	  .output = { .ret = true, .caselen = 5 } },
	{ .description = "one word, violation, NUL-terminated",
	  .input  = { .src = "hello", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one word, partial confirmation, NUL-terminated",
	  .input  = { .src = "GRU" "\xC3\x9F" "formel", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 3 } },
	{ .description = "one word, full confirmation, NUL-terminated",
	  .input  = { .src = "HELLO", .srclen = SIZE_MAX },
	  .output = { .ret = true, .caselen = 5 } },
};
|
||||
|
||||
/*
 * Vectors for grapheme_is_titlecase_utf8(). "\xC3\x9F" is the UTF-8
 * encoding of U+00DF; it is kept as a separate literal so the hex
 * escape does not swallow the following letters.
 */
static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
	{ .description = "empty input",
	  .input  = { .src = "", .srclen = 0 },
	  .output = { .ret = true, .caselen = 0 } },
	{ .description = "one character, violation",
	  .input  = { .src = "\xC3\x9F", .srclen = 2 },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one character, confirmation",
	  .input  = { .src = "A", .srclen = 1 },
	  .output = { .ret = true, .caselen = 1 } },
	{ .description = "one character, violation, NUL-terminated",
	  .input  = { .src = "\xC3\x9F", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one character, confirmation, NUL-terminated",
	  .input  = { .src = "A", .srclen = SIZE_MAX },
	  .output = { .ret = true, .caselen = 1 } },
	{ .description = "one word, violation",
	  .input  = { .src = "hello", .srclen = 5 },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one word, partial confirmation",
	  .input  = { .src = "Gru" "\xC3\x9F" "fOrmel", .srclen = 11 },
	  .output = { .ret = false, .caselen = 6 } },
	{ .description = "one word, full confirmation",
	  .input  = { .src = "Gru" "\xC3\x9F" "formel", .srclen = 11 },
	  .output = { .ret = true, .caselen = 11 } },
	{ .description = "one word, violation, NUL-terminated",
	  .input  = { .src = "hello", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 0 } },
	{ .description = "one word, partial confirmation, NUL-terminated",
	  .input  = { .src = "Gru" "\xC3\x9F" "fOrmel", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 6 } },
	{ .description = "one word, full confirmation, NUL-terminated",
	  .input  = { .src = "Gru" "\xC3\x9F" "formel", .srclen = SIZE_MAX },
	  .output = { .ret = true, .caselen = 11 } },
	{ .description = "multiple words, partial confirmation",
	  .input  = { .src = "Hello Gru" "\xC3\x9F" "fOrmel!", .srclen = 18 },
	  .output = { .ret = false, .caselen = 12 } },
	{ .description = "multiple words, full confirmation",
	  .input  = { .src = "Hello Gru" "\xC3\x9F" "formel!", .srclen = 18 },
	  .output = { .ret = true, .caselen = 18 } },
	{ .description = "multiple words, partial confirmation, NUL-terminated",
	  .input  = { .src = "Hello Gru" "\xC3\x9F" "fOrmel!", .srclen = SIZE_MAX },
	  .output = { .ret = false, .caselen = 12 } },
	{ .description = "multiple words, full confirmation, NUL-terminated",
	  .input  = { .src = "Hello Gru" "\xC3\x9F" "formel!", .srclen = SIZE_MAX },
	  .output = { .ret = true, .caselen = 18 } },
};
|
||||
|
||||
/*
 * Vectors for grapheme_to_lowercase_utf8(). In the truncation vectors
 * the expected return value is larger than the destination size offered.
 */
static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
	{ .description = "empty input",
	  .input  = { .src = "", .srclen = 0, .destlen = 10 },
	  .output = { .dest = "", .ret = 0 } },
	{ .description = "empty output",
	  .input  = { .src = "hello", .srclen = 5, .destlen = 0 },
	  .output = { .dest = "", .ret = 5 } },
	{ .description = "one character, conversion",
	  .input  = { .src = "A", .srclen = 1, .destlen = 10 },
	  .output = { .dest = "a", .ret = 1 } },
	{ .description = "one character, no conversion",
	  .input  = { .src = "\xC3\x9F", .srclen = 2, .destlen = 10 },
	  .output = { .dest = "\xC3\x9F", .ret = 2 } },
	{ .description = "one character, conversion, truncation",
	  .input  = { .src = "A", .srclen = 1, .destlen = 0 },
	  .output = { .dest = "", .ret = 1 } },
	{ .description = "one character, conversion, NUL-terminated",
	  .input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "a", .ret = 1 } },
	{ .description = "one character, no conversion, NUL-terminated",
	  .input  = { .src = "\xC3\x9F", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "\xC3\x9F", .ret = 2 } },
	{ .description = "one character, conversion, NUL-terminated, truncation",
	  .input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 0 },
	  .output = { .dest = "", .ret = 1 } },
	{ .description = "one word, conversion",
	  .input  = { .src = "wOrD", .srclen = 4, .destlen = 10 },
	  .output = { .dest = "word", .ret = 4 } },
	{ .description = "one word, no conversion",
	  .input  = { .src = "word", .srclen = 4, .destlen = 10 },
	  .output = { .dest = "word", .ret = 4 } },
	{ .description = "one word, conversion, truncation",
	  .input  = { .src = "wOrD", .srclen = 4, .destlen = 3 },
	  .output = { .dest = "wo", .ret = 4 } },
	{ .description = "one word, conversion, NUL-terminated",
	  .input  = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "word", .ret = 4 } },
	{ .description = "one word, no conversion, NUL-terminated",
	  .input  = { .src = "word", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "word", .ret = 4 } },
	{ .description = "one word, conversion, NUL-terminated, truncation",
	  .input  = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 3 },
	  .output = { .dest = "wo", .ret = 4 } },
};
|
||||
|
||||
/*
 * Vectors for grapheme_to_uppercase_utf8(). The vectors expect
 * "\xC3\x9F" (UTF-8 for U+00DF) to come out as the two bytes "SS".
 */
static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
	{ .description = "empty input",
	  .input  = { .src = "", .srclen = 0, .destlen = 10 },
	  .output = { .dest = "", .ret = 0 } },
	{ .description = "empty output",
	  .input  = { .src = "hello", .srclen = 5, .destlen = 0 },
	  .output = { .dest = "", .ret = 5 } },
	{ .description = "one character, conversion",
	  .input  = { .src = "\xC3\x9F", .srclen = 2, .destlen = 10 },
	  .output = { .dest = "SS", .ret = 2 } },
	{ .description = "one character, no conversion",
	  .input  = { .src = "A", .srclen = 1, .destlen = 10 },
	  .output = { .dest = "A", .ret = 1 } },
	{ .description = "one character, conversion, truncation",
	  .input  = { .src = "\xC3\x9F", .srclen = 2, .destlen = 0 },
	  .output = { .dest = "", .ret = 2 } },
	{ .description = "one character, conversion, NUL-terminated",
	  .input  = { .src = "\xC3\x9F", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "SS", .ret = 2 } },
	{ .description = "one character, no conversion, NUL-terminated",
	  .input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "A", .ret = 1 } },
	{ .description = "one character, conversion, NUL-terminated, truncation",
	  .input  = { .src = "\xC3\x9F", .srclen = SIZE_MAX, .destlen = 0 },
	  .output = { .dest = "", .ret = 2 } },
	{ .description = "one word, conversion",
	  .input  = { .src = "gRu" "\xC3\x9F" "fOrMel", .srclen = 11, .destlen = 15 },
	  .output = { .dest = "GRUSSFORMEL", .ret = 11 } },
	{ .description = "one word, no conversion",
	  .input  = { .src = "WORD", .srclen = 4, .destlen = 10 },
	  .output = { .dest = "WORD", .ret = 4 } },
	{ .description = "one word, conversion, truncation",
	  .input  = { .src = "gRu" "\xC3\x9F" "formel", .srclen = 11, .destlen = 5 },
	  .output = { .dest = "GRUS", .ret = 11 } },
	{ .description = "one word, conversion, NUL-terminated",
	  .input  = { .src = "gRu" "\xC3\x9F" "formel", .srclen = SIZE_MAX, .destlen = 15 },
	  .output = { .dest = "GRUSSFORMEL", .ret = 11 } },
	{ .description = "one word, no conversion, NUL-terminated",
	  .input  = { .src = "WORD", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "WORD", .ret = 4 } },
	{ .description = "one word, conversion, NUL-terminated, truncation",
	  .input  = { .src = "gRu" "\xC3\x9F" "formel", .srclen = SIZE_MAX, .destlen = 5 },
	  .output = { .dest = "GRUS", .ret = 11 } },
};
|
||||
|
||||
/*
 * Vectors for grapheme_to_titlecase_utf8(), covering single characters,
 * single words and two-word inputs, each with and without truncation.
 */
static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
	{ .description = "empty input",
	  .input  = { .src = "", .srclen = 0, .destlen = 10 },
	  .output = { .dest = "", .ret = 0 } },
	{ .description = "empty output",
	  .input  = { .src = "hello", .srclen = 5, .destlen = 0 },
	  .output = { .dest = "", .ret = 5 } },
	{ .description = "one character, conversion",
	  .input  = { .src = "a", .srclen = 1, .destlen = 10 },
	  .output = { .dest = "A", .ret = 1 } },
	{ .description = "one character, no conversion",
	  .input  = { .src = "A", .srclen = 1, .destlen = 10 },
	  .output = { .dest = "A", .ret = 1 } },
	{ .description = "one character, conversion, truncation",
	  .input  = { .src = "a", .srclen = 1, .destlen = 0 },
	  .output = { .dest = "", .ret = 1 } },
	{ .description = "one character, conversion, NUL-terminated",
	  .input  = { .src = "a", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "A", .ret = 1 } },
	{ .description = "one character, no conversion, NUL-terminated",
	  .input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "A", .ret = 1 } },
	{ .description = "one character, conversion, NUL-terminated, truncation",
	  .input  = { .src = "a", .srclen = SIZE_MAX, .destlen = 0 },
	  .output = { .dest = "", .ret = 1 } },
	{ .description = "one word, conversion",
	  .input  = { .src = "heLlo", .srclen = 5, .destlen = 10 },
	  .output = { .dest = "Hello", .ret = 5 } },
	{ .description = "one word, no conversion",
	  .input  = { .src = "Hello", .srclen = 5, .destlen = 10 },
	  .output = { .dest = "Hello", .ret = 5 } },
	{ .description = "one word, conversion, truncation",
	  .input  = { .src = "heLlo", .srclen = 5, .destlen = 2 },
	  .output = { .dest = "H", .ret = 5 } },
	{ .description = "one word, conversion, NUL-terminated",
	  .input  = { .src = "heLlo", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "Hello", .ret = 5 } },
	{ .description = "one word, no conversion, NUL-terminated",
	  .input  = { .src = "Hello", .srclen = SIZE_MAX, .destlen = 10 },
	  .output = { .dest = "Hello", .ret = 5 } },
	{ .description = "one word, conversion, NUL-terminated, truncation",
	  .input  = { .src = "heLlo", .srclen = SIZE_MAX, .destlen = 3 },
	  .output = { .dest = "He", .ret = 5 } },
	{ .description = "two words, conversion",
	  .input  = { .src = "heLlo wORLd!", .srclen = 12, .destlen = 20 },
	  .output = { .dest = "Hello World!", .ret = 12 } },
	{ .description = "two words, no conversion",
	  .input  = { .src = "Hello World!", .srclen = 12, .destlen = 20 },
	  .output = { .dest = "Hello World!", .ret = 12 } },
	{ .description = "two words, conversion, truncation",
	  .input  = { .src = "heLlo wORLd!", .srclen = 12, .destlen = 8 },
	  .output = { .dest = "Hello W", .ret = 12 } },
	{ .description = "two words, conversion, NUL-terminated",
	  .input  = { .src = "heLlo wORLd!", .srclen = SIZE_MAX, .destlen = 20 },
	  .output = { .dest = "Hello World!", .ret = 12 } },
	{ .description = "two words, no conversion, NUL-terminated",
	  .input  = { .src = "Hello World!", .srclen = SIZE_MAX, .destlen = 20 },
	  .output = { .dest = "Hello World!", .ret = 12 } },
	{ .description = "two words, conversion, NUL-terminated, truncation",
	  .input  = { .src = "heLlo wORLd!", .srclen = SIZE_MAX, .destlen = 4 },
	  .output = { .dest = "Hel", .ret = 12 } },
};
|
||||
|
||||
static int
|
||||
unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
const struct unit_test_is_case_utf8 *test =
|
||||
(const struct unit_test_is_case_utf8 *)t + off;
|
||||
bool ret = false;
|
||||
size_t caselen = 0x7f;
|
||||
|
||||
if (t == is_lowercase_utf8) {
|
||||
ret = grapheme_is_lowercase_utf8(test->input.src, test->input.srclen,
|
||||
&caselen);
|
||||
} else if (t == is_uppercase_utf8) {
|
||||
ret = grapheme_is_uppercase_utf8(test->input.src, test->input.srclen,
|
||||
&caselen);
|
||||
} else if (t == is_titlecase_utf8) {
|
||||
ret = grapheme_is_titlecase_utf8(test->input.src, test->input.srclen,
|
||||
&caselen);
|
||||
|
||||
} else {
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* check results */
|
||||
if (ret != test->output.ret || caselen != test->output.caselen) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
|
||||
"(returned (%s, %zu) instead of (%s, %zu)).\n", argv0,
|
||||
name, off, test->description, ret ? "true" : "false",
|
||||
caselen, test->output.ret ? "true" : "false",
|
||||
test->output.caselen);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
const struct unit_test_to_case_utf8 *test =
|
||||
(const struct unit_test_to_case_utf8 *)t + off;
|
||||
size_t ret = 0, i;
|
||||
char buf[512];
|
||||
|
||||
/* fill the array with canary values */
|
||||
memset(buf, 0x7f, LEN(buf));
|
||||
|
||||
if (t == to_lowercase_utf8) {
|
||||
ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen,
|
||||
buf, test->input.destlen);
|
||||
} else if (t == to_uppercase_utf8) {
|
||||
ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen,
|
||||
buf, test->input.destlen);
|
||||
} else if (t == to_titlecase_utf8) {
|
||||
ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen,
|
||||
buf, test->input.destlen);
|
||||
} else {
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* check results */
|
||||
if (ret != test->output.ret ||
|
||||
memcmp(buf, test->output.dest, MIN(test->input.destlen, test->output.ret))) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* check that none of the canary values have been overwritten */
|
||||
for (i = test->input.destlen; i < LEN(buf); i++) {
|
||||
if (buf[i] != 0x7f) {
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
|
||||
"(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", argv0,
|
||||
name, off, test->description, (int)ret, buf, ret,
|
||||
(int)test->output.ret, test->output.dest, test->output.ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_utf8,
|
||||
LEN(is_lowercase_utf8), "grapheme_is_lowercase_utf8", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_utf8,
|
||||
LEN(is_uppercase_utf8), "grapheme_is_uppercase_utf8", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_utf8,
|
||||
LEN(is_titlecase_utf8), "grapheme_is_titlecase_utf8", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_utf8,
|
||||
LEN(to_lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_utf8,
|
||||
LEN(to_uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_utf8,
|
||||
LEN(to_titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
|
||||
}
|
126
libs/libgrapheme-2.0.2/test/character.c
Normal file
126
libs/libgrapheme-2.0.2/test/character.c
Normal file
@@ -0,0 +1,126 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../gen/character-test.h"
|
||||
#include "../grapheme.h"
|
||||
#include "util.h"
|
||||
|
||||
/*
 * Hand-written vectors for grapheme_next_character_break() on
 * code-point arrays. 0x1F1E9 0x1F1EA is a pair of regional-indicator
 * symbols; the vectors expect the pair to stay together (output 2).
 */
static const struct unit_test_next_break next_character_break[] = {
	{
		.description = "NULL input",
		.input = { .src = NULL, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input, null-terminated",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, .srclen = SIZE_MAX },
		.output = { 0 },
	},
	{
		.description = "one character",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2A }, .srclen = 3 },
		.output = { 2 },
	},
	{
		.description = "one character, null-terminated",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x0 }, .srclen = SIZE_MAX },
		.output = { 2 },
	},
};
|
||||
|
||||
/*
 * Hand-written vectors for grapheme_next_character_break_utf8().
 * "\xF0\x9F\x87\xA9" and "\xF0\x9F\x87\xAA" are the UTF-8 encodings of
 * U+1F1E9 and U+1F1EA; the "fragment" vectors cut the second sequence short.
 */
static const struct unit_test_next_break_utf8 next_character_break_utf8[] = {
	{
		.description = "NULL input",
		.input = { .src = NULL, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input",
		.input = { .src = "", .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input, NUL-terminated",
		.input = { .src = "", .srclen = SIZE_MAX },
		.output = { 0 },
	},
	{
		.description = "one character",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA*", .srclen = 9 },
		.output = { 8 },
	},
	{
		.description = "one character, fragment",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0", .srclen = 5 },
		.output = { 4 },
	},
	{
		.description = "one character, NUL-terminated",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA", .srclen = SIZE_MAX },
		.output = { 8 },
	},
	{
		.description = "one character, fragment, NUL-terminated",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F", .srclen = SIZE_MAX },
		.output = { 4 },
	},
};
|
||||
|
||||
static int
|
||||
unit_test_callback_next_character_break(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break(t, off,
|
||||
grapheme_next_character_break,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
static int
|
||||
unit_test_callback_next_character_break_utf8(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break_utf8(t, off,
|
||||
grapheme_next_character_break_utf8,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
return run_break_tests(grapheme_next_character_break,
|
||||
character_break_test, LEN(character_break_test), argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_character_break,
|
||||
next_character_break, LEN(next_character_break),
|
||||
"grapheme_next_character_break", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_character_break_utf8,
|
||||
next_character_break_utf8, LEN(next_character_break_utf8),
|
||||
"grapheme_next_character_break_utf8", argv[0]);
|
||||
}
|
127
libs/libgrapheme-2.0.2/test/line.c
Normal file
127
libs/libgrapheme-2.0.2/test/line.c
Normal file
@@ -0,0 +1,127 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../gen/line-test.h"
|
||||
#include "../grapheme.h"
|
||||
#include "util.h"
|
||||
|
||||
/*
 * Hand-written vectors for grapheme_next_line_break() on code-point
 * arrays. The non-empty inputs are two regional-indicator symbols, a
 * space (0x20) and an asterisk (0x2A); the expected offset is 3.
 */
static const struct unit_test_next_break next_line_break[] = {
	{
		.description = "NULL input",
		.input = { .src = NULL, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input, null-terminated",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, .srclen = SIZE_MAX },
		.output = { 0 },
	},
	{
		.description = "one opportunity",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A }, .srclen = 4 },
		.output = { 3 },
	},
	{
		.description = "one opportunity, null-terminated",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A, 0x0 }, .srclen = SIZE_MAX },
		.output = { 3 },
	},
};
|
||||
|
||||
/*
 * Hand-written vectors for grapheme_next_line_break_utf8().
 * "\xF0\x9F\x87\xA9" and "\xF0\x9F\x87\xAA" are the UTF-8 encodings of
 * U+1F1E9 and U+1F1EA; the "fragment" vectors cut the second sequence short.
 */
static const struct unit_test_next_break_utf8 next_line_break_utf8[] = {
	{
		.description = "NULL input",
		.input = { .src = NULL, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input",
		.input = { .src = "", .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input, NUL-terminated",
		.input = { .src = "", .srclen = SIZE_MAX },
		.output = { 0 },
	},
	{
		.description = "one opportunity",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA *", .srclen = 10 },
		.output = { 9 },
	},
	{
		.description = "one opportunity, fragment",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0", .srclen = 5 },
		.output = { 4 },
	},
	{
		.description = "one opportunity, NUL-terminated",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA A", .srclen = SIZE_MAX },
		.output = { 9 },
	},
	{
		.description = "one opportunity, fragment, NUL-terminated",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F", .srclen = SIZE_MAX },
		.output = { 4 },
	},
};
|
||||
|
||||
static int
|
||||
unit_test_callback_next_line_break(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break(t, off,
|
||||
grapheme_next_line_break,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
static int
|
||||
unit_test_callback_next_line_break_utf8(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break_utf8(t, off,
|
||||
grapheme_next_line_break_utf8,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
return run_break_tests(grapheme_next_line_break,
|
||||
line_break_test, LEN(line_break_test),
|
||||
argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_line_break,
|
||||
next_line_break, LEN(next_line_break),
|
||||
"grapheme_next_line_break", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_line_break_utf8,
|
||||
next_line_break_utf8, LEN(next_line_break_utf8),
|
||||
"grapheme_next_line_break_utf8", argv[0]);
|
||||
}
|
127
libs/libgrapheme-2.0.2/test/sentence.c
Normal file
127
libs/libgrapheme-2.0.2/test/sentence.c
Normal file
@@ -0,0 +1,127 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../gen/sentence-test.h"
|
||||
#include "../grapheme.h"
|
||||
#include "util.h"
|
||||
|
||||
/*
 * Hand-written vectors for grapheme_next_sentence_break() on code-point
 * arrays. The non-empty inputs are two regional-indicator symbols, a
 * full stop (0x2E), a space (0x20) and an asterisk (0x2A); the expected
 * offset is 4.
 */
static const struct unit_test_next_break next_sentence_break[] = {
	{
		.description = "NULL input",
		.input = { .src = NULL, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input, null-terminated",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x0 }, .srclen = SIZE_MAX },
		.output = { 0 },
	},
	{
		.description = "one sentence",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A }, .srclen = 5 },
		.output = { 4 },
	},
	{
		.description = "one sentence, null-terminated",
		.input = { .src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A, 0x0 }, .srclen = SIZE_MAX },
		.output = { 4 },
	},
};
|
||||
|
||||
/*
 * Hand-written vectors for grapheme_next_sentence_break_utf8().
 * "\xF0\x9F\x87\xA9" and "\xF0\x9F\x87\xAA" are the UTF-8 encodings of
 * U+1F1E9 and U+1F1EA; the "fragment" vectors cut the second sequence short.
 */
static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = {
	{
		.description = "NULL input",
		.input = { .src = NULL, .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input",
		.input = { .src = "", .srclen = 0 },
		.output = { 0 },
	},
	{
		.description = "empty input, NUL-terminated",
		.input = { .src = "", .srclen = SIZE_MAX },
		.output = { 0 },
	},
	{
		.description = "one sentence",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany. It", .srclen = 36 },
		.output = { 34 },
	},
	{
		.description = "one sentence, fragment",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0", .srclen = 5 },
		.output = { 4 },
	},
	{
		.description = "one sentence, NUL-terminated",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany. It", .srclen = SIZE_MAX },
		.output = { 34 },
	},
	{
		.description = "one sentence, fragment, NUL-terminated",
		.input = { .src = "\xF0\x9F\x87\xA9\xF0\x9F", .srclen = SIZE_MAX },
		.output = { 6 },
	},
};
|
||||
|
||||
static int
|
||||
unit_test_callback_next_sentence_break(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break(t, off,
|
||||
grapheme_next_sentence_break,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
static int
|
||||
unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break_utf8(t, off,
|
||||
grapheme_next_sentence_break_utf8,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
return run_break_tests(grapheme_next_sentence_break,
|
||||
sentence_break_test,
|
||||
LEN(sentence_break_test), argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_sentence_break,
|
||||
next_sentence_break, LEN(next_sentence_break),
|
||||
"grapheme_next_sentence_break", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_sentence_break_utf8,
|
||||
next_sentence_break_utf8, LEN(next_sentence_break_utf8),
|
||||
"grapheme_next_character_break_utf8", argv[0]);
|
||||
}
|
317
libs/libgrapheme-2.0.2/test/utf8-decode.c
Normal file
317
libs/libgrapheme-2.0.2/test/utf8-decode.c
Normal file
@@ -0,0 +1,317 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../grapheme.h"
|
||||
#include "util.h"
|
||||
|
||||
static const struct {
|
||||
char *arr; /* UTF-8 byte sequence */
|
||||
size_t len; /* length of UTF-8 byte sequence */
|
||||
size_t exp_len; /* expected length returned */
|
||||
uint_least32_t exp_cp; /* expected codepoint returned */
|
||||
} dec_test[] = {
|
||||
{
|
||||
/* empty sequence
|
||||
* [ ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = NULL,
|
||||
.len = 0,
|
||||
.exp_len = 0,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid lead byte
|
||||
* [ 11111101 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xFD },
|
||||
.len = 1,
|
||||
.exp_len = 1,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* valid 1-byte sequence
|
||||
* [ 00000001 ] ->
|
||||
* 0000001
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0x01 },
|
||||
.len = 1,
|
||||
.exp_len = 1,
|
||||
.exp_cp = 0x1,
|
||||
},
|
||||
{
|
||||
/* valid 2-byte sequence
|
||||
* [ 11000011 10111111 ] ->
|
||||
* 00011111111
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
|
||||
.len = 2,
|
||||
.exp_len = 2,
|
||||
.exp_cp = 0xFF,
|
||||
},
|
||||
{
|
||||
/* invalid 2-byte sequence (second byte missing)
|
||||
* [ 11000011 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xC3 },
|
||||
.len = 1,
|
||||
.exp_len = 2,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 2-byte sequence (second byte malformed)
|
||||
* [ 11000011 11111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xC3, 0xFF },
|
||||
.len = 2,
|
||||
.exp_len = 1,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 2-byte sequence (overlong encoded)
|
||||
* [ 11000001 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xC1, 0xBF },
|
||||
.len = 2,
|
||||
.exp_len = 2,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* valid 3-byte sequence
|
||||
* [ 11100000 10111111 10111111 ] ->
|
||||
* 0000111111111111
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
|
||||
.len = 3,
|
||||
.exp_len = 3,
|
||||
.exp_cp = 0xFFF,
|
||||
},
|
||||
{
|
||||
/* invalid 3-byte sequence (second byte missing)
|
||||
* [ 11100000 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xE0 },
|
||||
.len = 1,
|
||||
.exp_len = 3,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 3-byte sequence (second byte malformed)
|
||||
* [ 11100000 01111111 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF },
|
||||
.len = 3,
|
||||
.exp_len = 1,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 3-byte sequence (short string, second byte malformed)
|
||||
* [ 11100000 01111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xE0, 0x7F },
|
||||
.len = 2,
|
||||
.exp_len = 1,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 3-byte sequence (third byte missing)
|
||||
* [ 11100000 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xE0, 0xBF },
|
||||
.len = 2,
|
||||
.exp_len = 3,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 3-byte sequence (third byte malformed)
|
||||
* [ 11100000 10111111 01111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F },
|
||||
.len = 3,
|
||||
.exp_len = 2,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 3-byte sequence (overlong encoded)
|
||||
* [ 11100000 10011111 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF },
|
||||
.len = 3,
|
||||
.exp_len = 3,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 3-byte sequence (UTF-16 surrogate half)
|
||||
* [ 11101101 10100000 10000000 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 },
|
||||
.len = 3,
|
||||
.exp_len = 3,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* valid 4-byte sequence
|
||||
* [ 11110011 10111111 10111111 10111111 ] ->
|
||||
* 011111111111111111111
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
|
||||
.len = 4,
|
||||
.exp_len = 4,
|
||||
.exp_cp = UINT32_C(0xFFFFF),
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (second byte missing)
|
||||
* [ 11110011 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3 },
|
||||
.len = 1,
|
||||
.exp_len = 4,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (second byte malformed)
|
||||
* [ 11110011 01111111 10111111 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF },
|
||||
.len = 4,
|
||||
.exp_len = 1,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (short string 1, second byte malformed)
|
||||
* [ 11110011 01111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0x7F },
|
||||
.len = 2,
|
||||
.exp_len = 1,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (short string 2, second byte malformed)
|
||||
* [ 11110011 01111111 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF },
|
||||
.len = 3,
|
||||
.exp_len = 1,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
|
||||
{
|
||||
/* invalid 4-byte sequence (third byte missing)
|
||||
* [ 11110011 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0xBF },
|
||||
.len = 2,
|
||||
.exp_len = 4,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (third byte malformed)
|
||||
* [ 11110011 10111111 01111111 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF },
|
||||
.len = 4,
|
||||
.exp_len = 2,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (short string, third byte malformed)
|
||||
* [ 11110011 10111111 01111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F },
|
||||
.len = 3,
|
||||
.exp_len = 2,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (fourth byte missing)
|
||||
* [ 11110011 10111111 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF },
|
||||
.len = 3,
|
||||
.exp_len = 4,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (fourth byte malformed)
|
||||
* [ 11110011 10111111 10111111 01111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F },
|
||||
.len = 4,
|
||||
.exp_len = 3,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (overlong encoded)
|
||||
* [ 11110000 10000000 10000001 10111111 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF },
|
||||
.len = 4,
|
||||
.exp_len = 4,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
{
|
||||
/* invalid 4-byte sequence (UTF-16-unrepresentable)
|
||||
* [ 11110100 10010000 10000000 10000000 ] ->
|
||||
* INVALID
|
||||
*/
|
||||
.arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 },
|
||||
.len = 4,
|
||||
.exp_len = 4,
|
||||
.exp_cp = GRAPHEME_INVALID_CODEPOINT,
|
||||
},
|
||||
};
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
size_t i, failed;
|
||||
|
||||
(void)argc;
|
||||
|
||||
/* UTF-8 decoder test */
|
||||
for (i = 0, failed = 0; i < LEN(dec_test); i++) {
|
||||
size_t len;
|
||||
uint_least32_t cp;
|
||||
|
||||
len = grapheme_decode_utf8(dec_test[i].arr,
|
||||
dec_test[i].len, &cp);
|
||||
|
||||
if (len != dec_test[i].exp_len ||
|
||||
cp != dec_test[i].exp_cp) {
|
||||
fprintf(stderr, "%s: Failed test %zu: "
|
||||
"Expected (%zx,%u), but got (%zx,%u).\n",
|
||||
argv[0], i, dec_test[i].exp_len,
|
||||
dec_test[i].exp_cp, len, cp);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
printf("%s: %zu/%zu unit tests passed.\n", argv[0],
|
||||
LEN(dec_test) - failed, LEN(dec_test));
|
||||
|
||||
return (failed > 0) ? 1 : 0;
|
||||
}
|
93
libs/libgrapheme-2.0.2/test/utf8-encode.c
Normal file
93
libs/libgrapheme-2.0.2/test/utf8-encode.c
Normal file
@@ -0,0 +1,93 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../grapheme.h"
|
||||
#include "util.h"
|
||||
|
||||
static const struct {
|
||||
uint_least32_t cp; /* input codepoint */
|
||||
char *exp_arr; /* expected UTF-8 byte sequence */
|
||||
size_t exp_len; /* expected length of UTF-8 sequence */
|
||||
} enc_test[] = {
|
||||
{
|
||||
/* invalid codepoint (UTF-16 surrogate half) */
|
||||
.cp = UINT32_C(0xD800),
|
||||
.exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
|
||||
.exp_len = 3,
|
||||
},
|
||||
{
|
||||
/* invalid codepoint (UTF-16-unrepresentable) */
|
||||
.cp = UINT32_C(0x110000),
|
||||
.exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
|
||||
.exp_len = 3,
|
||||
},
|
||||
{
|
||||
/* codepoint encoded to a 1-byte sequence */
|
||||
.cp = 0x01,
|
||||
.exp_arr = (char *)(unsigned char[]){ 0x01 },
|
||||
.exp_len = 1,
|
||||
},
|
||||
{
|
||||
/* codepoint encoded to a 2-byte sequence */
|
||||
.cp = 0xFF,
|
||||
.exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
|
||||
.exp_len = 2,
|
||||
},
|
||||
{
|
||||
/* codepoint encoded to a 3-byte sequence */
|
||||
.cp = 0xFFF,
|
||||
.exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
|
||||
.exp_len = 3,
|
||||
},
|
||||
{
|
||||
/* codepoint encoded to a 4-byte sequence */
|
||||
.cp = UINT32_C(0xFFFFF),
|
||||
.exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
|
||||
.exp_len = 4,
|
||||
},
|
||||
};
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
size_t i, j, failed;
|
||||
|
||||
(void)argc;
|
||||
|
||||
/* UTF-8 encoder test */
|
||||
for (i = 0, failed = 0; i < LEN(enc_test); i++) {
|
||||
char arr[4];
|
||||
size_t len;
|
||||
|
||||
len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
|
||||
|
||||
if (len != enc_test[i].exp_len ||
|
||||
memcmp(arr, enc_test[i].exp_arr, len)) {
|
||||
fprintf(stderr, "%s, Failed test %zu: "
|
||||
"Expected (", argv[0], i);
|
||||
for (j = 0; j < enc_test[i].exp_len; j++) {
|
||||
fprintf(stderr, "0x%x",
|
||||
enc_test[i].exp_arr[j]);
|
||||
if (j + 1 < enc_test[i].exp_len) {
|
||||
fprintf(stderr, " ");
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "), but got (");
|
||||
for (j = 0; j < len; j++) {
|
||||
fprintf(stderr, "0x%x", arr[j]);
|
||||
if (j + 1 < len) {
|
||||
fprintf(stderr, " ");
|
||||
}
|
||||
}
|
||||
fprintf(stderr, ").\n");
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
printf("%s: %zu/%zu unit tests passed.\n", argv[0],
|
||||
LEN(enc_test) - failed, LEN(enc_test));
|
||||
|
||||
return (failed > 0) ? 1 : 0;
|
||||
}
|
98
libs/libgrapheme-2.0.2/test/util.c
Normal file
98
libs/libgrapheme-2.0.2/test/util.c
Normal file
@@ -0,0 +1,98 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../grapheme.h"
|
||||
#include "../gen/types.h"
|
||||
#include "util.h"
|
||||
|
||||
int
|
||||
run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
|
||||
const struct break_test *test, size_t testlen, const char *argv0)
|
||||
{
|
||||
size_t i, j, off, res, failed;
|
||||
|
||||
/* character break test */
|
||||
for (i = 0, failed = 0; i < testlen; i++) {
|
||||
for (j = 0, off = 0; off < test[i].cplen; off += res) {
|
||||
res = next_break(test[i].cp + off, test[i].cplen - off);
|
||||
|
||||
/* check if our resulting offset matches */
|
||||
if (j == test[i].lenlen ||
|
||||
res != test[i].len[j++]) {
|
||||
fprintf(stderr, "%s: Failed conformance test %zu \"%s\".\n",
|
||||
argv0, i, test[i].descr);
|
||||
fprintf(stderr, "J=%zu: EXPECTED len %zu, got %zu\n", j-1, test[i].len[j-1], res);
|
||||
failed++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("%s: %zu/%zu conformance tests passed.\n", argv0,
|
||||
testlen - failed, testlen);
|
||||
|
||||
return (failed > 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
/*
 * Invoke unit_test_callback once per index in [0, testlen), passing the
 * opaque table pointer, the index, the suite name and the program name.
 * Any non-zero callback result counts as one failed test.
 *
 * Prints "<argv0>: <name>: passed/total" on stdout and returns 1 if at
 * least one test failed, 0 otherwise.
 */
int
run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
               const char *), const void *test, size_t testlen, const char *name,
               const char *argv0)
{
	size_t idx = 0, nfail = 0;

	while (idx < testlen) {
		if (unit_test_callback(test, idx, name, argv0) != 0) {
			nfail++;
		}
		idx++;
	}

	printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
	       testlen - nfail, testlen);

	return nfail != 0;
}
|
||||
|
||||
int
|
||||
unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off,
|
||||
size_t (*next_break)(const uint_least32_t *, size_t),
|
||||
const char *name, const char *argv0)
|
||||
{
|
||||
const struct unit_test_next_break *test = t + off;
|
||||
|
||||
size_t ret = next_break(test->input.src, test->input.srclen);
|
||||
|
||||
if (ret != test->output.ret) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
|
||||
"(returned %zu instead of %zu).\n", argv0,
|
||||
name, off, test->description, ret, test->output.ret);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t,
|
||||
size_t off,
|
||||
size_t (*next_break_utf8)(const char *, size_t),
|
||||
const char *name, const char *argv0)
|
||||
{
|
||||
const struct unit_test_next_break_utf8 *test = t + off;
|
||||
|
||||
size_t ret = next_break_utf8(test->input.src, test->input.srclen);
|
||||
|
||||
if (ret != test->output.ret) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
|
||||
"(returned %zu instead of %zu).\n", argv0,
|
||||
name, off, test->description, ret, test->output.ret);
|
||||
return 1;
|
||||
}
|
49
libs/libgrapheme-2.0.2/test/util.h
Normal file
49
libs/libgrapheme-2.0.2/test/util.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#ifndef UTIL_H
|
||||
#define UTIL_H
|
||||
|
||||
#include "../gen/types.h"
|
||||
#include "../grapheme.h"
|
||||
|
||||
#undef MIN
|
||||
#define MIN(x,y) ((x) < (y) ? (x) : (y))
|
||||
#undef LEN
|
||||
#define LEN(x) (sizeof(x) / sizeof(*(x)))
|
||||
|
||||
/*
 * One unit-test case for a next-break function that operates on a
 * codepoint array; consumed by unit_test_callback_next_break().
 */
struct unit_test_next_break {
|
||||
const char *description; /* quoted in the failure message */
|
||||
struct {
|
||||
const uint_least32_t *src; /* first argument of the function under test */
|
||||
size_t srclen; /* second argument of the function under test */
|
||||
} input;
|
||||
struct {
|
||||
size_t ret; /* value the function under test must return */
|
||||
} output;
|
||||
};
|
||||
|
||||
/*
 * One unit-test case for a next-break function that operates on a
 * char string; consumed by unit_test_callback_next_break_utf8().
 */
struct unit_test_next_break_utf8 {
|
||||
const char *description; /* quoted in the failure message */
|
||||
struct {
|
||||
const char *src; /* first argument of the function under test */
|
||||
size_t srclen; /* second argument of the function under test */
|
||||
} input;
|
||||
struct {
|
||||
size_t ret; /* value the function under test must return */
|
||||
} output;
|
||||
};
|
||||
|
||||
/*
 * Runs testlen conformance tests from the table against next_break;
 * the last argument is the program name used as message prefix.
 * Returns 1 if any test failed, 0 otherwise.
 */
int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
|
||||
const struct break_test *test, size_t testlen,
|
||||
const char *);
|
||||
/*
 * Calls the callback once per table index with (table, index, name,
 * argv0); a non-zero callback result counts as a failed test.
 * Returns 1 if any test failed, 0 otherwise.
 */
int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
|
||||
const char *), const void *, size_t, const char *, const char *);
|
||||
|
||||
/*
 * Checks the table entry at the given index against next_break.
 * Returns 0 if the return value matches the expected one, 1 otherwise.
 */
int unit_test_callback_next_break(const struct unit_test_next_break *, size_t,
|
||||
size_t (*next_break)(const uint_least32_t *, size_t),
|
||||
const char *, const char *);
|
||||
/*
 * Checks the table entry at the given index against next_break_utf8.
 * Returns 0 if the return value matches the expected one, 1 otherwise.
 */
int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *,
|
||||
size_t,
|
||||
size_t (*next_break_utf8)(const char *, size_t),
|
||||
const char *, const char *);
|
||||
|
||||
#endif /* UTIL_H */
|
126
libs/libgrapheme-2.0.2/test/word.c
Normal file
126
libs/libgrapheme-2.0.2/test/word.c
Normal file
@@ -0,0 +1,126 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../gen/word-test.h"
|
||||
#include "../grapheme.h"
|
||||
#include "util.h"
|
||||
|
||||
static const struct unit_test_next_break next_word_break[] = {
|
||||
{
|
||||
.description = "NULL input",
|
||||
.input = {
|
||||
.src = NULL,
|
||||
.srclen = 0,
|
||||
},
|
||||
.output = { 0 },
|
||||
},
|
||||
{
|
||||
.description = "empty input",
|
||||
.input = {
|
||||
.src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
|
||||
.srclen = 0,
|
||||
},
|
||||
.output = { 0 },
|
||||
},
|
||||
{
|
||||
.description = "empty input, null-terminated",
|
||||
.input = {
|
||||
.src = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
|
||||
.srclen = SIZE_MAX,
|
||||
},
|
||||
.output = { 0 },
|
||||
},
|
||||
{
|
||||
.description = "one word",
|
||||
.input = {
|
||||
.src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A },
|
||||
.srclen = 4,
|
||||
},
|
||||
.output = { 2 },
|
||||
},
|
||||
{
|
||||
.description = "one word, null-terminated",
|
||||
.input = {
|
||||
.src = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A, 0x0 },
|
||||
.srclen = SIZE_MAX,
|
||||
},
|
||||
.output = { 2 },
|
||||
},
|
||||
};
|
||||
|
||||
static const struct unit_test_next_break_utf8 next_word_break_utf8[] = {
|
||||
{
|
||||
.description = "NULL input",
|
||||
.input = {
|
||||
.src = NULL,
|
||||
.srclen = 0,
|
||||
},
|
||||
.output = { 0 },
|
||||
},
|
||||
{
|
||||
.description = "empty input",
|
||||
.input = { "", 0 },
|
||||
.output = { 0 },
|
||||
},
|
||||
{
|
||||
.description = "empty input, NUL-terminated",
|
||||
.input = { "", SIZE_MAX },
|
||||
.output = { 0 },
|
||||
},
|
||||
{
|
||||
.description = "one word",
|
||||
.input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", 11 },
|
||||
.output = { 8 },
|
||||
},
|
||||
{
|
||||
.description = "one word, fragment",
|
||||
.input = { "\xF0\x9F\x87\xA9\xF0", 5 },
|
||||
.output = { 4 },
|
||||
},
|
||||
{
|
||||
.description = "one word, NUL-terminated",
|
||||
.input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", SIZE_MAX },
|
||||
.output = { 8 },
|
||||
},
|
||||
{
|
||||
.description = "one word, fragment, NUL-terminated",
|
||||
.input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
|
||||
.output = { 4 },
|
||||
},
|
||||
};
|
||||
|
||||
static int
|
||||
unit_test_callback_next_word_break(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break(t, off,
|
||||
grapheme_next_word_break,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
static int
|
||||
unit_test_callback_next_word_break_utf8(const void *t, size_t off,
|
||||
const char *name,
|
||||
const char *argv0)
|
||||
{
|
||||
return unit_test_callback_next_break_utf8(t, off,
|
||||
grapheme_next_word_break_utf8,
|
||||
name, argv0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
return run_break_tests(grapheme_next_word_break, word_break_test,
|
||||
LEN(word_break_test), argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_word_break,
|
||||
next_word_break, LEN(next_word_break),
|
||||
"grapheme_next_word_break", argv[0]) +
|
||||
run_unit_tests(unit_test_callback_next_word_break_utf8,
|
||||
next_word_break_utf8, LEN(next_word_break_utf8),
|
||||
"grapheme_next_word_break_utf8", argv[0]);
|
||||
}
|
Reference in New Issue
Block a user