Initial Commit
This commit is contained in:
BIN
libs/libgrapheme-2.0.2/gen/case
Executable file
BIN
libs/libgrapheme-2.0.2/gen/case
Executable file
Binary file not shown.
318
libs/libgrapheme-2.0.2/gen/case.c
Normal file
318
libs/libgrapheme-2.0.2/gen/case.c
Normal file
@@ -0,0 +1,318 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#define FILE_DCP "data/DerivedCoreProperties.txt"
|
||||
|
||||
static const struct property_spec case_property[] = {
|
||||
{
|
||||
.enumname = "OTHER",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "BOTH_CASED_CASE_IGNORABLE",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CASED",
|
||||
.file = FILE_DCP,
|
||||
.ucdname = "Cased",
|
||||
},
|
||||
{
|
||||
.enumname = "CASE_IGNORABLE",
|
||||
.file = FILE_DCP,
|
||||
.ucdname = "Case_Ignorable",
|
||||
},
|
||||
{
|
||||
.enumname = "UNCASED",
|
||||
.file = FILE_DCP,
|
||||
.ucdname = "Uncased",
|
||||
},
|
||||
};
|
||||
|
||||
static uint_least8_t
|
||||
handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
|
||||
{
|
||||
uint_least8_t result;
|
||||
|
||||
(void)cp;
|
||||
|
||||
if ((!strcmp(case_property[prop1].enumname, "CASED") &&
|
||||
!strcmp(case_property[prop2].enumname, "CASE_IGNORABLE")) ||
|
||||
(!strcmp(case_property[prop1].enumname, "CASE_IGNORABLE") &&
|
||||
!strcmp(case_property[prop2].enumname, "CASED"))) {
|
||||
for (result = 0; result < LEN(case_property); result++) {
|
||||
if (!strcmp(case_property[result].enumname,
|
||||
"BOTH_CASED_CASE_IGNORABLE")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result == LEN(case_property)) {
|
||||
fprintf(stderr, "handle_conflict: Internal error.\n");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Per-codepoint tables for the upper-, lower- and titlecase mappings;
 * allocated in main() and filled by the parser callbacks.
 */
static struct properties *prop_upper = NULL;
static struct properties *prop_lower = NULL;
static struct properties *prop_title = NULL;

/*
 * Growable array of special-case entries; each entry holds one codepoint
 * list (with its length) per case kind. sclen is the number of entries.
 */
static struct special_case {
	struct {
		uint_least32_t *cp;
		size_t cplen;
	} upper, lower, title;
} *sc = NULL;

static size_t sclen = 0;
|
||||
|
||||
static int
|
||||
unicodedata_callback(const char *file, char **field, size_t nfields,
|
||||
char *comment, void *payload)
|
||||
{
|
||||
uint_least32_t cp, upper, lower, title;
|
||||
|
||||
(void)file;
|
||||
(void)comment;
|
||||
(void)payload;
|
||||
|
||||
hextocp(field[0], strlen(field[0]), &cp);
|
||||
|
||||
upper = lower = title = cp;
|
||||
|
||||
if ((strlen(field[12]) > 0 && hextocp(field[12], strlen(field[12]), &upper)) ||
|
||||
(strlen(field[13]) > 0 && hextocp(field[13], strlen(field[13]), &lower)) ||
|
||||
(nfields >= 15 && strlen(field[14]) > 0 && hextocp(field[14], strlen(field[14]), &title))) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
prop_upper[cp].property = (int_least32_t)upper - (int_least32_t)cp;
|
||||
prop_lower[cp].property = (int_least32_t)lower - (int_least32_t)cp;
|
||||
prop_title[cp].property = (int_least32_t)title - (int_least32_t)cp;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Parse a list of hexadecimal codepoints separated by single spaces.
 *
 * str:   NUL-terminated input list
 * cp:    receives a freshly calloc'd array with one slot per list item
 * cplen: receives the number of slots (number of spaces in str plus one)
 *
 * Returns 0 on success and 1 as soon as hextocp() rejects an item; the
 * array stays allocated in that case. Terminates the program when the
 * allocation fails.
 */
static int
parse_cp_list(const char *str, uint_least32_t **cp, size_t *cplen)
{
	const char *item, *sep;
	size_t slots = 1, idx = 0, itemlen;

	/* every space separates two items */
	for (item = str; *item != '\0'; item++) {
		if (*item == ' ') {
			slots++;
		}
	}

	*cplen = slots;
	*cp = calloc(slots, sizeof(**cp));
	if (*cp == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: convert the items one by one */
	item = str;
	do {
		sep = strchr(item, ' ');
		itemlen = (sep != NULL) ? (size_t)(sep - item) : strlen(item);

		if (hextocp(item, itemlen, &(*cp)[idx])) {
			return 1;
		}
		idx++;

		if (sep != NULL) {
			item = sep + 1;
		}
	} while (sep != NULL);

	return 0;
}
|
||||
|
||||
static int
|
||||
specialcasing_callback(const char *file, char **field, size_t nfields,
|
||||
char *comment, void *payload)
|
||||
{
|
||||
uint_least32_t cp;
|
||||
|
||||
(void)file;
|
||||
(void)comment;
|
||||
(void)payload;
|
||||
|
||||
if (nfields > 4 && strlen(field[4]) > 0) {
|
||||
/*
|
||||
* we have more than 4 fields, i.e. the rule has a
|
||||
* condition (language-sensitive, etc.) and is discarded
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* parse affected codepoint */
|
||||
hextocp(field[0], strlen(field[0]), &cp);
|
||||
|
||||
/* extend special case array */
|
||||
if (!(sc = realloc(sc, (++sclen) * sizeof(*sc)))) {
|
||||
fprintf(stderr, "realloc: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* parse field data */
|
||||
parse_cp_list(field[3], &(sc[sclen - 1].upper.cp),
|
||||
&(sc[sclen - 1].upper.cplen));
|
||||
parse_cp_list(field[1], &(sc[sclen - 1].lower.cp),
|
||||
&(sc[sclen - 1].lower.cplen));
|
||||
parse_cp_list(field[2], &(sc[sclen - 1].title.cp),
|
||||
&(sc[sclen - 1].title.cplen));
|
||||
|
||||
/*
|
||||
* overwrite value in "single mapping" property table by the
|
||||
* special value 0x110000 + (offset in special case array),
|
||||
* even if the special case has length 1
|
||||
*/
|
||||
prop_upper[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
|
||||
prop_lower[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
|
||||
prop_title[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int_least64_t
|
||||
get_value(const struct properties *prop, size_t offset)
|
||||
{
|
||||
return prop[offset].property;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
struct properties_compressed comp_upper, comp_lower, comp_title;
|
||||
struct properties_major_minor mm_upper, mm_lower, mm_title;
|
||||
size_t i, j;
|
||||
|
||||
(void)argc;
|
||||
|
||||
/* generate case property table from the specification */
|
||||
properties_generate_break_property(case_property,
|
||||
LEN(case_property),
|
||||
handle_conflict, NULL, "case",
|
||||
argv[0]);
|
||||
|
||||
/*
|
||||
* allocate property buffers for all 0x110000 codepoints
|
||||
*
|
||||
* the buffers contain the offset from the "base" character
|
||||
* to the respective case mapping. By callocing we set all fields
|
||||
* to zero, which is also the Unicode "default" in the sense that
|
||||
* there is no case mapping by default (unless we fill it in)
|
||||
*/
|
||||
if (!(prop_upper = calloc(UINT32_C(0x110000), sizeof(*prop_upper))) ||
|
||||
!(prop_lower = calloc(UINT32_C(0x110000), sizeof(*prop_lower))) ||
|
||||
!(prop_title = calloc(UINT32_C(0x110000), sizeof(*prop_title)))) {
|
||||
fprintf(stderr, "calloc: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
parse_file_with_callback("data/UnicodeData.txt", unicodedata_callback,
|
||||
NULL);
|
||||
parse_file_with_callback("data/SpecialCasing.txt", specialcasing_callback,
|
||||
NULL);
|
||||
|
||||
/* compress properties */
|
||||
properties_compress(prop_upper, &comp_upper);
|
||||
properties_compress(prop_lower, &comp_lower);
|
||||
properties_compress(prop_title, &comp_title);
|
||||
|
||||
fprintf(stderr, "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, title=%.2f%%\n",
|
||||
argv[0], properties_get_major_minor(&comp_upper, &mm_upper),
|
||||
properties_get_major_minor(&comp_lower, &mm_lower),
|
||||
properties_get_major_minor(&comp_title, &mm_title));
|
||||
|
||||
/* print tables */
|
||||
printf("/* Automatically generated by %s */\n#include <stdint.h>\n#include <stddef.h>\n\n", argv[0]);
|
||||
|
||||
printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t cplen;\n};\n\n");
|
||||
|
||||
properties_print_lookup_table("upper_major", mm_upper.major, 0x1100);
|
||||
printf("\n");
|
||||
properties_print_derived_lookup_table("upper_minor", "int_least32_t", mm_upper.minor,
|
||||
mm_upper.minorlen, get_value, comp_upper.data);
|
||||
printf("\n");
|
||||
properties_print_lookup_table("lower_major", mm_lower.major, 0x1100);
|
||||
printf("\n");
|
||||
properties_print_derived_lookup_table("lower_minor", "int_least32_t", mm_lower.minor,
|
||||
mm_lower.minorlen, get_value, comp_lower.data);
|
||||
printf("\n");
|
||||
properties_print_lookup_table("title_major", mm_title.major, 0x1100);
|
||||
printf("\n");
|
||||
properties_print_derived_lookup_table("title_minor", "int_least32_t", mm_title.minor,
|
||||
mm_title.minorlen, get_value, comp_title.data);
|
||||
printf("\n");
|
||||
|
||||
printf("static const struct special_case upper_special[] = {\n");
|
||||
for (i = 0; i < sclen; i++) {
|
||||
printf("\t{\n");
|
||||
|
||||
printf("\t\t.cp = (uint_least32_t[]){");
|
||||
for (j = 0; j < sc[i].upper.cplen; j++) {
|
||||
printf(" UINT32_C(0x%06X)", sc[i].upper.cp[j]);
|
||||
if (j + 1 < sc[i].upper.cplen) {
|
||||
putchar(',');
|
||||
}
|
||||
}
|
||||
printf(" },\n");
|
||||
printf("\t\t.cplen = %zu,\n", sc[i].upper.cplen);
|
||||
printf("\t},\n");
|
||||
}
|
||||
printf("};\n\n");
|
||||
|
||||
printf("static const struct special_case lower_special[] = {\n");
|
||||
for (i = 0; i < sclen; i++) {
|
||||
printf("\t{\n");
|
||||
|
||||
printf("\t\t.cp = (uint_least32_t[]){");
|
||||
for (j = 0; j < sc[i].lower.cplen; j++) {
|
||||
printf(" UINT32_C(0x%06X)", sc[i].lower.cp[j]);
|
||||
if (j + 1 < sc[i].lower.cplen) {
|
||||
putchar(',');
|
||||
}
|
||||
}
|
||||
printf(" },\n");
|
||||
printf("\t\t.cplen = %zu,\n", sc[i].lower.cplen);
|
||||
printf("\t},\n");
|
||||
}
|
||||
printf("};\n\n");
|
||||
|
||||
printf("static const struct special_case title_special[] = {\n");
|
||||
for (i = 0; i < sclen; i++) {
|
||||
printf("\t{\n");
|
||||
|
||||
printf("\t\t.cp = (uint_least32_t[]){");
|
||||
for (j = 0; j < sc[i].title.cplen; j++) {
|
||||
printf(" UINT32_C(0x%06X)", sc[i].title.cp[j]);
|
||||
if (j + 1 < sc[i].title.cplen) {
|
||||
putchar(',');
|
||||
}
|
||||
}
|
||||
printf(" },\n");
|
||||
printf("\t\t.cplen = %zu,\n", sc[i].title.cplen);
|
||||
printf("\t},\n");
|
||||
}
|
||||
printf("};\n\n");
|
||||
|
||||
free(comp_lower.data);
|
||||
free(comp_lower.offset);
|
||||
free(comp_title.data);
|
||||
free(comp_title.offset);
|
||||
free(comp_upper.data);
|
||||
free(comp_upper.offset);
|
||||
free(mm_lower.major);
|
||||
free(mm_lower.minor);
|
||||
free(mm_title.major);
|
||||
free(mm_title.minor);
|
||||
free(mm_upper.major);
|
||||
free(mm_upper.minor);
|
||||
|
||||
return 0;
|
||||
}
|
9033
libs/libgrapheme-2.0.2/gen/case.h
Normal file
9033
libs/libgrapheme-2.0.2/gen/case.h
Normal file
File diff suppressed because it is too large
Load Diff
BIN
libs/libgrapheme-2.0.2/gen/case.o
Normal file
BIN
libs/libgrapheme-2.0.2/gen/case.o
Normal file
Binary file not shown.
BIN
libs/libgrapheme-2.0.2/gen/character
Executable file
BIN
libs/libgrapheme-2.0.2/gen/character
Executable file
Binary file not shown.
19
libs/libgrapheme-2.0.2/gen/character-test.c
Normal file
19
libs/libgrapheme-2.0.2/gen/character-test.c
Normal file
@@ -0,0 +1,19 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stddef.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
/*
 * Generator entry point: passes data/GraphemeBreakTest.txt to
 * break_test_list_parse(), hands the resulting list to
 * break_test_list_print() under the name "character_break_test" and
 * releases it with break_test_list_free(). Only argv[0] is used.
 */
int
main(int argc, char *argv[])
{
	struct break_test *tests = NULL;
	size_t ntests = 0;

	(void)argc;

	break_test_list_parse("data/GraphemeBreakTest.txt", &tests, &ntests);
	break_test_list_print(tests, ntests, "character_break_test", argv[0]);
	break_test_list_free(tests, ntests);

	return 0;
}
|
97
libs/libgrapheme-2.0.2/gen/character.c
Normal file
97
libs/libgrapheme-2.0.2/gen/character.c
Normal file
@@ -0,0 +1,97 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stddef.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#define FILE_EMOJI "data/emoji-data.txt"
|
||||
#define FILE_GRAPHEME "data/GraphemeBreakProperty.txt"
|
||||
|
||||
static const struct property_spec char_break_property[] = {
|
||||
{
|
||||
.enumname = "OTHER",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CONTROL",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "Control",
|
||||
},
|
||||
{
|
||||
.enumname = "CR",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "CR",
|
||||
},
|
||||
{
|
||||
.enumname = "EXTEND",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "Extend",
|
||||
},
|
||||
{
|
||||
.enumname = "EXTENDED_PICTOGRAPHIC",
|
||||
.file = FILE_EMOJI,
|
||||
.ucdname = "Extended_Pictographic",
|
||||
},
|
||||
{
|
||||
.enumname = "HANGUL_L",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "L",
|
||||
},
|
||||
{
|
||||
.enumname = "HANGUL_V",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "V",
|
||||
},
|
||||
{
|
||||
.enumname = "HANGUL_T",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "T",
|
||||
},
|
||||
{
|
||||
.enumname = "HANGUL_LV",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "LV",
|
||||
},
|
||||
{
|
||||
.enumname = "HANGUL_LVT",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "LVT",
|
||||
},
|
||||
{
|
||||
.enumname = "LF",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "LF",
|
||||
},
|
||||
{
|
||||
.enumname = "PREPEND",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "Prepend",
|
||||
},
|
||||
{
|
||||
.enumname = "REGIONAL_INDICATOR",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "Regional_Indicator",
|
||||
},
|
||||
{
|
||||
.enumname = "SPACINGMARK",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "SpacingMark",
|
||||
},
|
||||
{
|
||||
.enumname = "ZWJ",
|
||||
.file = FILE_GRAPHEME,
|
||||
.ucdname = "ZWJ",
|
||||
},
|
||||
};
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
properties_generate_break_property(char_break_property,
|
||||
LEN(char_break_property),
|
||||
NULL, NULL, "char_break", argv[0]);
|
||||
|
||||
return 0;
|
||||
}
|
3547
libs/libgrapheme-2.0.2/gen/character.h
Normal file
3547
libs/libgrapheme-2.0.2/gen/character.h
Normal file
File diff suppressed because it is too large
Load Diff
BIN
libs/libgrapheme-2.0.2/gen/character.o
Normal file
BIN
libs/libgrapheme-2.0.2/gen/character.o
Normal file
Binary file not shown.
BIN
libs/libgrapheme-2.0.2/gen/line
Executable file
BIN
libs/libgrapheme-2.0.2/gen/line
Executable file
Binary file not shown.
19
libs/libgrapheme-2.0.2/gen/line-test.c
Normal file
19
libs/libgrapheme-2.0.2/gen/line-test.c
Normal file
@@ -0,0 +1,19 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stddef.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
/*
 * Generator entry point: passes data/LineBreakTest.txt to
 * break_test_list_parse(), hands the resulting list to
 * break_test_list_print() under the name "line_break_test" and releases
 * it with break_test_list_free(). Only argv[0] is used.
 */
int
main(int argc, char *argv[])
{
	struct break_test *tests = NULL;
	size_t ntests = 0;

	(void)argc;

	break_test_list_parse("data/LineBreakTest.txt", &tests, &ntests);
	break_test_list_print(tests, ntests, "line_break_test", argv[0]);
	break_test_list_free(tests, ntests);

	return 0;
}
|
456
libs/libgrapheme-2.0.2/gen/line.c
Normal file
456
libs/libgrapheme-2.0.2/gen/line.c
Normal file
@@ -0,0 +1,456 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#define FILE_EAW "data/EastAsianWidth.txt"
|
||||
#define FILE_EMOJI "data/emoji-data.txt"
|
||||
#define FILE_LINE "data/LineBreak.txt"
|
||||
|
||||
static const struct property_spec line_break_property[] = {
|
||||
{
|
||||
.enumname = "AL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "AL",
|
||||
},
|
||||
/*
|
||||
* Both extended pictographic and cn are large classes,
|
||||
* but we are only interested in their intersection for LB30b,
|
||||
* so we have the following two temporary classes. At first
|
||||
* the extpict-class is filled, then the cn-class, which leads
|
||||
* to conflicts (that we handle by putting them in the "proper"
|
||||
* class BOTH_CN_EXTPICT). We make use of the fact that there
|
||||
* is no intersection between AL and Cn.
|
||||
*
|
||||
* Any consecutive conflicts are permitted to overwrite
|
||||
* TMP_EXTENDED_PICTOGRAPHIC and TMP_CN, because we don't need
|
||||
* them, and in the final postprocessing we "reset" all
|
||||
* remaining matches (that then didn't fit any of the other
|
||||
* classes) to the generic class AL.
|
||||
*/
|
||||
{
|
||||
.enumname = "TMP_CN",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "Cn",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EXTENDED_PICTOGRAPHIC",
|
||||
.file = FILE_EMOJI,
|
||||
.ucdname = "Extended_Pictographic",
|
||||
},
|
||||
/* end of special block */
|
||||
{
|
||||
.enumname = "B2",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "B2",
|
||||
},
|
||||
{
|
||||
.enumname = "BA",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "BA",
|
||||
},
|
||||
{
|
||||
.enumname = "BB",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "BB",
|
||||
},
|
||||
{
|
||||
.enumname = "BK",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "BK",
|
||||
},
|
||||
{
|
||||
.enumname = "BOTH_CN_EXTPICT",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CB",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CB",
|
||||
},
|
||||
{
|
||||
.enumname = "CL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CL",
|
||||
},
|
||||
{
|
||||
.enumname = "CM",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CM",
|
||||
},
|
||||
{
|
||||
.enumname = "CP_WITHOUT_EAW_HWF",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CP",
|
||||
},
|
||||
{
|
||||
.enumname = "CP_WITH_EAW_HWF",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CR",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CR",
|
||||
},
|
||||
{
|
||||
.enumname = "EB",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "EB",
|
||||
},
|
||||
{
|
||||
.enumname = "EM",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "EM",
|
||||
},
|
||||
{
|
||||
.enumname = "EX",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "EX",
|
||||
},
|
||||
{
|
||||
.enumname = "GL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "GL",
|
||||
},
|
||||
{
|
||||
.enumname = "H2",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "H2",
|
||||
},
|
||||
{
|
||||
.enumname = "H3",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "H3",
|
||||
},
|
||||
{
|
||||
.enumname = "HL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "HL",
|
||||
},
|
||||
{
|
||||
.enumname = "HY",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "HY",
|
||||
},
|
||||
{
|
||||
.enumname = "ID",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "ID",
|
||||
},
|
||||
{
|
||||
.enumname = "IN",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "IN",
|
||||
},
|
||||
{
|
||||
.enumname = "IS",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "IS",
|
||||
},
|
||||
{
|
||||
.enumname = "JL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "JL",
|
||||
},
|
||||
{
|
||||
.enumname = "JT",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "JT",
|
||||
},
|
||||
{
|
||||
.enumname = "JV",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "JV",
|
||||
},
|
||||
{
|
||||
.enumname = "LF",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "LF",
|
||||
},
|
||||
{
|
||||
.enumname = "NL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "NL",
|
||||
},
|
||||
{
|
||||
.enumname = "NS",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "NS",
|
||||
},
|
||||
{
|
||||
.enumname = "NU",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "NU",
|
||||
},
|
||||
{
|
||||
.enumname = "OP_WITHOUT_EAW_HWF",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "OP",
|
||||
},
|
||||
{
|
||||
.enumname = "OP_WITH_EAW_HWF",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "PO",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "PO",
|
||||
},
|
||||
{
|
||||
.enumname = "PR",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "PR",
|
||||
},
|
||||
{
|
||||
.enumname = "QU",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "QU",
|
||||
},
|
||||
{
|
||||
.enumname = "RI",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "RI",
|
||||
},
|
||||
{
|
||||
.enumname = "SP",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SP",
|
||||
},
|
||||
{
|
||||
.enumname = "SY",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SY",
|
||||
},
|
||||
{
|
||||
.enumname = "WJ",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "WJ",
|
||||
},
|
||||
{
|
||||
.enumname = "ZW",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "ZW",
|
||||
},
|
||||
{
|
||||
.enumname = "ZWJ",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "ZWJ",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_AI",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "AI",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_CJ",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CJ",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_XX",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_MN",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "Mn",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_MC",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "Mc",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_SA_WITHOUT_MN_OR_MC",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SA",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_SA_WITH_MN_OR_MC",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SA",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_SG",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SG",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EAW_H",
|
||||
.file = FILE_EAW,
|
||||
.ucdname = "H",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EAW_W",
|
||||
.file = FILE_EAW,
|
||||
.ucdname = "W",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EAW_F",
|
||||
.file = FILE_EAW,
|
||||
.ucdname = "F",
|
||||
},
|
||||
};
|
||||
|
||||
static uint_least8_t
|
||||
handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
|
||||
{
|
||||
uint_least8_t result = prop2;
|
||||
char *target = NULL;
|
||||
|
||||
(void)cp;
|
||||
|
||||
if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) ||
|
||||
(!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_EAW_HWF") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_EAW_HWF")) {
|
||||
target = "CP_WITH_EAW_HWF";
|
||||
} else if (!strcmp(line_break_property[prop1].enumname, "OP_WITHOUT_EAW_HWF") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_EAW_HWF")) {
|
||||
target = "OP_WITH_EAW_HWF";
|
||||
} else {
|
||||
/* ignore EAW for the rest */
|
||||
if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F"))) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_MC")) ||
|
||||
(!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_MN_OR_MC") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_MN_OR_MC")) {
|
||||
target = "SA_WITH_MN_OR_MC";
|
||||
} else {
|
||||
/* ignore Mn and Mc for the rest */
|
||||
if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_MC"))) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
|
||||
target = "BOTH_CN_EXTPICT";
|
||||
} else {
|
||||
/* ignore Cn for all the other properties */
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_CN")) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
|
||||
target = "BOTH_CN_EXTPICT";
|
||||
} else {
|
||||
/* ignore Extended_Pictographic for all the other properties */
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "handle_conflict: Cannot handle conflict %s <- %s.\n",
|
||||
line_break_property[prop1].enumname, line_break_property[prop2].enumname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (target) {
|
||||
for (result = 0; result < LEN(line_break_property); result++) {
|
||||
if (!strcmp(line_break_property[result].enumname,
|
||||
target)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result == LEN(line_break_property)) {
|
||||
fprintf(stderr, "handle_conflict: Internal error.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint_least8_t
|
||||
post_process(uint_least8_t prop)
|
||||
{
|
||||
const char *target = NULL;
|
||||
uint_least8_t result;
|
||||
|
||||
/* LB1 */
|
||||
if (!strcmp(line_break_property[prop].enumname, "TMP_AI") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_SG") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_XX")) {
|
||||
/* map AI, SG and XX to AL */
|
||||
target = "AL";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITH_MN_OR_MC")) {
|
||||
/* map SA (with General_Category Mn or Mc) to CM */
|
||||
target = "CM";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITHOUT_MN_OR_MC")) {
|
||||
/* map SA (without General_Category Mn or Mc) to AL */
|
||||
target = "AL";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_CJ")) {
|
||||
/* map CJ to NS */
|
||||
target = "NS";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_CN") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_MC") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EAW_F")) {
|
||||
/* map all the temporary classes "residue" to AL */
|
||||
target = "AL";
|
||||
}
|
||||
|
||||
if (target) {
|
||||
for (result = 0; result < LEN(line_break_property); result++) {
|
||||
if (!strcmp(line_break_property[result].enumname,
|
||||
target)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result == LEN(line_break_property)) {
|
||||
fprintf(stderr, "handle_conflict: Internal error.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
} else {
|
||||
return prop;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
properties_generate_break_property(line_break_property,
|
||||
LEN(line_break_property),
|
||||
handle_conflict, post_process,
|
||||
"line_break", argv[0]);
|
||||
|
||||
return 0;
|
||||
}
|
4738
libs/libgrapheme-2.0.2/gen/line.h
Normal file
4738
libs/libgrapheme-2.0.2/gen/line.h
Normal file
File diff suppressed because it is too large
Load Diff
BIN
libs/libgrapheme-2.0.2/gen/line.o
Normal file
BIN
libs/libgrapheme-2.0.2/gen/line.o
Normal file
Binary file not shown.
BIN
libs/libgrapheme-2.0.2/gen/sentence
Executable file
BIN
libs/libgrapheme-2.0.2/gen/sentence
Executable file
Binary file not shown.
19
libs/libgrapheme-2.0.2/gen/sentence-test.c
Normal file
19
libs/libgrapheme-2.0.2/gen/sentence-test.c
Normal file
@@ -0,0 +1,19 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stddef.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
/*
 * Generator entry point: passes data/SentenceBreakTest.txt to
 * break_test_list_parse(), hands the resulting list to
 * break_test_list_print() under the name "sentence_break_test" and
 * releases it with break_test_list_free(). Only argv[0] is used.
 */
int
main(int argc, char *argv[])
{
	struct break_test *tests = NULL;
	size_t ntests = 0;

	(void)argc;

	break_test_list_parse("data/SentenceBreakTest.txt", &tests, &ntests);
	break_test_list_print(tests, ntests, "sentence_break_test", argv[0]);
	break_test_list_free(tests, ntests);

	return 0;
}
|
94
libs/libgrapheme-2.0.2/gen/sentence.c
Normal file
94
libs/libgrapheme-2.0.2/gen/sentence.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include "util.h"
|
||||
|
||||
#define FILE_SENTENCE "data/SentenceBreakProperty.txt"
|
||||
|
||||
static const struct property_spec sentence_break_property[] = {
|
||||
{
|
||||
.enumname = "OTHER",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CR",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "CR",
|
||||
},
|
||||
{
|
||||
.enumname = "LF",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "LF",
|
||||
},
|
||||
{
|
||||
.enumname = "EXTEND",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Extend",
|
||||
},
|
||||
{
|
||||
.enumname = "SEP",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Sep",
|
||||
},
|
||||
{
|
||||
.enumname = "FORMAT",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Format",
|
||||
},
|
||||
{
|
||||
.enumname = "SP",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Sp",
|
||||
},
|
||||
{
|
||||
.enumname = "LOWER",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Lower",
|
||||
},
|
||||
{
|
||||
.enumname = "UPPER",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Upper",
|
||||
},
|
||||
{
|
||||
.enumname = "OLETTER",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "OLetter",
|
||||
},
|
||||
{
|
||||
.enumname = "NUMERIC",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Numeric",
|
||||
},
|
||||
{
|
||||
.enumname = "ATERM",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "ATerm",
|
||||
},
|
||||
{
|
||||
.enumname = "SCONTINUE",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "SContinue",
|
||||
},
|
||||
{
|
||||
.enumname = "STERM",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "STerm",
|
||||
},
|
||||
{
|
||||
.enumname = "CLOSE",
|
||||
.file = FILE_SENTENCE,
|
||||
.ucdname = "Close",
|
||||
},
|
||||
};
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
properties_generate_break_property(sentence_break_property,
|
||||
LEN(sentence_break_property),
|
||||
NULL, NULL, "sentence_break", argv[0]);
|
||||
|
||||
return 0;
|
||||
}
|
4731
libs/libgrapheme-2.0.2/gen/sentence.h
Normal file
4731
libs/libgrapheme-2.0.2/gen/sentence.h
Normal file
File diff suppressed because it is too large
Load Diff
BIN
libs/libgrapheme-2.0.2/gen/sentence.o
Normal file
BIN
libs/libgrapheme-2.0.2/gen/sentence.o
Normal file
Binary file not shown.
16
libs/libgrapheme-2.0.2/gen/types.h
Normal file
16
libs/libgrapheme-2.0.2/gen/types.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * One break test case as handled by the break_test_list_*() functions.
 * Each array pointer is paired with its element count.
 */
struct break_test {
	uint_least32_t *cp; /* codepoint array */
	size_t cplen;       /* number of elements in cp */

	size_t *len;        /* presumably the expected segment lengths -- TODO confirm */
	size_t lenlen;      /* number of elements in len */

	char *descr;        /* presumably a description of the test -- TODO confirm */
};
|
||||
|
||||
#endif /* TYPES_H */
|
704
libs/libgrapheme-2.0.2/gen/util.c
Normal file
704
libs/libgrapheme-2.0.2/gen/util.c
Normal file
@@ -0,0 +1,704 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdbool.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
/* Codepoint interval; both bounds are inclusive. */
struct range {
	uint_least32_t lower; /* first codepoint of the interval */
	uint_least32_t upper; /* last codepoint of the interval */
};
|
||||
|
||||
/*
 * State handed to properties_callback() while a data file is parsed.
 */
struct properties_payload {
	/* per-codepoint property table that is being filled */
	struct properties *prop;

	/* property specifications and their count */
	const struct property_spec *spec;
	uint_least8_t speclen;

	/* stores a value for a codepoint; a non-zero return signals failure */
	int (*set_value)(struct properties_payload *, uint_least32_t,
	                 int_least64_t);

	/* optional: picks the value to keep when a codepoint already has one */
	uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
	                                 uint_least8_t);
};
|
||||
|
||||
/* State handed to break_test_callback(): the growing test list. */
struct break_test_payload {
	struct break_test **test; /* address of the test array pointer */
	size_t *testlen;          /* address of the test count */
};
|
||||
|
||||
/*
 * Resize p so that it can hold len elements of size bytes each.
 *
 * Requests whose total byte count len * size does not fit into a size_t
 * are refused: NULL is returned and errno is set to ENOMEM. Otherwise the
 * result of realloc() is returned unchanged.
 */
static void *
reallocate_array(void *p, size_t len, size_t size)
{
	const int overflows = (len > 0) && (size > SIZE_MAX / len);

	if (!overflows) {
		return realloc(p, len * size);
	}

	errno = ENOMEM;

	return NULL;
}
|
||||
|
||||
/*
 * Parse the first len characters of str as a hexadecimal codepoint.
 *
 * str does not have to be NUL-terminated; exactly len characters are
 * examined. Upper- and lowercase hexadecimal digits are accepted.
 *
 * Returns 0 and stores the value in *cp on success. Returns 1 after
 * printing a diagnostic to stderr if the input is empty, longer than six
 * digits, contains a non-hexadecimal character or exceeds 0x10FFFF; *cp
 * is left untouched in that case.
 */
int
hextocp(const char *str, size_t len, uint_least32_t *cp)
{
	uint_least32_t value = 0, digit;
	size_t i;

	/* an empty string is not a codepoint; do not silently yield U+0000 */
	if (len == 0) {
		fprintf(stderr, "hextocp: Empty codepoint string.\n");
		return 1;
	}

	/* the maximum valid codepoint is 0x10FFFF */
	if (len > 6) {
		fprintf(stderr, "hextocp: '%.*s' is too long.\n",
		        (int)len, str);
		return 1;
	}

	for (i = 0; i < len; i++) {
		const char c = str[i];

		if (c >= '0' && c <= '9') {
			digit = (uint_least32_t)(c - '0');
		} else if (c >= 'a' && c <= 'f') {
			digit = (uint_least32_t)(c - 'a') + 10;
		} else if (c >= 'A' && c <= 'F') {
			digit = (uint_least32_t)(c - 'A') + 10;
		} else {
			fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
			        (int)len, str);
			return 1;
		}

		/* at most six digits, so the value stays below 2^24 */
		value = (value << 4) | digit;
	}

	if (value > UINT32_C(0x10FFFF)) {
		fprintf(stderr, "hextocp: '%.*s' is too large.\n",
		        (int)len, str);
		return 1;
	}

	*cp = value;

	return 0;
}
|
||||
|
||||
static int
|
||||
range_parse(const char *str, struct range *range)
|
||||
{
|
||||
char *p;
|
||||
|
||||
if ((p = strstr(str, "..")) == NULL) {
|
||||
/* input has the form "XXXXXX" */
|
||||
if (hextocp(str, strlen(str), &range->lower)) {
|
||||
return 1;
|
||||
}
|
||||
range->upper = range->lower;
|
||||
} else {
|
||||
/* input has the form "XXXXXX..XXXXXX" */
|
||||
if (hextocp(str, (size_t)(p - str), &range->lower) ||
|
||||
hextocp(p + 2, strlen(p + 2), &range->upper)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Read the file fname line by line and hand every data line to callback.
 *
 * Empty lines and lines starting with '#' are skipped. Every other line
 * is split in place into fields separated by ';'; the text following the
 * first '#' is the comment. Spaces surrounding fields and leading the
 * comment are stripped. The callback receives the file name, the field
 * array, the number of fields, the comment (NULL if the line has none)
 * and payload. The field and comment pointers refer to the line buffer,
 * which is reused for the next line.
 *
 * The program is terminated with exit(1) if the file cannot be opened or
 * read, if memory runs out or if the callback returns non-zero.
 */
void
parse_file_with_callback(const char *fname, int (*callback)(const char *,
                         char **, size_t, char *, void *), void *payload)
{
	FILE *fp;
	char *line = NULL, **field = NULL, *comment;
	size_t linebufsize = 0, i, fieldbufsize = 0, j, nfields;
	ssize_t len;

	/* open file */
	if (!(fp = fopen(fname, "r"))) {
		fprintf(stderr, "parse_file_with_callback: fopen '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	while ((len = getline(&line, &linebufsize, fp)) >= 0) {
		/* remove trailing newline */
		if (len > 0 && line[len - 1] == '\n') {
			line[len - 1] = '\0';
			len--;
		}

		/* skip empty lines and comment lines */
		if (len == 0 || line[0] == '#') {
			continue;
		}

		/* tokenize line into fields */
		for (i = 0, nfields = 0, comment = NULL; i < (size_t)len; i++) {
			/* skip leading whitespace */
			while (line[i] == ' ') {
				i++;
			}

			/* check if we crashed into the comment */
			if (line[i] != '#') {
				/* extend field buffer, if necessary */
				if (++nfields > fieldbufsize) {
					if ((field = realloc(field, nfields *
					     sizeof(*field))) == NULL) {
						fprintf(stderr, "parse_file_with_"
						        "callback: realloc: %s.\n",
						        strerror(errno));
						exit(1);
					}
					fieldbufsize = nfields;
				}

				/* set current position as field start */
				field[nfields - 1] = &line[i];

				/* continue until we reach ';' or '#' or end */
				while (line[i] != ';' && line[i] != '#' &&
				       line[i] != '\0') {
					i++;
				}
			}

			if (line[i] == '#') {
				/* set comment-variable for later */
				comment = &line[i + 1];
			}

			/*
			 * go back over whitespace and terminate the field
			 * there; the j > 0 bound keeps the scan inside the
			 * buffer when only spaces precede position i
			 */
			for (j = i; j > 0 && line[j - 1] == ' '; j--)
				;
			line[j] = '\0';

			/* if comment is set, we are done */
			if (comment != NULL) {
				break;
			}
		}

		/* skip leading whitespace in comment */
		while (comment != NULL && comment[0] == ' ') {
			comment++;
		}

		/* call callback function */
		if (callback(fname, field, nfields, comment, payload)) {
			fprintf(stderr, "parse_file_with_callback: "
			        "Malformed input.\n");
			exit(1);
		}
	}

	/* getline() returns -1 for both EOF and errors; tell them apart */
	if (ferror(fp)) {
		fprintf(stderr, "parse_file_with_callback: getline '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	free(line);
	free(field);
	fclose(fp);
}
|
||||
|
||||
static int
|
||||
properties_callback(const char *file, char **field, size_t nfields,
|
||||
char *comment, void *payload)
|
||||
{
|
||||
/* prop always has the length 0x110000 */
|
||||
struct properties_payload *p = (struct properties_payload *)payload;
|
||||
struct range r;
|
||||
uint_least8_t i;
|
||||
uint_least32_t cp;
|
||||
|
||||
(void)comment;
|
||||
|
||||
if (nfields < 2) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < p->speclen; i++) {
|
||||
/* identify fitting file and identifier */
|
||||
if (p->spec[i].file &&
|
||||
!strcmp(p->spec[i].file, file) &&
|
||||
(!strcmp(p->spec[i].ucdname, field[1]) ||
|
||||
(comment != NULL && !strncmp(p->spec[i].ucdname, comment, strlen(p->spec[i].ucdname)) &&
|
||||
comment[strlen(p->spec[i].ucdname)] == ' '))) {
|
||||
/* parse range in first field */
|
||||
if (range_parse(field[0], &r)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* apply to all codepoints in the range */
|
||||
for (cp = r.lower; cp <= r.upper; cp++) {
|
||||
if (p->set_value(payload, cp, i)) {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
properties_compress(const struct properties *prop,
|
||||
struct properties_compressed *comp)
|
||||
{
|
||||
uint_least32_t cp, i;
|
||||
|
||||
/* initialization */
|
||||
if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp->offset))))) {
|
||||
fprintf(stderr, "malloc: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
comp->data = NULL;
|
||||
comp->datalen = 0;
|
||||
|
||||
for (cp = 0; cp < UINT32_C(0x110000); cp++) {
|
||||
for (i = 0; i < comp->datalen; i++) {
|
||||
if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) {
|
||||
/* found a match! */
|
||||
comp->offset[cp] = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == comp->datalen) {
|
||||
/*
|
||||
* found no matching properties-struct, so
|
||||
* add current properties to data and add the
|
||||
* offset in the offset-table
|
||||
*/
|
||||
if (!(comp->data = reallocate_array(comp->data,
|
||||
++(comp->datalen),
|
||||
sizeof(*(comp->data))))) {
|
||||
fprintf(stderr, "reallocate_array: %s\n",
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
|
||||
sizeof(*prop));
|
||||
comp->offset[cp] = comp->datalen - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double
|
||||
properties_get_major_minor(const struct properties_compressed *comp,
|
||||
struct properties_major_minor *mm)
|
||||
{
|
||||
size_t i, j, compression_count = 0;
|
||||
|
||||
/*
|
||||
* we currently have an array comp->offset which maps the
|
||||
* codepoints 0..0x110000 to offsets into comp->data.
|
||||
* To improve cache-locality instead and allow a bit of
|
||||
* compressing, instead of directly mapping a codepoint
|
||||
* 0xAAAABB with comp->offset, we generate two arrays major
|
||||
* and minor such that
|
||||
* comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB]
|
||||
* This yields a major-array of length 2^16 and a minor array
|
||||
* of variable length depending on how many common subsequences
|
||||
* can be filtered out.
|
||||
*/
|
||||
|
||||
/* initialize */
|
||||
if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
|
||||
fprintf(stderr, "malloc: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
mm->minor = NULL;
|
||||
mm->minorlen = 0;
|
||||
|
||||
for (i = 0; i < (size_t)0x1100; i++) {
|
||||
/*
|
||||
* we now look at the cp-range (i << 8)..(i << 8 + 0xFF)
|
||||
* and check if its corresponding offset-data already
|
||||
* exists in minor (because then we just point there
|
||||
* and need less storage)
|
||||
*/
|
||||
for (j = 0; j + 0xFF < mm->minorlen; j++) {
|
||||
if (!memcmp(&(comp->offset[i << 8]),
|
||||
&(mm->minor[j]),
|
||||
sizeof(*(comp->offset)) * 0x100)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j + 0xFF < mm->minorlen) {
|
||||
/* found an index */
|
||||
compression_count++;
|
||||
mm->major[i] = j;
|
||||
} else {
|
||||
/*
|
||||
* add "new" sequence to minor and point to it
|
||||
* in major
|
||||
*/
|
||||
mm->minorlen += 0x100;
|
||||
if (!(mm->minor = reallocate_array(mm->minor,
|
||||
mm->minorlen,
|
||||
sizeof(*(mm->minor))))) {
|
||||
fprintf(stderr, "reallocate_array: %s\n",
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
memcpy(&(mm->minor[mm->minorlen - 0x100]),
|
||||
&(comp->offset[i << 8]),
|
||||
sizeof(*(mm->minor)) * 0x100);
|
||||
mm->major[i] = mm->minorlen - 0x100;
|
||||
}
|
||||
}
|
||||
|
||||
/* return compression ratio */
|
||||
return (double)compression_count / 0x1100 * 100;
|
||||
}
|
||||
|
||||
/*
 * Print data as the definition of a static constant C array called name
 * to stdout, eight values per line. The element type is the smallest of
 * uint_least8_t, uint_least16_t, uint_least32_t and uint_least64_t whose
 * maximum is not below the largest value in data.
 */
void
properties_print_lookup_table(char *name, size_t *data, size_t datalen)
{
	const char *type, *sep;
	size_t i, maxval = 0;

	/* determine the largest value to select the element type */
	for (i = 0; i < datalen; i++) {
		if (data[i] > maxval) {
			maxval = data[i];
		}
	}

	if (maxval <= UINT_LEAST8_MAX) {
		type = "uint_least8_t";
	} else if (maxval <= UINT_LEAST16_MAX) {
		type = "uint_least16_t";
	} else if (maxval <= UINT_LEAST32_MAX) {
		type = "uint_least32_t";
	} else {
		type = "uint_least64_t";
	}

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < datalen; i++) {
		if (i + 1 == datalen) {
			/* last value */
			sep = "\n";
		} else if ((i + 1) % 8 != 0) {
			sep = ", ";
		} else {
			/* eighth value of a row: start a new one */
			sep = ",\n\t";
		}
		printf("%zu%s", data[i], sep);
	}
	printf("};\n");
}
|
||||
|
||||
/*
 * Print the definition of a static constant C array called name with
 * element type type to stdout, eight values per line. Entry i is the
 * value get_value(payload, offset[i]).
 */
void
properties_print_derived_lookup_table(char *name, char *type, size_t *offset, size_t offsetlen,
                                      int_least64_t (*get_value)(const struct properties *,
                                      size_t), const void *payload)
{
	const char *sep;
	int_least64_t value;
	size_t i;

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < offsetlen; i++) {
		value = get_value(payload, offset[i]);

		if (i + 1 == offsetlen) {
			/* last value */
			sep = "\n";
		} else if ((i + 1) % 8 != 0) {
			sep = ", ";
		} else {
			/* eighth value of a row: start a new one */
			sep = ",\n\t";
		}
		printf("%"PRIiLEAST64"%s", value, sep);
	}
	printf("};\n");
}
|
||||
|
||||
static void
|
||||
properties_print_enum(const struct property_spec *spec, size_t speclen,
|
||||
const char *enumname, const char *enumprefix)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
printf("enum %s {\n", enumname);
|
||||
for (i = 0; i < speclen; i++) {
|
||||
printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
|
||||
}
|
||||
printf("\tNUM_%sS,\n};\n\n", enumprefix);
|
||||
}
|
||||
|
||||
static int
|
||||
set_value_bp(struct properties_payload *payload, uint_least32_t cp,
|
||||
int_least64_t value)
|
||||
{
|
||||
if (payload->prop[cp].property != 0) {
|
||||
if (payload->handle_conflict == NULL) {
|
||||
fprintf(stderr, "set_value_bp: "
|
||||
"Unhandled character break property "
|
||||
"overwrite for 0x%06X (%s <- %s).\n",
|
||||
cp, payload->spec[payload->prop[cp].
|
||||
property].enumname,
|
||||
payload->spec[value].enumname);
|
||||
return 1;
|
||||
} else {
|
||||
value = payload->handle_conflict(cp,
|
||||
(uint_least8_t)payload->prop[cp].property,
|
||||
(uint_least8_t)value);
|
||||
}
|
||||
}
|
||||
payload->prop[cp].property = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int_least64_t
|
||||
get_value_bp(const struct properties *prop, size_t offset)
|
||||
{
|
||||
return (uint_least8_t)prop[offset].property;
|
||||
}
|
||||
|
||||
void
|
||||
properties_generate_break_property(const struct property_spec *spec,
|
||||
uint_least8_t speclen,
|
||||
uint_least8_t (*handle_conflict)(
|
||||
uint_least32_t, uint_least8_t,
|
||||
uint_least8_t), uint_least8_t
|
||||
(*post_process)(uint_least8_t),
|
||||
const char *prefix, const char *argv0)
|
||||
{
|
||||
struct properties_compressed comp;
|
||||
struct properties_major_minor mm;
|
||||
struct properties_payload payload;
|
||||
struct properties *prop;
|
||||
size_t i, j, prefixlen = strlen(prefix);
|
||||
char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64];
|
||||
|
||||
/* allocate property buffer for all 0x110000 codepoints */
|
||||
if (!(prop = calloc(UINT32_C(0x110000), sizeof(*prop)))) {
|
||||
fprintf(stderr, "calloc: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* generate data */
|
||||
payload.prop = prop;
|
||||
payload.spec = spec;
|
||||
payload.speclen = speclen;
|
||||
payload.set_value = set_value_bp;
|
||||
payload.handle_conflict = handle_conflict;
|
||||
|
||||
/* parse each file exactly once and ignore NULL-fields */
|
||||
for (i = 0; i < speclen; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
if (spec[i].file && spec[j].file &&
|
||||
!strcmp(spec[i].file, spec[j].file)) {
|
||||
/* file has already been parsed */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == j && spec[i].file) {
|
||||
/* file has not been processed yet */
|
||||
parse_file_with_callback(spec[i].file,
|
||||
properties_callback,
|
||||
&payload);
|
||||
}
|
||||
}
|
||||
|
||||
/* post-processing */
|
||||
if (post_process != NULL) {
|
||||
for (i = 0; i < UINT32_C(0x110000); i++) {
|
||||
payload.prop[i].property =
|
||||
post_process((uint_least8_t)payload.prop[i].property);
|
||||
}
|
||||
}
|
||||
|
||||
/* compress data */
|
||||
printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n", argv0);
|
||||
properties_compress(prop, &comp);
|
||||
|
||||
fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0,
|
||||
prefix, properties_get_major_minor(&comp, &mm));
|
||||
|
||||
/* prepare names */
|
||||
if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(buf1)) {
|
||||
fprintf(stderr, "snprintf: String truncated.\n");
|
||||
exit(1);
|
||||
}
|
||||
if (LEN(prefix_uc) + 1 < prefixlen) {
|
||||
fprintf(stderr, "snprintf: Buffer too small.\n");
|
||||
exit(1);
|
||||
}
|
||||
for (i = 0; i < prefixlen; i++) {
|
||||
prefix_uc[i] = (char)toupper(prefix[i]);
|
||||
}
|
||||
prefix_uc[prefixlen] = '\0';
|
||||
if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf2) ||
|
||||
(size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3) ||
|
||||
(size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)) {
|
||||
fprintf(stderr, "snprintf: String truncated.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* print data */
|
||||
properties_print_enum(spec, speclen, buf1, buf2);
|
||||
properties_print_lookup_table(buf3, mm.major, 0x1100);
|
||||
printf("\n");
|
||||
properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor, mm.minorlen,
|
||||
get_value_bp, comp.data);
|
||||
|
||||
/* free data */
|
||||
free(prop);
|
||||
free(comp.data);
|
||||
free(comp.offset);
|
||||
free(mm.major);
|
||||
free(mm.minor);
|
||||
}
|
||||
|
||||
static int
|
||||
break_test_callback(const char *fname, char **field, size_t nfields,
|
||||
char *comment, void *payload)
|
||||
{
|
||||
struct break_test *t,
|
||||
**test = ((struct break_test_payload *)payload)->test;
|
||||
size_t i, *testlen = ((struct break_test_payload *)payload)->testlen;
|
||||
char *token;
|
||||
|
||||
(void)fname;
|
||||
|
||||
if (nfields < 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* append new testcase and initialize with zeroes */
|
||||
if ((*test = realloc(*test, ++(*testlen) * sizeof(**test))) == NULL) {
|
||||
fprintf(stderr, "break_test_callback: realloc: %s.\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
t = &(*test)[*testlen - 1];
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
/* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */
|
||||
for (token = strtok(field[0], " "), i = 0; token != NULL; i++,
|
||||
token = strtok(NULL, " ")) {
|
||||
if (i % 2 == 0) {
|
||||
/* delimiter or start of sequence */
|
||||
if (i == 0 || !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
|
||||
/*
|
||||
* '÷' indicates a breakpoint,
|
||||
* the current length is done; allocate
|
||||
* a new length field and set it to 0
|
||||
*/
|
||||
if ((t->len = realloc(t->len,
|
||||
++t->lenlen * sizeof(*t->len))) == NULL) {
|
||||
fprintf(stderr, "break_test_"
|
||||
"callback: realloc: %s.\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
t->len[t->lenlen - 1] = 0;
|
||||
} else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 */
|
||||
/*
|
||||
* '×' indicates a non-breakpoint, do nothing
|
||||
*/
|
||||
} else {
|
||||
fprintf(stderr, "break_test_callback: "
|
||||
"Malformed delimiter '%s'.\n", token);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
/* add codepoint to cp-array */
|
||||
if ((t->cp = realloc(t->cp, ++t->cplen *
|
||||
sizeof(*t->cp))) == NULL) {
|
||||
fprintf(stderr, "break_test_callback: "
|
||||
"realloc: %s.\n", strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
if (hextocp(token, strlen(token), &t->cp[t->cplen - 1])) {
|
||||
return 1;
|
||||
}
|
||||
if (t->lenlen > 0) {
|
||||
t->len[t->lenlen - 1]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (t->len[t->lenlen - 1] == 0) {
|
||||
/*
|
||||
* we allocated one more length than we needed because
|
||||
* the breakpoint was at the end
|
||||
*/
|
||||
t->lenlen--;
|
||||
}
|
||||
|
||||
/* store comment */
|
||||
if (((*test)[*testlen - 1].descr = strdup(comment)) == NULL) {
|
||||
fprintf(stderr, "break_test_callback: strdup: %s.\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
break_test_list_parse(char *fname, struct break_test **test,
|
||||
size_t *testlen)
|
||||
{
|
||||
struct break_test_payload pl = {
|
||||
.test = test,
|
||||
.testlen = testlen,
|
||||
};
|
||||
*test = NULL;
|
||||
*testlen = 0;
|
||||
|
||||
parse_file_with_callback(fname, break_test_callback, &pl);
|
||||
}
|
||||
|
||||
void
|
||||
break_test_list_print(const struct break_test *test, size_t testlen,
|
||||
const char *identifier, const char *progname)
|
||||
{
|
||||
size_t i, j;
|
||||
|
||||
printf("/* Automatically generated by %s */\n"
|
||||
"#include <stdint.h>\n#include <stddef.h>\n\n"
|
||||
"#include \"../gen/types.h\"\n\n", progname);
|
||||
|
||||
printf("static const struct break_test %s[] = {\n", identifier);
|
||||
for (i = 0; i < testlen; i++) {
|
||||
printf("\t{\n");
|
||||
|
||||
printf("\t\t.cp = (uint_least32_t[]){");
|
||||
for (j = 0; j < test[i].cplen; j++) {
|
||||
printf(" UINT32_C(0x%06X)", test[i].cp[j]);
|
||||
if (j + 1 < test[i].cplen) {
|
||||
putchar(',');
|
||||
}
|
||||
}
|
||||
printf(" },\n");
|
||||
printf("\t\t.cplen = %zu,\n", test[i].cplen);
|
||||
|
||||
printf("\t\t.len = (size_t[]){");
|
||||
for (j = 0; j < test[i].lenlen; j++) {
|
||||
printf(" %zu", test[i].len[j]);
|
||||
if (j + 1 < test[i].lenlen) {
|
||||
putchar(',');
|
||||
}
|
||||
}
|
||||
printf(" },\n");
|
||||
printf("\t\t.lenlen = %zu,\n", test[i].lenlen);
|
||||
|
||||
printf("\t\t.descr = \"%s\",\n", test[i].descr);
|
||||
|
||||
printf("\t},\n");
|
||||
}
|
||||
printf("};\n");
|
||||
}
|
||||
|
||||
void
|
||||
break_test_list_free(struct break_test *test, size_t testlen)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < testlen; i++) {
|
||||
free(test[i].cp);
|
||||
free(test[i].len);
|
||||
free(test[i].descr);
|
||||
}
|
||||
|
||||
free(test);
|
||||
}
|
60
libs/libgrapheme-2.0.2/gen/util.h
Normal file
60
libs/libgrapheme-2.0.2/gen/util.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#ifndef UTIL_H
|
||||
#define UTIL_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
/* Number of elements of the array x; x must be an array, not a pointer. */
#define LEN(x) (sizeof(x) / sizeof((x)[0]))
|
||||
|
||||
/* Describes one property value and where it is read from. */
struct property_spec {
	const char *enumname; /* suffix of the generated enum constant */
	const char *file;     /* data file providing the property, or NULL */
	const char *ucdname;  /* name of the property in the data file */
};
|
||||
|
||||
/* Property data stored for a single codepoint. */
struct properties {
	int_least64_t property; /* property value of the codepoint */
};
|
||||
|
||||
/* Deduplicated form of a per-codepoint properties table. */
struct properties_compressed {
	size_t *offset;          /* per codepoint: index into data */
	struct properties *data; /* the distinct properties values */
	size_t datalen;          /* number of entries in data */
};
|
||||
|
||||
/* Two-stage lookup table derived from a compressed properties table. */
struct properties_major_minor {
	size_t *major;   /* per block of codepoints: start position in minor */
	size_t *minor;   /* concatenated blocks of offsets */
	size_t minorlen; /* number of entries in minor */
};
|
||||
|
||||
/* parse the first len characters of str as a hexadecimal codepoint */
int hextocp(const char *str, size_t len, uint_least32_t *cp);

/* read a data file line by line and pass each data line to callback */
void parse_file_with_callback(const char *fname,
                              int (*callback)(const char *, char **,
                                              size_t, char *, void *),
                              void *payload);

/* property table compression and output */
void properties_compress(const struct properties *prop,
                         struct properties_compressed *comp);
double properties_get_major_minor(const struct properties_compressed *comp,
                                  struct properties_major_minor *mm);
void properties_print_lookup_table(char *name, size_t *data, size_t datalen);
void properties_print_derived_lookup_table(char *name, char *type,
                                           size_t *offset, size_t offsetlen,
                                           int_least64_t (*get_value)(
                                                   const struct properties *,
                                                   size_t),
                                           const void *payload);

/* complete generator for one break property */
void properties_generate_break_property(const struct property_spec *spec,
                                        uint_least8_t speclen,
                                        uint_least8_t (*handle_conflict)(
                                                uint_least32_t,
                                                uint_least8_t,
                                                uint_least8_t),
                                        uint_least8_t (*post_process)(
                                                uint_least8_t),
                                        const char *prefix,
                                        const char *argv0);

/* break-test lists */
void break_test_list_parse(char *fname, struct break_test **test,
                           size_t *testlen);
void break_test_list_print(const struct break_test *test, size_t testlen,
                           const char *identifier, const char *progname);
void break_test_list_free(struct break_test *test, size_t testlen);
|
||||
|
||||
#endif /* UTIL_H */
|
BIN
libs/libgrapheme-2.0.2/gen/util.o
Normal file
BIN
libs/libgrapheme-2.0.2/gen/util.o
Normal file
Binary file not shown.
BIN
libs/libgrapheme-2.0.2/gen/word
Executable file
BIN
libs/libgrapheme-2.0.2/gen/word
Executable file
Binary file not shown.
19
libs/libgrapheme-2.0.2/gen/word-test.c
Normal file
19
libs/libgrapheme-2.0.2/gen/word-test.c
Normal file
@@ -0,0 +1,19 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stddef.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
/*
 * Entry point of the word-break test generator: parses
 * data/WordBreakTest.txt and prints the test cases as the C array
 * word_break_test.
 */
int
main(int argc, char *argv[])
{
	struct break_test *tests = NULL;
	size_t ntests = 0;

	(void)argc;

	break_test_list_parse("data/WordBreakTest.txt", &tests, &ntests);
	break_test_list_print(tests, ntests, "word_break_test", argv[0]);
	break_test_list_free(tests, ntests);

	return 0;
}
|
159
libs/libgrapheme-2.0.2/gen/word.c
Normal file
159
libs/libgrapheme-2.0.2/gen/word.c
Normal file
@@ -0,0 +1,159 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#define FILE_EMOJI "data/emoji-data.txt"
|
||||
#define FILE_WORD "data/WordBreakProperty.txt"
|
||||
|
||||
static const struct property_spec word_break_property[] = {
|
||||
{
|
||||
.enumname = "OTHER",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "ALETTER",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "ALetter",
|
||||
},
|
||||
{
|
||||
.enumname = "BOTH_ALETTER_EXTPICT",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CR",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "CR",
|
||||
},
|
||||
{
|
||||
.enumname = "DOUBLE_QUOTE",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Double_Quote",
|
||||
},
|
||||
{
|
||||
.enumname = "EXTEND",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Extend",
|
||||
},
|
||||
{
|
||||
.enumname = "EXTENDED_PICTOGRAPHIC",
|
||||
.file = FILE_EMOJI,
|
||||
.ucdname = "Extended_Pictographic",
|
||||
},
|
||||
{
|
||||
.enumname = "EXTENDNUMLET",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "ExtendNumLet",
|
||||
},
|
||||
{
|
||||
.enumname = "FORMAT",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Format",
|
||||
},
|
||||
{
|
||||
.enumname = "HEBREW_LETTER",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Hebrew_Letter",
|
||||
},
|
||||
{
|
||||
.enumname = "KATAKANA",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Katakana",
|
||||
},
|
||||
{
|
||||
.enumname = "LF",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "LF",
|
||||
},
|
||||
{
|
||||
.enumname = "MIDLETTER",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "MidLetter",
|
||||
},
|
||||
{
|
||||
.enumname = "MIDNUM",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "MidNum",
|
||||
},
|
||||
{
|
||||
.enumname = "MIDNUMLET",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "MidNumLet",
|
||||
},
|
||||
{
|
||||
.enumname = "NEWLINE",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Newline",
|
||||
},
|
||||
{
|
||||
.enumname = "NUMERIC",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Numeric",
|
||||
},
|
||||
{
|
||||
.enumname = "REGIONAL_INDICATOR",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Regional_Indicator",
|
||||
},
|
||||
{
|
||||
.enumname = "SINGLE_QUOTE",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "Single_Quote",
|
||||
},
|
||||
{
|
||||
.enumname = "WSEGSPACE",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "WSegSpace",
|
||||
},
|
||||
{
|
||||
.enumname = "ZWJ",
|
||||
.file = FILE_WORD,
|
||||
.ucdname = "ZWJ",
|
||||
},
|
||||
};
|
||||
|
||||
static uint_least8_t
|
||||
handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
|
||||
{
|
||||
uint_least8_t result;
|
||||
|
||||
(void)cp;
|
||||
|
||||
if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
|
||||
!strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPHIC")) ||
|
||||
(!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPHIC") &&
|
||||
!strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
|
||||
for (result = 0; result < LEN(word_break_property); result++) {
|
||||
if (!strcmp(word_break_property[result].enumname,
|
||||
"BOTH_ALETTER_EXTPICT")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result == LEN(word_break_property)) {
|
||||
fprintf(stderr, "handle_conflict: Internal error.\n");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
properties_generate_break_property(word_break_property,
|
||||
LEN(word_break_property),
|
||||
handle_conflict, NULL, "word_break",
|
||||
argv[0]);
|
||||
|
||||
return 0;
|
||||
}
|
4705
libs/libgrapheme-2.0.2/gen/word.h
Normal file
4705
libs/libgrapheme-2.0.2/gen/word.h
Normal file
File diff suppressed because it is too large
Load Diff
BIN
libs/libgrapheme-2.0.2/gen/word.o
Normal file
BIN
libs/libgrapheme-2.0.2/gen/word.o
Normal file
Binary file not shown.
Reference in New Issue
Block a user