Initial Commit
This commit is contained in:
456
libs/libgrapheme-2.0.2/gen/line.c
Normal file
456
libs/libgrapheme-2.0.2/gen/line.c
Normal file
@@ -0,0 +1,456 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#define FILE_EAW "data/EastAsianWidth.txt"
|
||||
#define FILE_EMOJI "data/emoji-data.txt"
|
||||
#define FILE_LINE "data/LineBreak.txt"
|
||||
|
||||
static const struct property_spec line_break_property[] = {
|
||||
{
|
||||
.enumname = "AL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "AL",
|
||||
},
|
||||
/*
|
||||
* Both extended pictographic and cn are large classes,
|
||||
* but we are only interested in their intersection for LB30b,
|
||||
* so we have the following two temporary classes. At first
|
||||
* the extpict-class is filled, then the cn-class, which leads
|
||||
* to conflicts (that we handle by putting them in the "proper"
|
||||
* class BOTH_CN_EXTPICT). We make use of the fact that there
|
||||
* is no intersection between AL and Cn.
|
||||
*
|
||||
* Any consecutive conflicts are permitted to overwrite
|
||||
* TMP_EXTENDED_PICTOGRAPHIC and TMP_CN, because we don't need
|
||||
* them, and in the final postprocessing we "reset" all
|
||||
* remaining matches (that then didn't fit any of the other
|
||||
* classes) to the generic class AL.
|
||||
*/
|
||||
{
|
||||
.enumname = "TMP_CN",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "Cn",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EXTENDED_PICTOGRAPHIC",
|
||||
.file = FILE_EMOJI,
|
||||
.ucdname = "Extended_Pictographic",
|
||||
},
|
||||
/* end of special block */
|
||||
{
|
||||
.enumname = "B2",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "B2",
|
||||
},
|
||||
{
|
||||
.enumname = "BA",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "BA",
|
||||
},
|
||||
{
|
||||
.enumname = "BB",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "BB",
|
||||
},
|
||||
{
|
||||
.enumname = "BK",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "BK",
|
||||
},
|
||||
{
|
||||
.enumname = "BOTH_CN_EXTPICT",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CB",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CB",
|
||||
},
|
||||
{
|
||||
.enumname = "CL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CL",
|
||||
},
|
||||
{
|
||||
.enumname = "CM",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CM",
|
||||
},
|
||||
{
|
||||
.enumname = "CP_WITHOUT_EAW_HWF",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CP",
|
||||
},
|
||||
{
|
||||
.enumname = "CP_WITH_EAW_HWF",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "CR",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CR",
|
||||
},
|
||||
{
|
||||
.enumname = "EB",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "EB",
|
||||
},
|
||||
{
|
||||
.enumname = "EM",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "EM",
|
||||
},
|
||||
{
|
||||
.enumname = "EX",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "EX",
|
||||
},
|
||||
{
|
||||
.enumname = "GL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "GL",
|
||||
},
|
||||
{
|
||||
.enumname = "H2",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "H2",
|
||||
},
|
||||
{
|
||||
.enumname = "H3",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "H3",
|
||||
},
|
||||
{
|
||||
.enumname = "HL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "HL",
|
||||
},
|
||||
{
|
||||
.enumname = "HY",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "HY",
|
||||
},
|
||||
{
|
||||
.enumname = "ID",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "ID",
|
||||
},
|
||||
{
|
||||
.enumname = "IN",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "IN",
|
||||
},
|
||||
{
|
||||
.enumname = "IS",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "IS",
|
||||
},
|
||||
{
|
||||
.enumname = "JL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "JL",
|
||||
},
|
||||
{
|
||||
.enumname = "JT",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "JT",
|
||||
},
|
||||
{
|
||||
.enumname = "JV",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "JV",
|
||||
},
|
||||
{
|
||||
.enumname = "LF",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "LF",
|
||||
},
|
||||
{
|
||||
.enumname = "NL",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "NL",
|
||||
},
|
||||
{
|
||||
.enumname = "NS",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "NS",
|
||||
},
|
||||
{
|
||||
.enumname = "NU",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "NU",
|
||||
},
|
||||
{
|
||||
.enumname = "OP_WITHOUT_EAW_HWF",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "OP",
|
||||
},
|
||||
{
|
||||
.enumname = "OP_WITH_EAW_HWF",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "PO",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "PO",
|
||||
},
|
||||
{
|
||||
.enumname = "PR",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "PR",
|
||||
},
|
||||
{
|
||||
.enumname = "QU",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "QU",
|
||||
},
|
||||
{
|
||||
.enumname = "RI",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "RI",
|
||||
},
|
||||
{
|
||||
.enumname = "SP",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SP",
|
||||
},
|
||||
{
|
||||
.enumname = "SY",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SY",
|
||||
},
|
||||
{
|
||||
.enumname = "WJ",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "WJ",
|
||||
},
|
||||
{
|
||||
.enumname = "ZW",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "ZW",
|
||||
},
|
||||
{
|
||||
.enumname = "ZWJ",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "ZWJ",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_AI",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "AI",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_CJ",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "CJ",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_XX",
|
||||
.file = NULL,
|
||||
.ucdname = NULL,
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_MN",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "Mn",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_MC",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "Mc",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_SA_WITHOUT_MN_OR_MC",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SA",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_SA_WITH_MN_OR_MC",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SA",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_SG",
|
||||
.file = FILE_LINE,
|
||||
.ucdname = "SG",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EAW_H",
|
||||
.file = FILE_EAW,
|
||||
.ucdname = "H",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EAW_W",
|
||||
.file = FILE_EAW,
|
||||
.ucdname = "W",
|
||||
},
|
||||
{
|
||||
.enumname = "TMP_EAW_F",
|
||||
.file = FILE_EAW,
|
||||
.ucdname = "F",
|
||||
},
|
||||
};
|
||||
|
||||
static uint_least8_t
|
||||
handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
|
||||
{
|
||||
uint_least8_t result = prop2;
|
||||
char *target = NULL;
|
||||
|
||||
(void)cp;
|
||||
|
||||
if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) ||
|
||||
(!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_EAW_HWF") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_EAW_HWF")) {
|
||||
target = "CP_WITH_EAW_HWF";
|
||||
} else if (!strcmp(line_break_property[prop1].enumname, "OP_WITHOUT_EAW_HWF") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_EAW_HWF")) {
|
||||
target = "OP_WITH_EAW_HWF";
|
||||
} else {
|
||||
/* ignore EAW for the rest */
|
||||
if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F"))) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_MC")) ||
|
||||
(!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_MN_OR_MC") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_MN_OR_MC")) {
|
||||
target = "SA_WITH_MN_OR_MC";
|
||||
} else {
|
||||
/* ignore Mn and Mc for the rest */
|
||||
if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop1].enumname, "TMP_MC"))) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
|
||||
target = "BOTH_CN_EXTPICT";
|
||||
} else {
|
||||
/* ignore Cn for all the other properties */
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_CN")) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
|
||||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
|
||||
target = "BOTH_CN_EXTPICT";
|
||||
} else {
|
||||
/* ignore Extended_Pictographic for all the other properties */
|
||||
if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
|
||||
result = prop2;
|
||||
} else {
|
||||
result = prop1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "handle_conflict: Cannot handle conflict %s <- %s.\n",
|
||||
line_break_property[prop1].enumname, line_break_property[prop2].enumname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (target) {
|
||||
for (result = 0; result < LEN(line_break_property); result++) {
|
||||
if (!strcmp(line_break_property[result].enumname,
|
||||
target)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result == LEN(line_break_property)) {
|
||||
fprintf(stderr, "handle_conflict: Internal error.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint_least8_t
|
||||
post_process(uint_least8_t prop)
|
||||
{
|
||||
const char *target = NULL;
|
||||
uint_least8_t result;
|
||||
|
||||
/* LB1 */
|
||||
if (!strcmp(line_break_property[prop].enumname, "TMP_AI") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_SG") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_XX")) {
|
||||
/* map AI, SG and XX to AL */
|
||||
target = "AL";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITH_MN_OR_MC")) {
|
||||
/* map SA (with General_Category Mn or Mc) to CM */
|
||||
target = "CM";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITHOUT_MN_OR_MC")) {
|
||||
/* map SA (without General_Category Mn or Mc) to AL */
|
||||
target = "AL";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_CJ")) {
|
||||
/* map CJ to NS */
|
||||
target = "NS";
|
||||
} else if (!strcmp(line_break_property[prop].enumname, "TMP_CN") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_MN") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_MC") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EAW_H") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EAW_W") ||
|
||||
!strcmp(line_break_property[prop].enumname, "TMP_EAW_F")) {
|
||||
/* map all the temporary classes "residue" to AL */
|
||||
target = "AL";
|
||||
}
|
||||
|
||||
if (target) {
|
||||
for (result = 0; result < LEN(line_break_property); result++) {
|
||||
if (!strcmp(line_break_property[result].enumname,
|
||||
target)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (result == LEN(line_break_property)) {
|
||||
fprintf(stderr, "handle_conflict: Internal error.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
} else {
|
||||
return prop;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
(void)argc;
|
||||
|
||||
properties_generate_break_property(line_break_property,
|
||||
LEN(line_break_property),
|
||||
handle_conflict, post_process,
|
||||
"line_break", argv[0]);
|
||||
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user