10 #include "../stdafx.h"
11 #include "../core/endian_func.hpp"
12 #include "../string_func.h"
13 #include "../table/control_codes.h"
18 #include "../table/strgen_tables.h"
20 #include "../safeguards.h"
/**
 * Global C-string, initialised to the placeholder "(unknown file)".
 * NOTE(review): presumably the name of the input file currently being read,
 * used when reporting diagnostics -- confirm against the code that assigns it.
 */
26 const char *
_file =
"(unknown file)";
/*
 * Global diagnostic state.
 *  - _errors / _warnings: both are reset to 0 in a single statement elsewhere
 *    in this file; presumably running counters of reported problems (the code
 *    that increments them is not part of this file section -- confirm).
 *  - _show_todo: tested as a bit mask. Value 2 enables the
 *    "'%s' is untranslated" warnings; value 1 gates the code path that uses
 *    the "<TODO> " marker string for untranslated strings.
 */
28 int _errors, _warnings, _show_todo;
/*
 * Forward declaration; the definition appears further down in this file.
 * It is declared here because the hashing code calls it before that point.
 *
 * Callers pass the address of their string cursor and call the function
 * repeatedly until it returns nullptr; the definition returns nullptr when
 * the end of the string ('\0') is reached before another '{' is found.
 *
 * str:   address of the caller's read cursor into the string being parsed.
 * param: caller-supplied buffer for the command's parameter text
 *        (NOTE(review): required size is not visible here -- confirm).
 * argno: receives an explicit "N:" argument number when one is written;
 *        callers compare it against -1 to detect "no explicit number".
 * casei: receives the case index resolved from a case name, if one is given.
 */
32 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei);
41 caseidx(caseidx), string(
stredup(string)), next(next)
60 name(
stredup(name)), english(
stredup(english)), translated(nullptr),
61 hash_next(0), index(index), line(line), translated_case(nullptr)
120 for (; *s !=
'\0'; s++) hash =
ROL(hash, 3) ^ *s;
150 if (strcmp(ls->
name, s) == 0)
return ls;
164 for (; *s !=
'\0'; s++) {
165 hash =
ROL(hash, 3) ^ *s;
166 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
190 hash ^= i * 0x717239;
191 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
195 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) !=
nullptr) {
198 hash ^= (cs - _cmd_structs) * 0x1234567;
199 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
/*
 * Name of the string currently being emitted. It is assigned from the
 * string's `name` field before the string is written, and is printed as the
 * identifying "%s" in plural-form and missing-argument diagnostics.
 */
218 static const char *_cur_ident;
/*
 * Index of the current argument while a string's commands are being emitted.
 * EmitPlural and EmitGender use it as their starting argument index, the
 * command-emitting loop compares it with an explicit argument number and
 * post-increments it when indexing _cur_pcs.cmd, and PutArgidxCommand passes
 * it to TranslateArgumentIdx.
 */
233 static int _cur_argidx;
243 this->push_back(value);
253 this->push_back(value);
254 }
else if (value < 0x800) {
255 this->push_back(0xC0 +
GB(value, 6, 5));
256 this->push_back(0x80 +
GB(value, 0, 6));
257 }
else if (value < 0x10000) {
258 this->push_back(0xE0 +
GB(value, 12, 4));
259 this->push_back(0x80 +
GB(value, 6, 6));
260 this->push_back(0x80 +
GB(value, 0, 6));
261 }
else if (value < 0x110000) {
262 this->push_back(0xF0 +
GB(value, 18, 3));
263 this->push_back(0x80 +
GB(value, 12, 6));
264 this->push_back(0x80 +
GB(value, 6, 6));
265 this->push_back(0x80 +
GB(value, 0, 6));
267 strgen_warning(
"Invalid unicode value U+0x%X", value);
272 size_t Utf8Validate(
const char *s)
279 }
else if (
GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
281 c =
GB(s[0], 0, 5) << 6 |
GB(s[1], 0, 6);
282 if (c >= 0x80)
return 2;
283 }
else if (
GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
285 c =
GB(s[0], 0, 4) << 12 |
GB(s[1], 0, 6) << 6 |
GB(s[2], 0, 6);
286 if (c >= 0x800)
return 3;
287 }
else if (
GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
289 c =
GB(s[0], 0, 3) << 18 |
GB(s[1], 0, 6) << 12 |
GB(s[2], 0, 6) << 6 |
GB(s[3], 0, 6);
290 if (c >= 0x10000 && c <= 0x10FFFF)
return 4;
297 void EmitSingleChar(
Buffer *buffer,
char *buf,
int value)
299 if (*buf !=
'\0') strgen_warning(
"Ignoring trailing letters in command");
310 bool ParseRelNum(
char **buf,
int *value,
int *offset)
312 const char *s = *buf;
316 while (*s ==
' ' || *s ==
'\t') s++;
321 int v = strtol(s, &end, 0);
322 if (end == s)
return false;
328 if (offset !=
nullptr && *end ==
':') {
331 *offset = strtol(s, &end, 0);
332 if (end == s)
return false;
339 char *ParseWord(
char **buf)
343 while (*s ==
' ' || *s ==
'\t') s++;
344 if (*s ==
'\0')
return nullptr;
350 if (*s ==
'\0')
break;
361 if (*s ==
'\0')
break;
362 if (*s ==
' ' || *s ==
'\t') {
/*
 * Forward declaration (carrying the default `offset = 0`); the definition
 * appears further down in this file. EmitPlural and EmitGender call it before
 * that point and append its result to the output buffer as a single byte.
 *
 * arg:    argument index into _cur_pcs.cmd; the definition aborts via
 *         strgen_fatal when it is out of range or has no command assigned.
 * offset: sub-argument offset; the definition aborts via strgen_fatal when
 *         the command at `arg` consumes `offset` or fewer parameters.
 * NOTE(review): the return expression is not visible in this section; it is
 * built from a sum of the `consumes` counts of the preceding arguments
 * (counting 1 for empty slots) -- confirm against the definition.
 */
374 static int TranslateArgumentIdx(
int arg,
int offset = 0);
376 static void EmitWordList(
Buffer *buffer,
const char *
const *words, uint nw)
379 for (uint i = 0; i < nw; i++) buffer->
AppendByte((
byte)strlen(words[i]) + 1);
380 for (uint i = 0; i < nw; i++) {
381 for (uint j = 0; words[i][j] !=
'\0'; j++) buffer->
AppendByte(words[i][j]);
386 void EmitPlural(
Buffer *buffer,
char *buf,
int value)
388 int argidx = _cur_argidx;
395 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
397 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
400 if (cmd ==
nullptr || cmd->default_plural_offset < 0) {
401 strgen_fatal(
"Command '%s' has no (default) plural position", cmd ==
nullptr ?
"<empty>" : cmd->cmd);
403 offset = cmd->default_plural_offset;
408 words[nw] = ParseWord(&buf);
409 if (words[nw] ==
nullptr)
break;
413 strgen_fatal(
"%s: No plural words", _cur_ident);
416 if (expected != nw) {
418 strgen_fatal(
"%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
421 if ((_show_todo & 2) != 0) strgen_warning(
"'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
425 for (; nw < expected; nw++) {
426 words[nw] = words[nw - 1];
434 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
435 EmitWordList(buffer, words, nw);
439 void EmitGender(
Buffer *buffer,
char *buf,
int value)
441 int argidx = _cur_argidx;
450 if (nw >=
MAX_NUM_GENDERS) strgen_fatal(
"G argument '%s' invalid", buf);
460 ParseRelNum(&buf, &argidx, &offset);
462 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
463 if (cmd ==
nullptr || (cmd->flags &
C_GENDER) == 0) {
464 strgen_fatal(
"Command '%s' can't have a gender", cmd ==
nullptr ?
"<empty>" : cmd->cmd);
468 words[nw] = ParseWord(&buf);
469 if (words[nw] ==
nullptr)
break;
471 if (nw !=
_lang.
num_genders) strgen_fatal(
"Bad # of arguments for gender command");
473 assert(
IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
475 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
476 EmitWordList(buffer, words, nw);
480 static const CmdStruct *FindCmd(
const char *s,
int len)
482 for (
const CmdStruct *cs = _cmd_structs; cs !=
endof(_cmd_structs); cs++) {
483 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] ==
'\0')
return cs;
488 static uint ResolveCaseName(
const char *str,
size_t len)
492 len = std::min(
lengthof(case_str) - 1, len);
493 memcpy(case_str, str, len);
494 case_str[len] =
'\0';
497 if (case_idx >=
MAX_NUM_CASES) strgen_fatal(
"Invalid case-name '%s'", case_str);
504 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei)
506 const char *s = *str, *start;
513 for (; *s !=
'{'; s++) {
514 if (*s ==
'\0')
return nullptr;
518 if (*s >=
'0' && *s <=
'9') {
521 *argno = strtoul(s, &end, 0);
522 if (*end !=
':') strgen_fatal(
"missing arg #");
530 }
while (c !=
'}' && c !=
' ' && c !=
'=' && c !=
'.' && c != 0);
532 const CmdStruct *cmd = FindCmd(start, s - start - 1);
533 if (cmd ==
nullptr) {
534 strgen_error(
"Undefined command '%.*s'", (
int)(s - start - 1), start);
539 const char *casep = s;
541 if (!(cmd->flags &
C_CASE)) {
542 strgen_fatal(
"Command '%s' can't have a case", cmd->cmd);
547 }
while (c !=
'}' && c !=
' ' && c !=
'\0');
548 *casei = ResolveCaseName(casep, s - casep - 1);
552 strgen_error(
"Missing } from command '%s'", start);
565 strgen_error(
"Missing } from command '%s'", start);
587 data(data), file(
stredup(file)), master(master), translation(translation)
604 memset(p, 0,
sizeof(*p));
608 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
610 if (ar ==
nullptr)
break;
613 if (argno != -1 && ar->consumes == 0) strgen_fatal(
"Non consumer param can't have a paramindex");
616 if (argno != -1) argidx = argno;
617 if (argidx < 0 || (uint)argidx >=
lengthof(p->cmd)) strgen_fatal(
"invalid param idx %d", argidx);
618 if (p->cmd[argidx] !=
nullptr && p->cmd[argidx] != ar) strgen_fatal(
"duplicate param idx %d", argidx);
620 p->cmd[argidx++] = ar;
622 if (p->np >=
lengthof(p->pairs)) strgen_fatal(
"too many commands in string, max " PRINTF_SIZE,
lengthof(p->pairs));
623 p->pairs[p->np].a = ar;
624 p->pairs[p->np].v = param[0] !=
'\0' ?
stredup(param) :
"";
633 if (a ==
nullptr)
return nullptr;
635 if (strcmp(a->cmd,
"STRING1") == 0 ||
636 strcmp(a->cmd,
"STRING2") == 0 ||
637 strcmp(a->cmd,
"STRING3") == 0 ||
638 strcmp(a->cmd,
"STRING4") == 0 ||
639 strcmp(a->cmd,
"STRING5") == 0 ||
640 strcmp(a->cmd,
"STRING6") == 0 ||
641 strcmp(a->cmd,
"STRING7") == 0 ||
642 strcmp(a->cmd,
"RAW_STRING") == 0) {
643 return FindCmd(
"STRING", 6);
650 static bool CheckCommandsMatch(
char *a,
char *b,
const char *name)
662 ExtractCommandString(&templ, b,
true);
663 ExtractCommandString(&lang, a,
true);
666 if (templ.np != lang.np) {
667 strgen_warning(
"%s: template string and language string have a different # of commands", name);
671 for (uint i = 0; i < templ.np; i++) {
674 for (uint j = 0; j < lang.np; j++) {
675 if (templ.pairs[i].a == lang.pairs[j].a &&
676 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
678 lang.pairs[j].a =
nullptr;
685 strgen_warning(
"%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
692 for (uint i = 0; i <
lengthof(templ.cmd); i++) {
693 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
694 strgen_warning(
"%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
695 lang.cmd[i] ==
nullptr ?
"<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
696 templ.cmd[i] ==
nullptr ?
"<empty>" : templ.cmd[i]->cmd);
704 void StringReader::HandleString(
char *str)
707 if (str[1] ==
'#' && str[2] !=
'#') this->
HandlePragma(str + 2);
712 if (*str ==
';' || *str ==
' ' || *str ==
'\0')
return;
714 char *s = strchr(str,
':');
716 strgen_error(
"Line has no ':' delimiter");
723 for (t = s; t > str && (t[-1] ==
' ' || t[-1] ==
'\t'); t--) {}
729 for (tmp = s; *tmp !=
'\0';) {
730 size_t len = Utf8Validate(tmp);
731 if (len == 0) strgen_fatal(
"Invalid UTF-8 sequence in '%s'", s);
737 (c >= 0xE000 && c <= 0xF8FF) ||
738 (c >= 0xFFF0 && c <= 0xFFFF)) {
739 strgen_fatal(
"Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
747 char *casep = strchr(str,
'.');
748 if (casep !=
nullptr) *casep++ =
'\0';
754 if (casep !=
nullptr) {
755 strgen_error(
"Cases in the base translation are not supported.");
759 if (ent !=
nullptr) {
760 strgen_error(
"String name '%s' is used multiple times", str);
764 if (this->
data.
strings[this->data.next_string_id] !=
nullptr) {
765 strgen_error(
"String ID 0x" PRINTF_SIZEX
" for '%s' already in use by '%s'", this->
data.
next_string_id, str, this->data.strings[this->data.next_string_id]->name);
772 if (ent ==
nullptr) {
773 strgen_warning(
"String name '%s' does not exist in master file", str);
778 strgen_error(
"String name '%s' is used multiple times", str);
783 if (!CheckCommandsMatch(s, ent->
english, str))
return;
785 if (casep !=
nullptr) {
799 if (!memcmp(str,
"plural ", 7)) {
805 strgen_fatal(
"unknown pragma '%s'", str);
809 static void rstrip(
char *buf)
811 size_t i = strlen(buf);
812 while (i > 0 && (buf[i - 1] ==
'\r' || buf[i - 1] ==
'\n' || buf[i - 1] ==
' ')) i--;
819 _warnings = _errors = 0;
837 this->HandleString(buf);
842 strgen_error(
"Too many strings, maximum allowed is " PRINTF_SIZE, this->
data.
max_strings);
854 if (data.
strings[i] !=
nullptr) {
863 static int TranslateArgumentIdx(
int argidx,
int offset)
867 if (argidx < 0 || (uint)argidx >=
lengthof(_cur_pcs.cmd)) {
868 strgen_fatal(
"invalid argidx %d", argidx);
870 const CmdStruct *cs = _cur_pcs.cmd[argidx];
871 if (cs !=
nullptr && cs->consumes <= offset) {
872 strgen_fatal(
"invalid argidx offset %d:%d", argidx, offset);
875 if (_cur_pcs.cmd[argidx] ==
nullptr) {
876 strgen_fatal(
"no command for this argidx %d", argidx);
879 for (
int i = sum = 0; i < argidx; i++) {
882 sum += (cs !=
nullptr) ? cs->consumes : 1;
888 static void PutArgidxCommand(
Buffer *buffer)
891 buffer->
AppendByte(TranslateArgumentIdx(_cur_argidx));
895 static void PutCommandString(
Buffer *buffer,
const char *str)
899 while (*str !=
'\0') {
909 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
910 if (cs ==
nullptr)
break;
918 if (cs->consumes > 0) {
920 if (argno != -1 && argno != _cur_argidx) {
922 PutArgidxCommand(buffer);
926 cs = _cur_pcs.cmd[_cur_argidx++];
928 strgen_fatal(
"%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
932 cs->proc(buffer, param, cs->value);
944 if (length >= 0x4000) {
945 strgen_fatal(
"string too long");
948 if (length >= 0xC0) {
949 buffer[offs++] = (length >> 8) | 0xC0;
951 buffer[offs++] = length & 0xFF;
952 this->
Write((
byte*)buffer, offs);
962 for (
size_t tab = 0; tab < data.
tabs; tab++) {
968 for (uint j = 0; j != in_use[tab]; j++) {
982 for (
size_t tab = 0; tab < data.
tabs; tab++) {
983 for (uint j = 0; j != in_use[tab]; j++) {
994 _cur_ident = ls->
name;
998 if (_show_todo > 0 && ls->
translated ==
nullptr) {
999 if ((_show_todo & 2) != 0) {
1000 strgen_warning(
"'%s' is untranslated", ls->
name);
1002 if ((_show_todo & 1) != 0) {
1003 const char *s =
"<TODO> ";
1009 ExtractCommandString(&_cur_pcs, ls->
english,
false);
1021 if (casep !=
nullptr) {
1031 for (num = 0, c = casep; c; c = c->
next) num++;
1035 for (c = casep; c !=
nullptr; c = c->
next) {
1038 uint pos = (uint)buffer.size();
1042 PutCommandString(&buffer, c->
string);
1045 uint size = (uint)buffer.size() - (pos + 2);
1046 buffer[pos + 0] =
GB(size, 8, 8);
1047 buffer[pos + 1] =
GB(size, 0, 8);
1051 if (cmdp !=
nullptr) PutCommandString(&buffer, cmdp);
1054 this->
Write(buffer.data(), buffer.size());