OpenTTD Source  12.0-beta2
strgen_base.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
10 #include "../stdafx.h"
11 #include "../core/endian_func.hpp"
12 #include "../string_func.h"
13 #include "../table/control_codes.h"
14 
15 #include "strgen.h"
16 
17 
18 #include "../table/strgen_tables.h"
19 
20 #include "../safeguards.h"
21 
22 /* Compiles a list of strings into a compiled string list */
23 
/** True while the text being emitted is not the English text itself (assigned from `cmdp != ls->english` by the language writer). */
24 static bool _translated;
/** Copy of the reader's `translation` flag; CheckCommandsMatch() skips all checks while this is false. */
25 static bool _translation;
/** Name of the file being parsed; replaced with the reader's file name when parsing starts. */
26 const char *_file = "(unknown file)";
/** The current line we're parsing in the input file. */
27 int _cur_line;
/** Error and warning counters (both reset when parsing of a file starts) and the bit flags steering the "untranslated" reporting. */
28 int _errors, _warnings, _show_todo;
30 
/** Maximum size of every command block, not counting the name of the command itself. */
31 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100;
/* Forward declaration; the definition appears further down in this file. */
32 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
33 
/**
 * Create a new case.
 * @param caseidx The index of the case.
 * @param string  The text of the case; a private copy is taken with stredup().
 * @param next    The next, chained, case.
 */
40 Case::Case(int caseidx, const char *string, Case *next) :
41  caseidx(caseidx), string(stredup(string)), next(next)
42 {
43 }
44 
/* Body of Case::~Case() (its signature line is absent from this listing): free everything we allocated. */
47 {
48  free(this->string);
/* Deleting the next case runs this same destructor on it, so the whole chain is released. */
49  delete this->next;
50 }
51 
/**
 * Create a new string.
 * @param name    Name of the string; copied with stredup().
 * @param english English text; copied with stredup().
 * @param index   Stored as-is; StringData::Add() uses it as the slot in its string array.
 * @param line    Stored as-is; callers in this file pass the current input line.
 *
 * The translation, the translated cases and the hash chain link start out empty.
 */
59 LangString::LangString(const char *name, const char *english, size_t index, int line) :
60  name(stredup(name)), english(stredup(english)), translated(nullptr),
61  hash_next(0), index(index), line(line), translated_case(nullptr)
62 {
63 }
64 
/* Body of LangString::~LangString() (its signature line is absent from this listing): free everything we allocated. */
67 {
68  free(this->name);
69  free(this->english);
70  free(this->translated);
71  delete this->translated_case;
72 }
73 
/*
 * NOTE(review): the signature line is absent from this listing; presumably this is
 * LangString::FreeTranslation(), which StringData calls on every stored string.
 * Releases the translated text and the translated cases, and resets both pointers
 * so the entry can receive a new translation.
 */
76 {
77  free(this->translated);
78  this->translated = nullptr;
79 
80  delete this->translated_case;
81  this->translated_case = nullptr;
82 }
83 
/**
 * Create a new string data container.
 * @param tabs Number of tabs; the container has room for tabs * TAB_SIZE strings.
 */
88 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
89 {
/* NOTE(review): the rest of the file treats unused slots as nullptr / 0, so CallocT presumably zero-fills - confirm. */
90  this->strings = CallocT<LangString *>(max_strings);
91  this->hash_heads = CallocT<size_t>(max_strings);
92  this->next_string_id = 0;
93 }
94 
/* Body of StringData::~StringData() (its signature line is absent from this listing): free everything we allocated. */
97 {
/* Delete every stored string first, then the two arrays themselves. */
98  for (size_t i = 0; i < this->max_strings; i++) delete this->strings[i];
99  free(this->strings);
100  free(this->hash_heads);
101 }
102 
/* Body of StringData::FreeTranslation() (its signature line is absent from this listing): free all data related to the translation. */
105 {
/* Walk every slot; the strings themselves are kept, only their translation data is dropped. */
106  for (size_t i = 0; i < this->max_strings; i++) {
107  LangString *ls = this->strings[i];
108  if (ls != nullptr) ls->FreeTranslation();
109  }
110 }
111 
117 uint StringData::HashStr(const char *s) const
118 {
119  uint hash = 0;
120  for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
121  return hash % this->max_strings;
122 }
123 
129 void StringData::Add(const char *s, LangString *ls)
130 {
131  uint hash = this->HashStr(s);
132  ls->hash_next = this->hash_heads[hash];
133  /* Off-by-one for hash find. */
134  this->hash_heads[hash] = ls->index + 1;
135  this->strings[ls->index] = ls;
136 }
137 
/*
 * Body of StringData::Find(const char *s) (its signature line is absent from this listing):
 * find a LangString based on the string name, or return nullptr when there is none.
 */
144 {
145  size_t idx = this->hash_heads[this->HashStr(s)];
146 
/* Chain links are stored as index + 1, so 0 ends the chain; the post-decrement turns the link back into an index. */
147  while (idx-- > 0) {
148  LangString *ls = this->strings[idx];
149 
150  if (strcmp(ls->name, s) == 0) return ls;
151  idx = ls->hash_next;
152  }
153  return nullptr;
154 }
155 
162 uint StringData::VersionHashStr(uint hash, const char *s) const
163 {
164  for (; *s != '\0'; s++) {
165  hash = ROL(hash, 3) ^ *s;
166  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
167  }
168  return hash;
169 }
170 
/*
 * Body of StringData::Version() (its signature line is absent from this listing):
 * make a hash of the file to get a unique "version number".
 * Every stored string contributes its slot number, its name and the commands
 * found in its English text.
 */
176 {
177  uint hash = 0;
178 
179  for (size_t i = 0; i < this->max_strings; i++) {
180  const LangString *ls = this->strings[i];
181 
182  if (ls != nullptr) {
183  const CmdStruct *cs;
184  const char *s;
185  char buf[MAX_COMMAND_PARAM_SIZE];
186  int argno;
187  int casei;
188 
/* Mix in the slot number, then the name starting at its second character. */
189  s = ls->name;
190  hash ^= i * 0x717239;
191  hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
192  hash = this->VersionHashStr(hash, s + 1);
193 
/* Mix in the table position of every command in the English text, except those flagged C_DONTCOUNT. */
194  s = ls->english;
195  while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) {
196  if (cs->flags & C_DONTCOUNT) continue;
197 
198  hash ^= (cs - _cmd_structs) * 0x1234567;
199  hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
200  }
201  }
202  }
203 
204  return hash;
205 }
206 
211 uint StringData::CountInUse(uint tab) const
212 {
213  int i;
214  for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break;
215  return i + 1;
216 }
217 
/** Name of the string currently being written; used as prefix in error and warning messages. */
218 static const char *_cur_ident;
219 
/** A non-consuming command together with the parameter text it was given (filled in by ExtractCommandString()). */
220 struct CmdPair {
221  const CmdStruct *a; /* The command. */
222  const char *v; /* Its parameter text; "" when there was none. */
223 };
224 
/*
 * NOTE(review): the opening line of this struct is absent from this listing; judging by
 * `static ParsedCommandStruct _cur_pcs` and the p->np / p->pairs / p->cmd accesses in
 * ExtractCommandString(), these are the members of ParsedCommandStruct.
 */
/* Number of entries of 'pairs' that are in use. */
226  uint np;
/* The non-consuming commands that were found. */
227  CmdPair pairs[32];
228  const CmdStruct *cmd[32]; // ordered by param #
229 };
230 
231 /* Used when generating some advanced commands. */
/* Commands of the English text of the string being written (filled by ExtractCommandString() in the language writer). */
232 static ParsedCommandStruct _cur_pcs;
/* Index of the next argument to consume while a command string is being emitted. */
233 static int _cur_argidx;
234 
236 struct Buffer : std::vector<byte> {
241  void AppendByte(byte value)
242  {
243  this->push_back(value);
244  }
245 
250  void AppendUtf8(uint32 value)
251  {
252  if (value < 0x80) {
253  this->push_back(value);
254  } else if (value < 0x800) {
255  this->push_back(0xC0 + GB(value, 6, 5));
256  this->push_back(0x80 + GB(value, 0, 6));
257  } else if (value < 0x10000) {
258  this->push_back(0xE0 + GB(value, 12, 4));
259  this->push_back(0x80 + GB(value, 6, 6));
260  this->push_back(0x80 + GB(value, 0, 6));
261  } else if (value < 0x110000) {
262  this->push_back(0xF0 + GB(value, 18, 3));
263  this->push_back(0x80 + GB(value, 12, 6));
264  this->push_back(0x80 + GB(value, 6, 6));
265  this->push_back(0x80 + GB(value, 0, 6));
266  } else {
267  strgen_warning("Invalid unicode value U+0x%X", value);
268  }
269  }
270 };
271 
/**
 * Check whether a valid UTF-8 encoded character starts at the given position.
 *
 * Overlong encodings, values above U+10FFFF and the UTF-16 surrogate range
 * U+D800..U+DFFF are rejected. Following bytes are only inspected when all
 * bytes before them are valid parts of the sequence, so the read never runs
 * past the terminating NUL of the string.
 *
 * @param s The position in a NUL terminated string to check.
 * @return The length in bytes of the character (1..4), or 0 when the sequence is invalid.
 *         A NUL byte counts as a valid character of length 1.
 */
size_t Utf8Validate(const char *s)
{
	/* Look at the bytes as unsigned values so the tests do not depend on the signedness of char. */
	const unsigned char *b = reinterpret_cast<const unsigned char *>(s);

	if ((b[0] & 0x80) == 0) {
		/* 1 byte */
		return 1;
	}

	if ((b[0] & 0xE0) == 0xC0) {
		/* 2 bytes */
		if ((b[1] & 0xC0) != 0x80) return 0;
		const uint32_t c = (uint32_t)(b[0] & 0x1F) << 6 | (b[1] & 0x3F);
		return c >= 0x80 ? 2 : 0;
	}

	if ((b[0] & 0xF0) == 0xE0) {
		/* 3 bytes */
		if ((b[1] & 0xC0) != 0x80 || (b[2] & 0xC0) != 0x80) return 0;
		const uint32_t c = (uint32_t)(b[0] & 0x0F) << 12 | (uint32_t)(b[1] & 0x3F) << 6 | (b[2] & 0x3F);
		if (c < 0x800) return 0; // overlong encoding
		if (c >= 0xD800 && c <= 0xDFFF) return 0; // surrogates are not valid in UTF-8
		return 3;
	}

	if ((b[0] & 0xF8) == 0xF0) {
		/* 4 bytes */
		if ((b[1] & 0xC0) != 0x80 || (b[2] & 0xC0) != 0x80 || (b[3] & 0xC0) != 0x80) return 0;
		const uint32_t c = (uint32_t)(b[0] & 0x07) << 18 | (uint32_t)(b[1] & 0x3F) << 12 | (uint32_t)(b[2] & 0x3F) << 6 | (b[3] & 0x3F);
		return (c >= 0x10000 && c <= 0x10FFFF) ? 4 : 0;
	}

	return 0;
}
295 
296 
297 void EmitSingleChar(Buffer *buffer, char *buf, int value)
298 {
299  if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
300  buffer->AppendUtf8(value);
301 }
302 
303 
304 /* The plural specifier looks like
305  * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
306 
307 /* This is encoded like
308  * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
309 
/**
 * Parse an (optionally relative) number with an optional ":offset" suffix.
 *
 * A leading '+' or a negative number is relative: it is added to \a *value.
 * Any other number replaces \a *value. When \a offset is not nullptr and the
 * number is directly followed by ':', the number after the colon is stored in
 * \a *offset.
 *
 * Nothing is written to \a *buf, \a *value or \a *offset unless the whole
 * parse succeeds, so callers can safely fall back to their defaults when
 * false is returned.
 *
 * @param buf    The text to parse; on success advanced to just past the parsed text.
 * @param value  The number to update.
 * @param offset The offset to update, or nullptr when no offset is accepted.
 * @return True when a number (and, if a ':' was present, an offset) was parsed.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *s = *buf;
	char *end;

	while (*s == ' ' || *s == '\t') s++;

	const bool rel = (*s == '+');
	if (rel) s++;

	const int v = strtol(s, &end, 0);
	if (end == s) return false;

	bool has_offset = false;
	int parsed_offset = 0;
	if (offset != nullptr && *end == ':') {
		/* Take the Nth within */
		s = end + 1;
		parsed_offset = strtol(s, &end, 0);
		if (end == s) return false;
		has_offset = true;
	}

	/* Everything parsed; only now touch the output parameters. */
	if (rel || v < 0) {
		*value += v;
	} else {
		*value = v;
	}
	if (has_offset) *offset = parsed_offset;
	*buf = end;
	return true;
}
337 
/**
 * Parse out the next word.
 *
 * A word is either a run of characters up to the next space or tab, or, when
 * it starts with a double quote, everything up to the closing quote. The
 * character ending the word is overwritten with NUL, so the input is modified.
 *
 * @param buf The text to parse; advanced past the word when one was found,
 *            left untouched when there is none.
 * @return The NUL terminated word, or nullptr when only whitespace is left.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;

	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return nullptr;

	/* A quoted word ends at the next quote, any other word at the next whitespace. */
	const bool quoted = (*cursor == '"');
	if (quoted) cursor++;

	char *word = cursor;
	while (*cursor != '\0') {
		const bool at_end = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
		if (at_end) {
			*cursor++ = '\0';
			break;
		}
		cursor++;
	}

	*buf = cursor;
	return word;
}
372 
373 /* Forward declaration */
374 static int TranslateArgumentIdx(int arg, int offset = 0);
375 
376 static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
377 {
378  buffer->AppendByte(nw);
379  for (uint i = 0; i < nw; i++) buffer->AppendByte((byte)strlen(words[i]) + 1);
380  for (uint i = 0; i < nw; i++) {
381  for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
382  buffer->AppendByte(0);
383  }
384 }
385 
/*
 * Emit a plural command: SCC_PLURAL_LIST, the plural form of the language, the
 * translated index of the argument the choice depends on, and the word list.
 * 'buf' holds the parameter text of the command; 'value' is not used here.
 *
 * NOTE(review): listing line 390, which must declare 'expected', is absent from this listing.
 */
386 void EmitPlural(Buffer *buffer, char *buf, int value)
387 {
388  int argidx = _cur_argidx;
389  int offset = -1;
391  const char **words = AllocaM(const char *, std::max(expected, MAX_PLURALS));
392  int nw = 0;
393 
394  /* Parse out the number, if one exists. Otherwise default to prev arg. */
395  if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
396 
/* NOTE(review): argidx is used as an array index here before any range check visible in this function. */
397  const CmdStruct *cmd = _cur_pcs.cmd[argidx];
398  if (offset == -1) {
399  /* Use default offset */
400  if (cmd == nullptr || cmd->default_plural_offset < 0) {
401  strgen_fatal("Command '%s' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
402  }
403  offset = cmd->default_plural_offset;
404  }
405 
406  /* Parse each string */
407  for (nw = 0; nw < MAX_PLURALS; nw++) {
408  words[nw] = ParseWord(&buf);
409  if (words[nw] == nullptr) break;
410  }
411 
412  if (nw == 0) {
413  strgen_fatal("%s: No plural words", _cur_ident);
414  }
415 
/* A wrong number of forms is fatal for translated text; for untranslated text the list is truncated or padded with its last word. */
416  if (expected != nw) {
417  if (_translated) {
418  strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
419  expected, nw);
420  } else {
421  if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
422  if (nw > expected) {
423  nw = expected;
424  } else {
425  for (; nw < expected; nw++) {
426  words[nw] = words[nw - 1];
427  }
428  }
429  }
430  }
431 
432  buffer->AppendUtf8(SCC_PLURAL_LIST);
433  buffer->AppendByte(_lang.plural_form);
434  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
435  EmitWordList(buffer, words, nw);
436 }
437 
438 
439 void EmitGender(Buffer *buffer, char *buf, int value)
440 {
441  int argidx = _cur_argidx;
442  int offset = 0;
443  uint nw;
444 
445  if (buf[0] == '=') {
446  buf++;
447 
448  /* This is a {G=DER} command */
449  nw = _lang.GetGenderIndex(buf);
450  if (nw >= MAX_NUM_GENDERS) strgen_fatal("G argument '%s' invalid", buf);
451 
452  /* now nw contains the gender index */
453  buffer->AppendUtf8(SCC_GENDER_INDEX);
454  buffer->AppendByte(nw);
455  } else {
456  const char *words[MAX_NUM_GENDERS];
457 
458  /* This is a {G 0 foo bar two} command.
459  * If no relative number exists, default to +0 */
460  ParseRelNum(&buf, &argidx, &offset);
461 
462  const CmdStruct *cmd = _cur_pcs.cmd[argidx];
463  if (cmd == nullptr || (cmd->flags & C_GENDER) == 0) {
464  strgen_fatal("Command '%s' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
465  }
466 
467  for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
468  words[nw] = ParseWord(&buf);
469  if (words[nw] == nullptr) break;
470  }
471  if (nw != _lang.num_genders) strgen_fatal("Bad # of arguments for gender command");
472 
473  assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
474  buffer->AppendUtf8(SCC_GENDER_LIST);
475  buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
476  EmitWordList(buffer, words, nw);
477  }
478 }
479 
480 static const CmdStruct *FindCmd(const char *s, int len)
481 {
482  for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
483  if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
484  }
485  return nullptr;
486 }
487 
488 static uint ResolveCaseName(const char *str, size_t len)
489 {
490  /* First get a clean copy of only the case name, then resolve it. */
491  char case_str[CASE_GENDER_LEN];
492  len = std::min(lengthof(case_str) - 1, len);
493  memcpy(case_str, str, len);
494  case_str[len] = '\0';
495 
496  uint8 case_idx = _lang.GetCaseIndex(case_str);
497  if (case_idx >= MAX_NUM_CASES) strgen_fatal("Invalid case-name '%s'", case_str);
498  return case_idx + 1;
499 }
500 
501 
502 /* returns nullptr on eof
503  * else returns command struct */
504 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
505 {
506  const char *s = *str, *start;
507  char c;
508 
509  *argno = -1;
510  *casei = -1;
511 
512  /* Scan to the next command, exit if there's no next command. */
513  for (; *s != '{'; s++) {
514  if (*s == '\0') return nullptr;
515  }
516  s++; // Skip past the {
517 
518  if (*s >= '0' && *s <= '9') {
519  char *end;
520 
521  *argno = strtoul(s, &end, 0);
522  if (*end != ':') strgen_fatal("missing arg #");
523  s = end + 1;
524  }
525 
526  /* parse command name */
527  start = s;
528  do {
529  c = *s++;
530  } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
531 
532  const CmdStruct *cmd = FindCmd(start, s - start - 1);
533  if (cmd == nullptr) {
534  strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
535  return nullptr;
536  }
537 
538  if (c == '.') {
539  const char *casep = s;
540 
541  if (!(cmd->flags & C_CASE)) {
542  strgen_fatal("Command '%s' can't have a case", cmd->cmd);
543  }
544 
545  do {
546  c = *s++;
547  } while (c != '}' && c != ' ' && c != '\0');
548  *casei = ResolveCaseName(casep, s - casep - 1);
549  }
550 
551  if (c == '\0') {
552  strgen_error("Missing } from command '%s'", start);
553  return nullptr;
554  }
555 
556 
557  if (c != '}') {
558  if (c == '=') s--;
559  /* copy params */
560  start = s;
561  for (;;) {
562  c = *s++;
563  if (c == '}') break;
564  if (c == '\0') {
565  strgen_error("Missing } from command '%s'", start);
566  return nullptr;
567  }
568  if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
569  *param++ = c;
570  }
571  }
572  *param = '\0';
573 
574  *str = s;
575 
576  return cmd;
577 }
578 
/**
 * Prepare reading.
 * @param data        The data to fill during reading.
 * @param file        The file we are reading; the name is copied with stredup().
 * @param master      Are we reading the master file?
 * @param translation Stored as-is; copied into the file-level _translation flag when parsing starts.
 */
586 StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
587  data(data), file(stredup(file)), master(master), translation(translation)
588 {
589 }
590 
/* Body of StringReader::~StringReader() (its signature line is absent from this listing): release the copied file name. */
593 {
594  free(file);
595 }
596 
597 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
598 {
599  char param[MAX_COMMAND_PARAM_SIZE];
600  int argno;
601  int argidx = 0;
602  int casei;
603 
604  memset(p, 0, sizeof(*p));
605 
606  for (;;) {
607  /* read until next command from a. */
608  const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
609 
610  if (ar == nullptr) break;
611 
612  /* Sanity checking */
613  if (argno != -1 && ar->consumes == 0) strgen_fatal("Non consumer param can't have a paramindex");
614 
615  if (ar->consumes) {
616  if (argno != -1) argidx = argno;
617  if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) strgen_fatal("invalid param idx %d", argidx);
618  if (p->cmd[argidx] != nullptr && p->cmd[argidx] != ar) strgen_fatal("duplicate param idx %d", argidx);
619 
620  p->cmd[argidx++] = ar;
621  } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
622  if (p->np >= lengthof(p->pairs)) strgen_fatal("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
623  p->pairs[p->np].a = ar;
624  p->pairs[p->np].v = param[0] != '\0' ? stredup(param) : "";
625  p->np++;
626  }
627  }
628 }
629 
630 
631 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
632 {
633  if (a == nullptr) return nullptr;
634 
635  if (strcmp(a->cmd, "STRING1") == 0 ||
636  strcmp(a->cmd, "STRING2") == 0 ||
637  strcmp(a->cmd, "STRING3") == 0 ||
638  strcmp(a->cmd, "STRING4") == 0 ||
639  strcmp(a->cmd, "STRING5") == 0 ||
640  strcmp(a->cmd, "STRING6") == 0 ||
641  strcmp(a->cmd, "STRING7") == 0 ||
642  strcmp(a->cmd, "RAW_STRING") == 0) {
643  return FindCmd("STRING", 6);
644  }
645 
646  return a;
647 }
648 
649 
650 static bool CheckCommandsMatch(char *a, char *b, const char *name)
651 {
652  /* If we're not translating, i.e. we're compiling the base language,
653  * it is pointless to do all these checks as it'll always be correct.
654  * After all, all checks are based on the base language.
655  */
656  if (!_translation) return true;
657 
658  ParsedCommandStruct templ;
659  ParsedCommandStruct lang;
660  bool result = true;
661 
662  ExtractCommandString(&templ, b, true);
663  ExtractCommandString(&lang, a, true);
664 
665  /* For each string in templ, see if we find it in lang */
666  if (templ.np != lang.np) {
667  strgen_warning("%s: template string and language string have a different # of commands", name);
668  result = false;
669  }
670 
671  for (uint i = 0; i < templ.np; i++) {
672  /* see if we find it in lang, and zero it out */
673  bool found = false;
674  for (uint j = 0; j < lang.np; j++) {
675  if (templ.pairs[i].a == lang.pairs[j].a &&
676  strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
677  /* it was found in both. zero it out from lang so we don't find it again */
678  lang.pairs[j].a = nullptr;
679  found = true;
680  break;
681  }
682  }
683 
684  if (!found) {
685  strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
686  result = false;
687  }
688  }
689 
690  /* if we reach here, all non consumer commands match up.
691  * Check if the non consumer commands match up also. */
692  for (uint i = 0; i < lengthof(templ.cmd); i++) {
693  if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
694  strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
695  lang.cmd[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
696  templ.cmd[i] == nullptr ? "<empty>" : templ.cmd[i]->cmd);
697  result = false;
698  }
699  }
700 
701  return result;
702 }
703 
/*
 * Handle a single line of a language file.
 * Lines starting with "##" (but not "###") are passed to HandlePragma(); other '#' lines,
 * lines starting with ';' or a space, and empty lines are ignored. Every other line must
 * look like "NAME[.case]:text". In the master file the line defines a new string; in other
 * files it supplies the translation (or one case of it) for an existing string.
 * The line is modified in place while it is split up.
 */
704 void StringReader::HandleString(char *str)
705 {
706  if (*str == '#') {
707  if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
708  return;
709  }
710 
711  /* Ignore comments & blank lines */
712  if (*str == ';' || *str == ' ' || *str == '\0') return;
713 
714  char *s = strchr(str, ':');
715  if (s == nullptr) {
716  strgen_error("Line has no ':' delimiter");
717  return;
718  }
719 
720  char *t;
721  /* Trim spaces.
722  * After this str points to the command name, and s points to the command contents */
723  for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
724  *t = 0;
725  s++;
726 
727  /* Check string is valid UTF-8 */
728  const char *tmp;
729  for (tmp = s; *tmp != '\0';) {
730  size_t len = Utf8Validate(tmp);
731  if (len == 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s);
732 
733  WChar c;
734  Utf8Decode(&c, tmp);
735  if (c <= 0x001F || // ASCII control character range
736  c == 0x200B || // Zero width space
737  (c >= 0xE000 && c <= 0xF8FF) || // Private range
738  (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
739  strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
740  }
741 
742  tmp += len;
743  }
744 
745  /* Check if the string has a case..
746  * The syntax for cases is IDENTNAME.case */
/* The name was already cut off at the ':' above, so this only finds a '.' inside the name. */
747  char *casep = strchr(str, '.');
748  if (casep != nullptr) *casep++ = '\0';
749 
750  /* Check if this string already exists.. */
751  LangString *ent = this->data.Find(str);
752 
753  if (this->master) {
754  if (casep != nullptr) {
755  strgen_error("Cases in the base translation are not supported.");
756  return;
757  }
758 
759  if (ent != nullptr) {
760  strgen_error("String name '%s' is used multiple times", str);
761  return;
762  }
763 
764  if (this->data.strings[this->data.next_string_id] != nullptr) {
765  strgen_error("String ID 0x" PRINTF_SIZEX " for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
766  return;
767  }
768 
769  /* Allocate a new LangString */
770  this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
771  } else {
772  if (ent == nullptr) {
773  strgen_warning("String name '%s' does not exist in master file", str);
774  return;
775  }
776 
/* Several cases of one string are fine; a second plain translation is not. */
777  if (ent->translated && casep == nullptr) {
778  strgen_error("String name '%s' is used multiple times", str);
779  return;
780  }
781 
782  /* make sure that the commands match */
783  if (!CheckCommandsMatch(s, ent->english, str)) return;
784 
785  if (casep != nullptr) {
/* The new case becomes the head of the chain of translated cases. */
786  ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case);
787  } else {
788  ent->translated = stredup(s);
789  /* If the string was translated, use the line from the
790  * translated language so errors in the translated file
791  * are properly referenced to. */
792  ent->line = _cur_line;
793  }
794  }
795 }
796 
/*
 * Body of StringReader::HandlePragma(char *str) (its signature line is absent from this
 * listing): handle the pragma of the file. Only "plural <number>" is handled here; any
 * other pragma is a fatal error.
 * NOTE(review): listing line 801, which must open the check guarding the
 * "Invalid pluralform" error, is absent from this listing.
 */
798 {
799  if (!memcmp(str, "plural ", 7)) {
800  _lang.plural_form = atoi(str + 7);
802  strgen_fatal("Invalid pluralform %d", _lang.plural_form);
803  }
804  } else {
805  strgen_fatal("unknown pragma '%s'", str);
806  }
807 }
808 
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * Tabs are left alone.
 * @param buf The NUL terminated string to strip.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	/* Walk back from the terminator for as long as the previous character must go. */
	while (end != buf) {
		const char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}

	*end = '\0';
}
815 
/*
 * NOTE(review): the signature line is absent from this listing; the body uses this->ReadLine(),
 * this->HandleString() and this->data, so it is presumably the file-parsing member of StringReader.
 * Resets the counters and per-file state, then feeds the file line by line to HandleString().
 * NOTE(review): listing lines 830-832 are absent from this listing as well.
 */
817 {
818  char buf[2048];
819  _warnings = _errors = 0;
820 
821  _translation = this->translation;
822  _file = this->file;
823 
824  /* Abusing _show_todo to replace "warning" with "info" for translations. */
825  _show_todo &= 3;
826  if (!this->translation) _show_todo |= 4;
827 
828  /* For each new file we parse, reset the genders, and language codes. */
829  MemSetT(&_lang, 0);
833 
834  _cur_line = 1;
/* Reading stops at the end of the input or as soon as every string slot is taken. */
835  while (this->data.next_string_id < this->data.max_strings && this->ReadLine(buf, lastof(buf)) != nullptr) {
836  rstrip(buf);
837  this->HandleString(buf);
838  _cur_line++;
839  }
840 
841  if (this->data.next_string_id == this->data.max_strings) {
842  strgen_error("Too many strings, maximum allowed is " PRINTF_SIZE, this->data.max_strings);
843  }
844 }
845 
/*
 * NOTE(review): the signature line is absent from this listing; the body uses a 'data'
 * object with 'strings' / 'max_strings' and calls this->WriteStringID(), so it is presumably
 * the member that writes the string-ID header.
 * Writes the ID of every known string, followed by "STR_LAST_STRINGID" with the highest
 * ID that was written (0 when there are no strings).
 */
851 {
852  int last = 0;
853  for (size_t i = 0; i < data.max_strings; i++) {
854  if (data.strings[i] != nullptr) {
855  this->WriteStringID(data.strings[i]->name, (int)i);
856  last = (int)i;
857  }
858  }
859 
860  this->WriteStringID("STR_LAST_STRINGID", last);
861 }
862 
863 static int TranslateArgumentIdx(int argidx, int offset)
864 {
865  int sum;
866 
867  if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
868  strgen_fatal("invalid argidx %d", argidx);
869  }
870  const CmdStruct *cs = _cur_pcs.cmd[argidx];
871  if (cs != nullptr && cs->consumes <= offset) {
872  strgen_fatal("invalid argidx offset %d:%d", argidx, offset);
873  }
874 
875  if (_cur_pcs.cmd[argidx] == nullptr) {
876  strgen_fatal("no command for this argidx %d", argidx);
877  }
878 
879  for (int i = sum = 0; i < argidx; i++) {
880  const CmdStruct *cs = _cur_pcs.cmd[i];
881 
882  sum += (cs != nullptr) ? cs->consumes : 1;
883  }
884 
885  return sum + offset;
886 }
887 
888 static void PutArgidxCommand(Buffer *buffer)
889 {
890  buffer->AppendUtf8(SCC_ARG_INDEX);
891  buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
892 }
893 
894 
895 static void PutCommandString(Buffer *buffer, const char *str)
896 {
897  _cur_argidx = 0;
898 
899  while (*str != '\0') {
900  /* Process characters as they are until we encounter a { */
901  if (*str != '{') {
902  buffer->AppendByte(*str++);
903  continue;
904  }
905 
906  char param[MAX_COMMAND_PARAM_SIZE];
907  int argno;
908  int casei;
909  const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
910  if (cs == nullptr) break;
911 
912  if (casei != -1) {
913  buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
914  buffer->AppendByte(casei);
915  }
916 
917  /* For params that consume values, we need to handle the argindex properly */
918  if (cs->consumes > 0) {
919  /* Check if we need to output a move-param command */
920  if (argno != -1 && argno != _cur_argidx) {
921  _cur_argidx = argno;
922  PutArgidxCommand(buffer);
923  }
924 
925  /* Output the one from the master string... it's always accurate. */
926  cs = _cur_pcs.cmd[_cur_argidx++];
927  if (cs == nullptr) {
928  strgen_fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
929  }
930  }
931 
932  cs->proc(buffer, param, cs->value);
933  }
934 }
935 
/*
 * NOTE(review): the signature line is absent from this listing; the body reads a 'length'
 * value and calls this->Write(), and the language writer calls this->WriteLength(...), so
 * this is presumably LanguageWriter::WriteLength().
 * Writes a length in one or two bytes: lengths below 0xC0 take a single byte, larger ones
 * a first byte holding the high bits or-ed with 0xC0, followed by the low byte.
 * Lengths of 0x4000 and above are a fatal error.
 */
941 {
942  char buffer[2];
943  int offs = 0;
944  if (length >= 0x4000) {
945  strgen_fatal("string too long");
946  }
947 
948  if (length >= 0xC0) {
949  buffer[offs++] = (length >> 8) | 0xC0;
950  }
951  buffer[offs++] = length & 0xFF;
952  this->Write((byte*)buffer, offs);
953 }
954 
/*
 * Body of LanguageWriter::WriteLang(const StringData &data) (its signature line is absent
 * from this listing): actually write the language.
 * First the header is completed and written, then every string slot of every tab up to
 * the last used one is written as a length followed by the encoded string.
 * NOTE(review): listing line 974 is absent from this listing.
 */
960 {
/* Per tab: count the slots to write and the strings without a translation. */
961  uint *in_use = AllocaM(uint, data.tabs);
962  for (size_t tab = 0; tab < data.tabs; tab++) {
963  uint n = data.CountInUse((uint)tab);
964 
965  in_use[tab] = n;
966  _lang.offsets[tab] = TO_LE16(n);
967 
968  for (uint j = 0; j != in_use[tab]; j++) {
969  const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
970  if (ls != nullptr && ls->translated == nullptr) _lang.missing++;
971  }
972  }
973 
/* The multi-byte header fields are passed through TO_LE16 / TO_LE32 before writing. */
975  _lang.version = TO_LE32(data.Version());
976  _lang.missing = TO_LE16(_lang.missing);
977  _lang.winlangid = TO_LE16(_lang.winlangid);
978 
979  this->WriteHeader(&_lang);
980  Buffer buffer;
981 
982  for (size_t tab = 0; tab < data.tabs; tab++) {
983  for (uint j = 0; j != in_use[tab]; j++) {
984  const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
985  const Case *casep;
986  const char *cmdp;
987 
988  /* For undefined strings, just set that it's an empty string */
989  if (ls == nullptr) {
990  this->WriteLength(0);
991  continue;
992  }
993 
/* Make errors and warnings raised while encoding refer to this string. */
994  _cur_ident = ls->name;
995  _cur_line = ls->line;
996 
997  /* Produce a message if a string doesn't have a translation. */
998  if (_show_todo > 0 && ls->translated == nullptr) {
999  if ((_show_todo & 2) != 0) {
1000  strgen_warning("'%s' is untranslated", ls->name);
1001  }
1002  if ((_show_todo & 1) != 0) {
1003  const char *s = "<TODO> ";
1004  while (*s != '\0') buffer.AppendByte(*s++);
1005  }
1006  }
1007 
1008  /* Extract the strings and stuff from the english command string */
1009  ExtractCommandString(&_cur_pcs, ls->english, false);
1010 
/* Use the translation when there is any (cases and/or plain text), otherwise fall back to the English text. */
1011  if (ls->translated_case != nullptr || ls->translated != nullptr) {
1012  casep = ls->translated_case;
1013  cmdp = ls->translated;
1014  } else {
1015  casep = nullptr;
1016  cmdp = ls->english;
1017  }
1018 
1019  _translated = cmdp != ls->english;
1020 
1021  if (casep != nullptr) {
1022  const Case *c;
1023  uint num;
1024 
1025  /* Need to output a case-switch.
1026  * It has this format
1027  * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
1028  * Each LEN is printed using 2 bytes in big endian order. */
1029  buffer.AppendUtf8(SCC_SWITCH_CASE);
1030  /* Count the number of cases */
1031  for (num = 0, c = casep; c; c = c->next) num++;
1032  buffer.AppendByte(num);
1033 
1034  /* Write each case */
1035  for (c = casep; c != nullptr; c = c->next) {
1036  buffer.AppendByte(c->caseidx);
1037  /* Make some space for the 16-bit length */
1038  uint pos = (uint)buffer.size();
1039  buffer.AppendByte(0);
1040  buffer.AppendByte(0);
1041  /* Write string */
1042  PutCommandString(&buffer, c->string);
1043  buffer.AppendByte(0); // terminate with a zero
1044  /* Fill in the length */
1045  uint size = (uint)buffer.size() - (pos + 2);
1046  buffer[pos + 0] = GB(size, 8, 8);
1047  buffer[pos + 1] = GB(size, 0, 8);
1048  }
1049  }
1050 
/* cmdp is nullptr when only cases were translated; then no default text follows the case-switch. */
1051  if (cmdp != nullptr) PutCommandString(&buffer, cmdp);
1052 
1053  this->WriteLength((uint)buffer.size());
1054  this->Write(buffer.data(), buffer.size());
/* The buffer is reused for the next string. */
1055  buffer.clear();
1056  }
1057  }
1058 }
StringData::HashStr
uint HashStr(const char *s) const
Create a hash of the string for finding them back quickly.
Definition: strgen_base.cpp:117
LanguagePackHeader::missing
uint16 missing
number of missing strings.
Definition: language.h:40
StringReader::HandlePragma
virtual void HandlePragma(char *str)
Handle the pragma of the file.
Definition: strgen_base.cpp:797
MAX_NUM_GENDERS
static const uint8 MAX_NUM_GENDERS
Maximum number of supported genders.
Definition: language.h:20
LanguagePackHeader::version
uint32 version
32-bits of auto generated version info which is basically a hash of strings.h
Definition: language.h:28
WChar
char32_t WChar
Type for wide characters, i.e.
Definition: string_type.h:35
StringData::max_strings
size_t max_strings
The maximum number of strings.
Definition: strgen.h:45
Case::Case
Case(int caseidx, const char *string, Case *next)
Create a new case.
Definition: strgen_base.cpp:40
GB
static uint GB(const T x, const uint8 s, const uint8 n)
Fetch n bits from x, started at bit s.
Definition: bitmath_func.hpp:32
LanguagePackHeader::plural_form
byte plural_form
plural form index
Definition: language.h:41
StringData::VersionHashStr
uint VersionHashStr(uint hash, const char *s) const
Create a compound hash.
Definition: strgen_base.cpp:162
MAX_PLURALS
static const int MAX_PLURALS
The maximum number of plurals.
Definition: strgen_tables.h:161
StringData::Add
void Add(const char *s, LangString *ls)
Add a newly created LangString.
Definition: strgen_base.cpp:129
Buffer
The buffer for writing a single string.
Definition: strgen_base.cpp:236
LangString::translated
char * translated
Translated text.
Definition: strgen.h:29
Buffer::AppendByte
void AppendByte(byte value)
Convenience method for adding a byte.
Definition: strgen_base.cpp:241
LanguageWriter::Write
virtual void Write(const byte *buffer, size_t length)=0
Write a number of bytes.
_lang
LanguagePackHeader _lang
Header information about a language.
Definition: strgen_base.cpp:29
HasBit
static bool HasBit(const T x, const uint8 y)
Checks if a bit in a value is set.
Definition: bitmath_func.hpp:103
_cur_line
int _cur_line
The current line we're parsing in the input file.
Definition: strgen_base.cpp:27
StringData::StringData
StringData(size_t tabs)
Create a new string data container.
Definition: strgen_base.cpp:88
StringReader::StringReader
StringReader(StringData &data, const char *file, bool master, bool translation)
Prepare reading.
Definition: strgen_base.cpp:586
StringReader::~StringReader
virtual ~StringReader()
Make sure the right reader gets freed.
Definition: strgen_base.cpp:592
LangString::~LangString
~LangString()
Free everything we allocated.
Definition: strgen_base.cpp:66
LangString::LangString
LangString(const char *name, const char *english, size_t index, int line)
Create a new string.
Definition: strgen_base.cpp:59
C_DONTCOUNT
@ C_DONTCOUNT
These commands aren't counted for comparison.
Definition: strgen_tables.h:14
Case::caseidx
int caseidx
The index of the case.
Definition: strgen.h:17
C_GENDER
@ C_GENDER
These commands support genders.
Definition: strgen_tables.h:16
LanguageWriter::WriteLang
virtual void WriteLang(const StringData &data)
Actually write the language.
Definition: strgen_base.cpp:959
StringData::Find
LangString * Find(const char *s)
Find a LangString based on the string name.
Definition: strgen_base.cpp:143
StringReader::master
bool master
Are we reading the master file?
Definition: strgen.h:63
StringData::next_string_id
size_t next_string_id
The next string ID to allocate.
Definition: strgen.h:46
LanguagePackHeader::IDENT
static const uint32 IDENT
Identifier for OpenTTD language files, big endian for "LANG".
Definition: language.h:25
TAB_SIZE
static const uint TAB_SIZE
Number of strings per StringTab.
Definition: strings_type.h:46
LangString
Information about a single string.
Definition: strgen.h:26
StringData::Version
uint Version() const
Make a hash of the file to get a unique "version number".
Definition: strgen_base.cpp:175
LangString::english
char * english
English text.
Definition: strgen.h:28
LangString::name
char * name
Name of the string.
Definition: strgen.h:27
StringData
Information about the currently known strings.
Definition: strgen.h:41
C_CASE
@ C_CASE
These commands support cases.
Definition: strgen_tables.h:15
PluralForm::plural_count
int plural_count
The number of plural forms.
Definition: strgen_tables.h:155
Case::~Case
~Case()
Free everything we allocated.
Definition: strgen_base.cpp:46
IsInsideBS
static bool IsInsideBS(const T x, const size_t base, const size_t size)
Checks if a value is inside a window that starts at some base point.
Definition: math_func.hpp:188
StringData::CountInUse
uint CountInUse(uint tab) const
Count the number of tab elements that are in use.
Definition: strgen_base.cpp:211
strgen.h
MAX_COMMAND_PARAM_SIZE
static const ptrdiff_t MAX_COMMAND_PARAM_SIZE
Maximum size of every command block, not counting the name of the command itself.
Definition: strgen_base.cpp:31
StringData::FreeTranslation
void FreeTranslation()
Free all data related to the translation.
Definition: strgen_base.cpp:104
LanguagePackHeader::offsets
uint16 offsets[TEXT_TAB_END]
the offsets
Definition: language.h:32
StringData::~StringData
~StringData()
Free everything we allocated.
Definition: strgen_base.cpp:96
CmdStruct
Definition: strgen_tables.h:23
StringData::strings
LangString ** strings
Array of all known strings.
Definition: strgen.h:42
ROL
static T ROL(const T x, const uint8 n)
ROtate x Left by n.
Definition: bitmath_func.hpp:301
LanguagePackHeader::digit_decimal_separator
char digit_decimal_separator[8]
Decimal separator.
Definition: language.h:39
Case::next
Case * next
The next, chained, case.
Definition: strgen.h:19
Utf8Decode
size_t Utf8Decode(WChar *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition: string.cpp:574
LanguageWriter::WriteHeader
virtual void WriteHeader(const LanguagePackHeader *header)=0
Write the header metadata.
LanguagePackHeader::GetGenderIndex
uint8 GetGenderIndex(const char *gender_str) const
Get the index for the given gender.
Definition: language.h:68
LanguagePackHeader::GetCaseIndex
uint8 GetCaseIndex(const char *case_str) const
Get the index for the given case.
Definition: language.h:81
CmdPair
Definition: strgen_base.cpp:220
Case::string
char * string
The translation of the case.
Definition: strgen.h:18
LangString::index
size_t index
The index in the language file.
Definition: strgen.h:31
StringData::tabs
size_t tabs
The number of 'tabs' of strings.
Definition: strgen.h:44
LangString::hash_next
size_t hash_next
Next hash entry.
Definition: strgen.h:30
endof
#define endof(x)
Get the end element of a fixed-size array.
Definition: stdafx.h:386
StringReader::data
StringData & data
The data to fill during reading.
Definition: strgen.h:61
LanguagePackHeader::digit_group_separator_currency
char digit_group_separator_currency[8]
Thousand separator used for currencies.
Definition: language.h:37
LanguagePackHeader::winlangid
uint16 winlangid
Windows language ID: Windows cannot and will not convert isocodes to something it can use to determine whether a font can be used for the language or not.
Definition: language.h:51
LangString::line
int line
Line of string in source-file.
Definition: strgen.h:32
StringReader::translation
bool translation
Are we reading a translation, implies !master. However, the base translation will have this false.
Definition: strgen.h:64
Buffer::AppendUtf8
void AppendUtf8(uint32 value)
Add a Unicode character encoded in UTF-8 to the buffer.
Definition: strgen_base.cpp:250
StringReader::ParseFile
virtual void ParseFile()
Start parsing the file.
Definition: strgen_base.cpp:816
stredup
char * stredup(const char *s, const char *last)
Create a duplicate of the given string.
Definition: string.cpp:137
LanguagePackHeader::num_genders
uint8 num_genders
the number of genders of this language
Definition: language.h:53
error
void CDECL error(const char *s,...)
Error handling for fatal non-user errors.
Definition: openttd.cpp:132
_translation
static bool _translation
Is the current file actually a translation or not.
Definition: strgen_base.cpp:25
lengthof
#define lengthof(x)
Return the length of a fixed-size array.
Definition: stdafx.h:378
ParsedCommandStruct
Definition: strgen_base.cpp:225
HeaderWriter::WriteHeader
void WriteHeader(const StringData &data)
Write the header information.
Definition: strgen_base.cpp:850
MemSetT
static void MemSetT(T *ptr, byte value, size_t num=1)
Type-safe version of memset().
Definition: mem_func.hpp:49
LanguagePackHeader::digit_group_separator
char digit_group_separator[8]
Thousand separator used for anything not currencies.
Definition: language.h:35
LangString::translated_case
Case * translated_case
Cases of the translation.
Definition: strgen.h:33
LanguagePackHeader::ident
uint32 ident
32-bit identifier
Definition: language.h:27
StringReader::file
const char * file
The file we are reading.
Definition: strgen.h:62
StringData::hash_heads
size_t * hash_heads
Hash table for the strings.
Definition: strgen.h:43
strecpy
char * strecpy(char *dst, const char *src, const char *last)
Copies characters from one buffer to another.
Definition: string.cpp:112
free
static void free(const void *ptr)
Version of the standard free that accepts const pointers.
Definition: stdafx.h:460
HeaderWriter::WriteStringID
virtual void WriteStringID(const char *name, int stringid)=0
Write the string ID.
Case
Container for the different cases of a string.
Definition: strgen.h:16
lastof
#define lastof(x)
Get the last element of a fixed-size array.
Definition: stdafx.h:394
MAX_NUM_CASES
static const uint8 MAX_NUM_CASES
Maximum number of supported cases.
Definition: language.h:21
_plural_forms
static const PluralForm _plural_forms[]
All plural forms used.
Definition: strgen_tables.h:164
LangString::FreeTranslation
void FreeTranslation()
Free all data related to the translation.
Definition: strgen_base.cpp:75
LanguageWriter::WriteLength
virtual void WriteLength(uint length)
Write the length as a simple gamma.
Definition: strgen_base.cpp:940
_file
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
Definition: strgen_base.cpp:26
_translated
static bool _translated
Whether the current language is not the master language.
Definition: strgen_base.cpp:24
LanguagePackHeader
Header of a language file.
Definition: language.h:24
CASE_GENDER_LEN
static const uint8 CASE_GENDER_LEN
The (maximum) length of a case/gender string.
Definition: language.h:19
AllocaM
#define AllocaM(T, num_elements)
alloca() has to be called in the parent function, so define AllocaM() as a macro
Definition: alloc_func.hpp:132