OpenTTD Source  1.11.2
string.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
10 #include "stdafx.h"
11 #include "debug.h"
12 #include "core/alloc_func.hpp"
13 #include "core/math_func.hpp"
14 #include "string_func.h"
15 #include "string_base.h"
16 
17 #include "table/control_codes.h"
18 
19 #include <stdarg.h>
20 #include <ctype.h> /* required for tolower() */
21 #include <sstream>
22 
23 #ifdef _MSC_VER
24 #include <errno.h> // required by vsnprintf implementation for MSVC
25 #endif
26 
27 #ifdef _WIN32
28 #include "os/windows/win32.h"
29 #endif
30 
31 #ifdef WITH_UNISCRIBE
33 #endif
34 
35 #ifdef WITH_ICU_I18N
36 /* Required by strnatcmp. */
37 #include <unicode/ustring.h>
38 #include "language.h"
39 #include "gfx_func.h"
40 #endif /* WITH_ICU_I18N */
41 
42 #if defined(WITH_COCOA)
43 #include "os/macosx/string_osx.h"
44 #endif
45 
46 /* The function vsnprintf is used internally to perform the required formatting
47  * tasks. As such this one must be allowed, and makes sure it's terminated. */
48 #include "safeguards.h"
49 #undef vsnprintf
50 
61 int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
62 {
63  ptrdiff_t diff = last - str;
64  if (diff < 0) return 0;
65  return std::min(static_cast<int>(diff), vsnprintf(str, diff + 1, format, ap));
66 }
67 
84 char *strecat(char *dst, const char *src, const char *last)
85 {
86  assert(dst <= last);
87  while (*dst != '\0') {
88  if (dst == last) return dst;
89  dst++;
90  }
91 
92  return strecpy(dst, src, last);
93 }
94 
95 
112 char *strecpy(char *dst, const char *src, const char *last)
113 {
114  assert(dst <= last);
115  while (dst != last && *src != '\0') {
116  *dst++ = *src++;
117  }
118  *dst = '\0';
119 
120  if (dst == last && *src != '\0') {
121 #if defined(STRGEN) || defined(SETTINGSGEN)
122  error("String too long for destination buffer");
123 #else /* STRGEN || SETTINGSGEN */
124  DEBUG(misc, 0, "String too long for destination buffer");
125 #endif /* STRGEN || SETTINGSGEN */
126  }
127  return dst;
128 }
129 
137 char *stredup(const char *s, const char *last)
138 {
139  size_t len = last == nullptr ? strlen(s) : ttd_strnlen(s, last - s + 1);
140  char *tmp = CallocT<char>(len + 1);
141  memcpy(tmp, s, len);
142  return tmp;
143 }
144 
150 char *CDECL str_fmt(const char *str, ...)
151 {
152  char buf[4096];
153  va_list va;
154 
155  va_start(va, str);
156  int len = vseprintf(buf, lastof(buf), str, va);
157  va_end(va);
158  char *p = MallocT<char>(len + 1);
159  memcpy(p, buf, len + 1);
160  return p;
161 }
162 
169 void str_fix_scc_encoded(char *str, const char *last)
170 {
171  while (str <= last && *str != '\0') {
172  size_t len = Utf8EncodedCharLen(*str);
173  if ((len == 0 && str + 4 > last) || str + len > last) break;
174 
175  WChar c;
176  Utf8Decode(&c, str);
177  if (c == '\0') break;
178 
179  if (c == 0xE028 || c == 0xE02A) {
180  c = SCC_ENCODED;
181  }
182  str += Utf8Encode(str, c);
183  }
184  *str = '\0';
185 }
186 
187 
188 template <class T>
189 static void str_validate(T &dst, const char *str, const char *last, StringValidationSettings settings)
190 {
191  /* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
192 
193  while (str <= last && *str != '\0') {
194  size_t len = Utf8EncodedCharLen(*str);
195  WChar c;
196  /* If the first byte does not look like the first byte of an encoded
197  * character, i.e. encoded length is 0, then this byte is definitely bad
198  * and it should be skipped.
199  * When the first byte looks like the first byte of an encoded character,
200  * then the remaining bytes in the string are checked whether the whole
201  * encoded character can be there. If that is not the case, this byte is
202  * skipped.
203  * Finally we attempt to decode the encoded character, which does certain
204  * extra validations to see whether the correct number of bytes were used
205  * to encode the character. If that is not the case, the byte is probably
206  * invalid and it is skipped. We could emit a question mark, but then the
207  * logic below cannot just copy bytes, it would need to re-encode the
208  * decoded characters as the length in bytes may have changed.
209  *
210  * The goals here is to get as much valid Utf8 encoded characters from the
211  * source string to the destination string.
212  *
213  * Note: a multi-byte encoded termination ('\0') will trigger the encoded
214  * char length and the decoded length to differ, so it will be ignored as
215  * invalid character data. If it were to reach the termination, then we
216  * would also reach the "last" byte of the string and a normal '\0'
217  * termination will be placed after it.
218  */
219  if (len == 0 || str + len > last || len != Utf8Decode(&c, str)) {
220  /* Maybe the next byte is still a valid character? */
221  str++;
222  continue;
223  }
224 
225  if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && c == SCC_ENCODED)) {
226  /* Copy the character back. Even if dst is current the same as str
227  * (i.e. no characters have been changed) this is quicker than
228  * moving the pointers ahead by len */
229  do {
230  *dst++ = *str++;
231  } while (--len != 0);
232  } else if ((settings & SVS_ALLOW_NEWLINE) != 0 && c == '\n') {
233  *dst++ = *str++;
234  } else {
235  if ((settings & SVS_ALLOW_NEWLINE) != 0 && c == '\r' && str[1] == '\n') {
236  str += len;
237  continue;
238  }
239  /* Replace the undesirable character with a question mark */
240  str += len;
241  if ((settings & SVS_REPLACE_WITH_QUESTION_MARK) != 0) *dst++ = '?';
242  }
243  }
244 
245  /* String termination, if needed, is left to the caller of this function. */
246 }
247 
255 void str_validate(char *str, const char *last, StringValidationSettings settings)
256 {
257  char *dst = str;
258  str_validate(dst, str, last, settings);
259  *dst = '\0';
260 }
261 
268 std::string str_validate(const std::string &str, StringValidationSettings settings)
269 {
270  auto buf = str.data();
271  auto last = buf + str.size();
272 
273  std::ostringstream dst;
274  std::ostreambuf_iterator<char> dst_iter(dst);
275  str_validate(dst_iter, buf, last, settings);
276 
277  return dst.str();
278 }
279 
285 void ValidateString(const char *str)
286 {
287  /* We know it is '\0' terminated. */
288  str_validate(const_cast<char *>(str), str + strlen(str) + 1);
289 }
290 
291 
299 bool StrValid(const char *str, const char *last)
300 {
301  /* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
302 
303  while (str <= last && *str != '\0') {
304  size_t len = Utf8EncodedCharLen(*str);
305  /* Encoded length is 0 if the character isn't known.
306  * The length check is needed to prevent Utf8Decode to read
307  * over the terminating '\0' if that happens to be placed
308  * within the encoding of an UTF8 character. */
309  if (len == 0 || str + len > last) return false;
310 
311  WChar c;
312  len = Utf8Decode(&c, str);
313  if (!IsPrintable(c) || (c >= SCC_SPRITE_START && c <= SCC_SPRITE_END)) {
314  return false;
315  }
316 
317  str += len;
318  }
319 
320  return *str == '\0';
321 }
322 
324 void str_strip_colours(char *str)
325 {
326  char *dst = str;
327  WChar c;
328  size_t len;
329 
330  for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
331  if (c < SCC_BLUE || c > SCC_BLACK) {
332  /* Copy the character back. Even if dst is current the same as str
333  * (i.e. no characters have been changed) this is quicker than
334  * moving the pointers ahead by len */
335  do {
336  *dst++ = *str++;
337  } while (--len != 0);
338  } else {
339  /* Just skip (strip) the colour codes */
340  str += len;
341  }
342  }
343  *dst = '\0';
344 }
345 
352 size_t Utf8StringLength(const char *s)
353 {
354  size_t len = 0;
355  const char *t = s;
356  while (Utf8Consume(&t) != 0) len++;
357  return len;
358 }
359 
360 
/**
 * Convert a given ASCII string to lowercase, in place.
 * @param str The '\0' terminated string to convert.
 * @return true iff at least one character was changed.
 */
bool strtolower(char *str)
{
	bool changed = false;
	for (; *str != '\0'; str++) {
		/* tolower() is only defined for values representable as unsigned char
		 * (or EOF). Bytes >= 0x80 are negative where char is signed, so go
		 * through unsigned char, as the std::string overload does. */
		char new_str = static_cast<char>(tolower(static_cast<unsigned char>(*str)));
		changed |= new_str != *str;
		*str = new_str;
	}
	return changed;
}
382 
/**
 * Convert the tail of a string to lowercase, in place.
 * @param str  The string to convert.
 * @param offs Index of the first character to convert; must not exceed the
 *             length of \a str.
 * @return true iff at least one character was changed.
 */
bool strtolower(std::string &str, std::string::size_type offs)
{
	bool modified = false;

	auto it = str.begin() + offs;
	const auto end = str.end();
	while (it != end) {
		const char original = *it;
		const char lowered = static_cast<char>(tolower(static_cast<unsigned char>(original)));
		if (lowered != original) {
			*it = lowered;
			modified = true;
		}
		++it;
	}

	return modified;
}
393 
401 bool IsValidChar(WChar key, CharSetFilter afilter)
402 {
403  switch (afilter) {
404  case CS_ALPHANUMERAL: return IsPrintable(key);
405  case CS_NUMERAL: return (key >= '0' && key <= '9');
406  case CS_NUMERAL_SPACE: return (key >= '0' && key <= '9') || key == ' ';
407  case CS_ALPHA: return IsPrintable(key) && !(key >= '0' && key <= '9');
408  case CS_HEXADECIMAL: return (key >= '0' && key <= '9') || (key >= 'a' && key <= 'f') || (key >= 'A' && key <= 'F');
409  default: NOT_REACHED();
410  }
411 }
412 
413 #ifdef _WIN32
414 #if defined(_MSC_VER) && _MSC_VER < 1900
415 
422 int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap)
423 {
424  if (size == 0) return 0;
425 
426  errno = 0;
427  int ret = _vsnprintf(str, size, format, ap);
428 
429  if (ret < 0) {
430  if (errno != ERANGE) {
431  /* There's a formatting error, better get that looked
432  * at properly instead of ignoring it. */
433  NOT_REACHED();
434  }
435  } else if ((size_t)ret < size) {
436  /* The buffer is big enough for the number of
437  * characters stored (excluding null), i.e.
438  * the string has been null-terminated. */
439  return ret;
440  }
441 
442  /* The buffer is too small for _vsnprintf to write the
443  * null-terminator at its end and return size. */
444  str[size - 1] = '\0';
445  return (int)size;
446 }
447 #endif /* _MSC_VER */
448 
449 #endif /* _WIN32 */
450 
460 int CDECL seprintf(char *str, const char *last, const char *format, ...)
461 {
462  va_list ap;
463 
464  va_start(ap, format);
465  int ret = vseprintf(str, last, format, ap);
466  va_end(ap);
467  return ret;
468 }
469 
470 
478 char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
479 {
480  char *p = buf;
481 
482  for (uint i = 0; i < 16; i++) {
483  p += seprintf(p, last, "%02X", md5sum[i]);
484  }
485 
486  return p;
487 }
488 
489 
490 /* UTF-8 handling routines */
491 
492 
499 size_t Utf8Decode(WChar *c, const char *s)
500 {
501  assert(c != nullptr);
502 
503  if (!HasBit(s[0], 7)) {
504  /* Single byte character: 0xxxxxxx */
505  *c = s[0];
506  return 1;
507  } else if (GB(s[0], 5, 3) == 6) {
508  if (IsUtf8Part(s[1])) {
509  /* Double byte character: 110xxxxx 10xxxxxx */
510  *c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
511  if (*c >= 0x80) return 2;
512  }
513  } else if (GB(s[0], 4, 4) == 14) {
514  if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
515  /* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */
516  *c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
517  if (*c >= 0x800) return 3;
518  }
519  } else if (GB(s[0], 3, 5) == 30) {
520  if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
521  /* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
522  *c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
523  if (*c >= 0x10000 && *c <= 0x10FFFF) return 4;
524  }
525  }
526 
527  /* DEBUG(misc, 1, "[utf8] invalid UTF-8 sequence"); */
528  *c = '?';
529  return 1;
530 }
531 
532 
540 template <class T>
541 inline size_t Utf8Encode(T buf, WChar c)
542 {
543  if (c < 0x80) {
544  *buf = c;
545  return 1;
546  } else if (c < 0x800) {
547  *buf++ = 0xC0 + GB(c, 6, 5);
548  *buf = 0x80 + GB(c, 0, 6);
549  return 2;
550  } else if (c < 0x10000) {
551  *buf++ = 0xE0 + GB(c, 12, 4);
552  *buf++ = 0x80 + GB(c, 6, 6);
553  *buf = 0x80 + GB(c, 0, 6);
554  return 3;
555  } else if (c < 0x110000) {
556  *buf++ = 0xF0 + GB(c, 18, 3);
557  *buf++ = 0x80 + GB(c, 12, 6);
558  *buf++ = 0x80 + GB(c, 6, 6);
559  *buf = 0x80 + GB(c, 0, 6);
560  return 4;
561  }
562 
563  /* DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c); */
564  *buf = '?';
565  return 1;
566 }
567 
568 size_t Utf8Encode(char *buf, WChar c)
569 {
570  return Utf8Encode<char *>(buf, c);
571 }
572 
573 size_t Utf8Encode(std::ostreambuf_iterator<char> &buf, WChar c)
574 {
575  return Utf8Encode<std::ostreambuf_iterator<char> &>(buf, c);
576 }
577 
585 size_t Utf8TrimString(char *s, size_t maxlen)
586 {
587  size_t length = 0;
588 
589  for (const char *ptr = strchr(s, '\0'); *s != '\0';) {
590  size_t len = Utf8EncodedCharLen(*s);
591  /* Silently ignore invalid UTF8 sequences, our only concern trimming */
592  if (len == 0) len = 1;
593 
594  /* Take care when a hard cutoff was made for the string and
595  * the last UTF8 sequence is invalid */
596  if (length + len >= maxlen || (s + len > ptr)) break;
597  s += len;
598  length += len;
599  }
600 
601  *s = '\0';
602  return length;
603 }
604 
605 #ifdef DEFINE_STRCASESTR
606 char *strcasestr(const char *haystack, const char *needle)
607 {
608  size_t hay_len = strlen(haystack);
609  size_t needle_len = strlen(needle);
610  while (hay_len >= needle_len) {
611  if (strncasecmp(haystack, needle, needle_len) == 0) return const_cast<char *>(haystack);
612 
613  haystack++;
614  hay_len--;
615  }
616 
617  return nullptr;
618 }
619 #endif /* DEFINE_STRCASESTR */
620 
629 static const char *SkipGarbage(const char *str)
630 {
631  while (*str != '\0' && (*str < '0' || IsInsideMM(*str, ';', '@' + 1) || IsInsideMM(*str, '[', '`' + 1) || IsInsideMM(*str, '{', '~' + 1))) str++;
632  return str;
633 }
634 
643 int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front)
644 {
645  if (ignore_garbage_at_front) {
646  s1 = SkipGarbage(s1);
647  s2 = SkipGarbage(s2);
648  }
649 
650 #ifdef WITH_ICU_I18N
651  if (_current_collator) {
652  UErrorCode status = U_ZERO_ERROR;
653  int result = _current_collator->compareUTF8(s1, s2, status);
654  if (U_SUCCESS(status)) return result;
655  }
656 #endif /* WITH_ICU_I18N */
657 
658 #if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN)
659  int res = OTTDStringCompare(s1, s2);
660  if (res != 0) return res - 2; // Convert to normal C return values.
661 #endif
662 
663 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
664  int res = MacOSStringCompare(s1, s2);
665  if (res != 0) return res - 2; // Convert to normal C return values.
666 #endif
667 
668  /* Do a normal comparison if ICU is missing or if we cannot create a collator. */
669  return strcasecmp(s1, s2);
670 }
671 
672 #ifdef WITH_UNISCRIBE
673 
675 {
676  return new UniscribeStringIterator();
677 }
678 
679 #elif defined(WITH_ICU_I18N)
680 
681 #include <unicode/utext.h>
682 #include <unicode/brkiter.h>
683 
686 {
687  icu::BreakIterator *char_itr;
688  icu::BreakIterator *word_itr;
689 
690  std::vector<UChar> utf16_str;
691  std::vector<size_t> utf16_to_utf8;
692 
693 public:
694  IcuStringIterator() : char_itr(nullptr), word_itr(nullptr)
695  {
696  UErrorCode status = U_ZERO_ERROR;
697  this->char_itr = icu::BreakIterator::createCharacterInstance(icu::Locale(_current_language != nullptr ? _current_language->isocode : "en"), status);
698  this->word_itr = icu::BreakIterator::createWordInstance(icu::Locale(_current_language != nullptr ? _current_language->isocode : "en"), status);
699 
700  this->utf16_str.push_back('\0');
701  this->utf16_to_utf8.push_back(0);
702  }
703 
704  ~IcuStringIterator() override
705  {
706  delete this->char_itr;
707  delete this->word_itr;
708  }
709 
710  void SetString(const char *s) override
711  {
712  const char *string_base = s;
713 
714  /* Unfortunately current ICU versions only provide rudimentary support
715  * for word break iterators (especially for CJK languages) in combination
716  * with UTF-8 input. As a work around we have to convert the input to
717  * UTF-16 and create a mapping back to UTF-8 character indices. */
718  this->utf16_str.clear();
719  this->utf16_to_utf8.clear();
720 
721  while (*s != '\0') {
722  size_t idx = s - string_base;
723 
724  WChar c = Utf8Consume(&s);
725  if (c < 0x10000) {
726  this->utf16_str.push_back((UChar)c);
727  } else {
728  /* Make a surrogate pair. */
729  this->utf16_str.push_back((UChar)(0xD800 + ((c - 0x10000) >> 10)));
730  this->utf16_str.push_back((UChar)(0xDC00 + ((c - 0x10000) & 0x3FF)));
731  this->utf16_to_utf8.push_back(idx);
732  }
733  this->utf16_to_utf8.push_back(idx);
734  }
735  this->utf16_str.push_back('\0');
736  this->utf16_to_utf8.push_back(s - string_base);
737 
738  UText text = UTEXT_INITIALIZER;
739  UErrorCode status = U_ZERO_ERROR;
740  utext_openUChars(&text, this->utf16_str.data(), this->utf16_str.size() - 1, &status);
741  this->char_itr->setText(&text, status);
742  this->word_itr->setText(&text, status);
743  this->char_itr->first();
744  this->word_itr->first();
745  }
746 
747  size_t SetCurPosition(size_t pos) override
748  {
749  /* Convert incoming position to an UTF-16 string index. */
750  uint utf16_pos = 0;
751  for (uint i = 0; i < this->utf16_to_utf8.size(); i++) {
752  if (this->utf16_to_utf8[i] == pos) {
753  utf16_pos = i;
754  break;
755  }
756  }
757 
758  /* isBoundary has the documented side-effect of setting the current
759  * position to the first valid boundary equal to or greater than
760  * the passed value. */
761  this->char_itr->isBoundary(utf16_pos);
762  return this->utf16_to_utf8[this->char_itr->current()];
763  }
764 
765  size_t Next(IterType what) override
766  {
767  int32_t pos;
768  switch (what) {
769  case ITER_CHARACTER:
770  pos = this->char_itr->next();
771  break;
772 
773  case ITER_WORD:
774  pos = this->word_itr->following(this->char_itr->current());
775  /* The ICU word iterator considers both the start and the end of a word a valid
776  * break point, but we only want word starts. Move to the next location in
777  * case the new position points to whitespace. */
778  while (pos != icu::BreakIterator::DONE &&
779  IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) {
780  int32_t new_pos = this->word_itr->next();
781  /* Don't set it to DONE if it was valid before. Otherwise we'll return END
782  * even though the iterator wasn't at the end of the string before. */
783  if (new_pos == icu::BreakIterator::DONE) break;
784  pos = new_pos;
785  }
786 
787  this->char_itr->isBoundary(pos);
788  break;
789 
790  default:
791  NOT_REACHED();
792  }
793 
794  return pos == icu::BreakIterator::DONE ? END : this->utf16_to_utf8[pos];
795  }
796 
797  size_t Prev(IterType what) override
798  {
799  int32_t pos;
800  switch (what) {
801  case ITER_CHARACTER:
802  pos = this->char_itr->previous();
803  break;
804 
805  case ITER_WORD:
806  pos = this->word_itr->preceding(this->char_itr->current());
807  /* The ICU word iterator considers both the start and the end of a word a valid
808  * break point, but we only want word starts. Move to the previous location in
809  * case the new position points to whitespace. */
810  while (pos != icu::BreakIterator::DONE &&
811  IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) {
812  int32_t new_pos = this->word_itr->previous();
813  /* Don't set it to DONE if it was valid before. Otherwise we'll return END
814  * even though the iterator wasn't at the start of the string before. */
815  if (new_pos == icu::BreakIterator::DONE) break;
816  pos = new_pos;
817  }
818 
819  this->char_itr->isBoundary(pos);
820  break;
821 
822  default:
823  NOT_REACHED();
824  }
825 
826  return pos == icu::BreakIterator::DONE ? END : this->utf16_to_utf8[pos];
827  }
828 };
829 
831 {
832  return new IcuStringIterator();
833 }
834 
835 #else
836 
838 class DefaultStringIterator : public StringIterator
839 {
840  const char *string;
841  size_t len;
842  size_t cur_pos;
843 
844 public:
845  DefaultStringIterator() : string(nullptr), len(0), cur_pos(0)
846  {
847  }
848 
849  virtual void SetString(const char *s)
850  {
851  this->string = s;
852  this->len = strlen(s);
853  this->cur_pos = 0;
854  }
855 
856  virtual size_t SetCurPosition(size_t pos)
857  {
858  assert(this->string != nullptr && pos <= this->len);
859  /* Sanitize in case we get a position inside an UTF-8 sequence. */
860  while (pos > 0 && IsUtf8Part(this->string[pos])) pos--;
861  return this->cur_pos = pos;
862  }
863 
864  virtual size_t Next(IterType what)
865  {
866  assert(this->string != nullptr);
867 
868  /* Already at the end? */
869  if (this->cur_pos >= this->len) return END;
870 
871  switch (what) {
872  case ITER_CHARACTER: {
873  WChar c;
874  this->cur_pos += Utf8Decode(&c, this->string + this->cur_pos);
875  return this->cur_pos;
876  }
877 
878  case ITER_WORD: {
879  WChar c;
880  /* Consume current word. */
881  size_t offs = Utf8Decode(&c, this->string + this->cur_pos);
882  while (this->cur_pos < this->len && !IsWhitespace(c)) {
883  this->cur_pos += offs;
884  offs = Utf8Decode(&c, this->string + this->cur_pos);
885  }
886  /* Consume whitespace to the next word. */
887  while (this->cur_pos < this->len && IsWhitespace(c)) {
888  this->cur_pos += offs;
889  offs = Utf8Decode(&c, this->string + this->cur_pos);
890  }
891 
892  return this->cur_pos;
893  }
894 
895  default:
896  NOT_REACHED();
897  }
898 
899  return END;
900  }
901 
902  virtual size_t Prev(IterType what)
903  {
904  assert(this->string != nullptr);
905 
906  /* Already at the beginning? */
907  if (this->cur_pos == 0) return END;
908 
909  switch (what) {
910  case ITER_CHARACTER:
911  return this->cur_pos = Utf8PrevChar(this->string + this->cur_pos) - this->string;
912 
913  case ITER_WORD: {
914  const char *s = this->string + this->cur_pos;
915  WChar c;
916  /* Consume preceding whitespace. */
917  do {
918  s = Utf8PrevChar(s);
919  Utf8Decode(&c, s);
920  } while (s > this->string && IsWhitespace(c));
921  /* Consume preceding word. */
922  while (s > this->string && !IsWhitespace(c)) {
923  s = Utf8PrevChar(s);
924  Utf8Decode(&c, s);
925  }
926  /* Move caret back to the beginning of the word. */
927  if (IsWhitespace(c)) Utf8Consume(&s);
928 
929  return this->cur_pos = s - this->string;
930  }
931 
932  default:
933  NOT_REACHED();
934  }
935 
936  return END;
937  }
938 };
939 
940 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
942 {
943  StringIterator *i = OSXStringIterator::Create();
944  if (i != nullptr) return i;
945 
946  return new DefaultStringIterator();
947 }
948 #else
950 {
951  return new DefaultStringIterator();
952 }
953 #endif /* defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN) */
954 
955 #endif
StringIterator::Prev
virtual size_t Prev(IterType what=ITER_CHARACTER)=0
Move the cursor back by one iteration unit.
IcuStringIterator::utf16_to_utf8
std::vector< size_t > utf16_to_utf8
Mapping from UTF-16 code point position to index in the UTF-8 source string.
Definition: string.cpp:691
WChar
char32_t WChar
Type for wide characters, i.e.
Definition: string_type.h:35
SVS_ALLOW_NEWLINE
@ SVS_ALLOW_NEWLINE
Allow newlines.
Definition: string_type.h:51
GB
static uint GB(const T x, const uint8 s, const uint8 n)
Fetch n bits from x, started at bit s.
Definition: bitmath_func.hpp:32
StringIterator::IterType
IterType
Type of the iterator.
Definition: string_base.h:17
str_validate
void str_validate(char *str, const char *last, StringValidationSettings settings)
Scans the string for valid characters and if it finds invalid ones, replaces them with a question mar...
Definition: string.cpp:255
strtolower
bool strtolower(char *str)
Convert a given ASCII string to lowercase.
Definition: string.cpp:372
win32.h
StringIterator::END
static const size_t END
Sentinel to indicate end-of-iteration.
Definition: string_base.h:23
math_func.hpp
str_fix_scc_encoded
void str_fix_scc_encoded(char *str, const char *last)
Scan the string for old values of SCC_ENCODED and fix it to its new, static value.
Definition: string.cpp:169
HasBit
static bool HasBit(const T x, const uint8 y)
Checks if a bit in a value is set.
Definition: bitmath_func.hpp:103
IcuStringIterator::word_itr
icu::BreakIterator * word_itr
ICU iterator for words.
Definition: string.cpp:688
_current_collator
std::unique_ptr< icu::Collator > _current_collator
Collator for the language currently in use.
Definition: strings.cpp:51
Utf8Encode
size_t Utf8Encode(T buf, WChar c)
Encode a unicode character and place it in the buffer.
Definition: string.cpp:541
StringIterator::Next
virtual size_t Next(IterType what=ITER_CHARACTER)=0
Advance the cursor by one iteration unit.
CS_ALPHA
@ CS_ALPHA
Only alphabetic values.
Definition: string_type.h:30
UniscribeStringIterator
String iterator using Uniscribe as a backend.
Definition: string_uniscribe.h:67
Utf16DecodeChar
static WChar Utf16DecodeChar(const uint16 *c)
Decode an UTF-16 character.
Definition: string_func.h:205
StringIterator::SetString
virtual void SetString(const char *s)=0
Set a new iteration string.
IcuStringIterator::char_itr
icu::BreakIterator * char_itr
ICU iterator for characters.
Definition: string.cpp:687
StringIterator::ITER_CHARACTER
@ ITER_CHARACTER
Iterate over characters (or more exactly grapheme clusters).
Definition: string_base.h:18
control_codes.h
IsInsideMM
static bool IsInsideMM(const T x, const size_t min, const size_t max)
Checks if a value is in an interval.
Definition: math_func.hpp:204
IcuStringIterator::Next
size_t Next(IterType what) override
Advance the cursor by one iteration unit.
Definition: string.cpp:765
string_osx.h
gfx_func.h
Utf8StringLength
size_t Utf8StringLength(const char *s)
Get the length of an UTF-8 encoded string in number of characters and thus not the number of bytes th...
Definition: string.cpp:352
StrValid
bool StrValid(const char *str, const char *last)
Checks whether the given string is valid, i.e.
Definition: string.cpp:299
StringIterator::Create
static StringIterator * Create()
Create a new iterator instance.
Definition: string.cpp:830
SVS_ALLOW_CONTROL_CODE
@ SVS_ALLOW_CONTROL_CODE
Allow the special control codes.
Definition: string_type.h:52
StringIterator
Class for iterating over different kind of parts of a string.
Definition: string_base.h:14
IcuStringIterator::SetCurPosition
size_t SetCurPosition(size_t pos) override
Change the current string cursor.
Definition: string.cpp:747
StringIterator::SetCurPosition
virtual size_t SetCurPosition(size_t pos)=0
Change the current string cursor.
DEBUG
#define DEBUG(name, level,...)
Output a line of debugging information.
Definition: debug.h:35
str_strip_colours
void str_strip_colours(char *str)
Scans the string for colour codes and strips them.
Definition: string.cpp:324
_current_language
const LanguageMetadata * _current_language
The currently loaded language.
Definition: strings.cpp:46
safeguards.h
IcuStringIterator::utf16_str
std::vector< UChar > utf16_str
UTF-16 copy of the string.
Definition: string.cpp:690
IsValidChar
bool IsValidChar(WChar key, CharSetFilter afilter)
Only allow certain keys.
Definition: string.cpp:401
ttd_strnlen
static size_t ttd_strnlen(const char *str, size_t maxlen)
Get the length of a string, within a limited buffer.
Definition: string_func.h:72
settings
fluid_settings_t * settings
FluidSynth settings handle.
Definition: fluidsynth.cpp:21
ValidateString
void ValidateString(const char *str)
Scans the string for valid characters and if it finds invalid ones, replaces them with a question mar...
Definition: string.cpp:285
vseprintf
int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
Safer implementation of vsnprintf; same as vsnprintf except:
Definition: string.cpp:61
StringIterator::ITER_WORD
@ ITER_WORD
Iterate over words.
Definition: string_base.h:19
language.h
stdafx.h
CS_ALPHANUMERAL
@ CS_ALPHANUMERAL
Both numeric and alphabetic and spaces and stuff.
Definition: string_type.h:27
LanguagePackHeader::isocode
char isocode[16]
the ISO code for the language (not country code)
Definition: language.h:31
StringValidationSettings
StringValidationSettings
Settings for the string validation.
Definition: string_type.h:48
Utf8Decode
size_t Utf8Decode(WChar *c, const char *s)
Decode and consume the next UTF-8 encoded character.
Definition: string.cpp:499
string_func.h
str_fmt
char *CDECL str_fmt(const char *str,...)
Format, "printf", into a newly allocated string.
Definition: string.cpp:150
alloc_func.hpp
IcuStringIterator::SetString
void SetString(const char *s) override
Set a new iteration string.
Definition: string.cpp:710
IcuStringIterator
String iterator using ICU as a backend.
Definition: string.cpp:685
seprintf
int CDECL seprintf(char *str, const char *last, const char *format,...)
Safer implementation of snprintf; same as snprintf except:
Definition: string.cpp:460
stredup
char * stredup(const char *s, const char *last)
Create a duplicate of the given string.
Definition: string.cpp:137
error
void CDECL error(const char *s,...)
Error handling for fatal non-user errors.
Definition: openttd.cpp:132
Utf8TrimString
size_t Utf8TrimString(char *s, size_t maxlen)
Properly terminate a UTF-8 string to some maximum length.
Definition: string.cpp:585
CS_NUMERAL_SPACE
@ CS_NUMERAL_SPACE
Only numbers and spaces.
Definition: string_type.h:29
SkipGarbage
static const char * SkipGarbage(const char *str)
Skip some of the 'garbage' in the string that we don't want to use to sort on.
Definition: string.cpp:629
IsWhitespace
static bool IsWhitespace(WChar c)
Check whether UNICODE character is whitespace or not, i.e.
Definition: string_func.h:252
strnatcmp
int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front)
Compares two strings using case insensitive natural sort.
Definition: string.cpp:643
md5sumToString
char * md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
Convert the md5sum to a hexadecimal string representation.
Definition: string.cpp:478
strecpy
char * strecpy(char *dst, const char *src, const char *last)
Copies characters from one buffer to another.
Definition: string.cpp:112
CS_HEXADECIMAL
@ CS_HEXADECIMAL
Only hexadecimal characters.
Definition: string_type.h:31
strecat
char * strecat(char *dst, const char *src, const char *last)
Appends characters from one string to another.
Definition: string.cpp:84
MacOSStringCompare
int MacOSStringCompare(const char *s1, const char *s2)
Compares two strings using case insensitive natural sort.
Definition: string_osx.cpp:323
lastof
#define lastof(x)
Get the last element of a fixed-size array.
Definition: stdafx.h:385
SVS_REPLACE_WITH_QUESTION_MARK
@ SVS_REPLACE_WITH_QUESTION_MARK
Replace the unknown/bad bits with question marks.
Definition: string_type.h:50
CS_NUMERAL
@ CS_NUMERAL
Only numeric ones.
Definition: string_type.h:28
CharSetFilter
CharSetFilter
Valid filter types for IsValidChar.
Definition: string_type.h:26
Utf8PrevChar
static char * Utf8PrevChar(char *s)
Retrieve the previous UNICODE character in an UTF-8 encoded string.
Definition: string_func.h:153
debug.h
string_uniscribe.h
Utf8EncodedCharLen
static int8 Utf8EncodedCharLen(char c)
Return the length of an UTF-8 encoded value based on a single char.
Definition: string_func.h:128
IcuStringIterator::Prev
size_t Prev(IterType what) override
Move the cursor back by one iteration unit.
Definition: string.cpp:797