Go to the documentation of this file.
15 #include "string_base.h"
37 #include <unicode/ustring.h>
42 #if defined(WITH_COCOA)
61 int CDECL
vseprintf(
char *str,
const char *last,
const char *format, va_list ap)
63 ptrdiff_t diff = last - str;
64 if (diff < 0)
return 0;
65 return std::min(
static_cast<int>(diff), vsnprintf(str, diff + 1, format, ap));
84 char *
strecat(
char *dst,
const char *src,
const char *last)
87 while (*dst !=
'\0') {
88 if (dst == last)
return dst;
112 char *
strecpy(
char *dst,
const char *src,
const char *last)
115 while (dst != last && *src !=
'\0') {
120 if (dst == last && *src !=
'\0') {
121 #if defined(STRGEN) || defined(SETTINGSGEN)
122 error(
"String too long for destination buffer");
124 DEBUG(misc, 0,
"String too long for destination buffer");
137 char *
stredup(
const char *s,
const char *last)
139 size_t len = last ==
nullptr ? strlen(s) :
ttd_strnlen(s, last - s + 1);
140 char *tmp = CallocT<char>(len + 1);
158 char *p = MallocT<char>(len + 1);
159 memcpy(p, buf, len + 1);
171 while (str <= last && *str !=
'\0') {
173 if ((len == 0 && str + 4 > last) || str + len > last)
break;
177 if (c ==
'\0')
break;
179 if (c == 0xE028 || c == 0xE02A) {
193 while (str <= last && *str !=
'\0') {
219 if (len == 0 || str + len > last || len !=
Utf8Decode(&c, str)) {
231 }
while (--len != 0);
270 auto buf = str.data();
271 auto last = buf + str.size();
273 std::ostringstream dst;
274 std::ostreambuf_iterator<char> dst_iter(dst);
288 str_validate(
const_cast<char *
>(str), str + strlen(str) + 1);
303 while (str <= last && *str !=
'\0') {
309 if (len == 0 || str + len > last)
return false;
313 if (!IsPrintable(c) || (c >= SCC_SPRITE_START && c <= SCC_SPRITE_END)) {
331 if (c < SCC_BLUE || c > SCC_BLACK) {
337 }
while (--len != 0);
356 while (Utf8Consume(&t) != 0) len++;
374 bool changed =
false;
375 for (; *str !=
'\0'; str++) {
376 char new_str = tolower(*str);
377 changed |= new_str != *str;
383 bool strtolower(std::string &str, std::string::size_type offs)
385 bool changed =
false;
386 for (
auto ch = str.begin() + offs; ch != str.end(); ++ch) {
387 auto new_ch =
static_cast<char>(tolower(
static_cast<unsigned char>(*ch)));
388 changed |= new_ch != *ch;
405 case CS_NUMERAL:
return (key >=
'0' && key <=
'9');
407 case CS_ALPHA:
return IsPrintable(key) && !(key >=
'0' && key <=
'9');
408 case CS_HEXADECIMAL:
return (key >=
'0' && key <=
'9') || (key >=
'a' && key <=
'f') || (key >=
'A' && key <=
'F');
409 default: NOT_REACHED();
414 #if defined(_MSC_VER) && _MSC_VER < 1900
422 int CDECL vsnprintf(
char *str,
size_t size,
const char *format, va_list ap)
424 if (size == 0)
return 0;
427 int ret = _vsnprintf(str, size, format, ap);
430 if (errno != ERANGE) {
435 }
else if ((
size_t)ret < size) {
444 str[size - 1] =
'\0';
460 int CDECL
seprintf(
char *str,
const char *last,
const char *format, ...)
464 va_start(ap, format);
465 int ret =
vseprintf(str, last, format, ap);
482 for (uint i = 0; i < 16; i++) {
483 p +=
seprintf(p, last,
"%02X", md5sum[i]);
501 assert(c !=
nullptr);
507 }
else if (
GB(s[0], 5, 3) == 6) {
508 if (IsUtf8Part(s[1])) {
510 *c =
GB(s[0], 0, 5) << 6 |
GB(s[1], 0, 6);
511 if (*c >= 0x80)
return 2;
513 }
else if (
GB(s[0], 4, 4) == 14) {
514 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
516 *c =
GB(s[0], 0, 4) << 12 |
GB(s[1], 0, 6) << 6 |
GB(s[2], 0, 6);
517 if (*c >= 0x800)
return 3;
519 }
else if (
GB(s[0], 3, 5) == 30) {
520 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
522 *c =
GB(s[0], 0, 3) << 18 |
GB(s[1], 0, 6) << 12 |
GB(s[2], 0, 6) << 6 |
GB(s[3], 0, 6);
523 if (*c >= 0x10000 && *c <= 0x10FFFF)
return 4;
546 }
else if (c < 0x800) {
547 *buf++ = 0xC0 +
GB(c, 6, 5);
548 *buf = 0x80 +
GB(c, 0, 6);
550 }
else if (c < 0x10000) {
551 *buf++ = 0xE0 +
GB(c, 12, 4);
552 *buf++ = 0x80 +
GB(c, 6, 6);
553 *buf = 0x80 +
GB(c, 0, 6);
555 }
else if (c < 0x110000) {
556 *buf++ = 0xF0 +
GB(c, 18, 3);
557 *buf++ = 0x80 +
GB(c, 12, 6);
558 *buf++ = 0x80 +
GB(c, 6, 6);
559 *buf = 0x80 +
GB(c, 0, 6);
570 return Utf8Encode<char *>(buf, c);
575 return Utf8Encode<std::ostreambuf_iterator<char> &>(buf, c);
589 for (
const char *ptr = strchr(s,
'\0'); *s !=
'\0';) {
592 if (len == 0) len = 1;
596 if (length + len >= maxlen || (s + len > ptr))
break;
605 #ifdef DEFINE_STRCASESTR
606 char *strcasestr(
const char *haystack,
const char *needle)
608 size_t hay_len = strlen(haystack);
609 size_t needle_len = strlen(needle);
610 while (hay_len >= needle_len) {
611 if (strncasecmp(haystack, needle, needle_len) == 0)
return const_cast<char *
>(haystack);
643 int strnatcmp(
const char *s1,
const char *s2,
bool ignore_garbage_at_front)
645 if (ignore_garbage_at_front) {
652 UErrorCode status = U_ZERO_ERROR;
654 if (U_SUCCESS(status))
return result;
658 #if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN)
659 int res = OTTDStringCompare(s1, s2);
660 if (res != 0)
return res - 2;
663 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
665 if (res != 0)
return res - 2;
669 return strcasecmp(s1, s2);
672 #ifdef WITH_UNISCRIBE
679 #elif defined(WITH_ICU_I18N)
681 #include <unicode/utext.h>
682 #include <unicode/brkiter.h>
696 UErrorCode status = U_ZERO_ERROR;
700 this->utf16_str.push_back(
'\0');
701 this->utf16_to_utf8.push_back(0);
712 const char *string_base = s;
718 this->utf16_str.clear();
719 this->utf16_to_utf8.clear();
722 size_t idx = s - string_base;
724 WChar c = Utf8Consume(&s);
726 this->utf16_str.push_back((UChar)c);
729 this->utf16_str.push_back((UChar)(0xD800 + ((c - 0x10000) >> 10)));
730 this->utf16_str.push_back((UChar)(0xDC00 + ((c - 0x10000) & 0x3FF)));
731 this->utf16_to_utf8.push_back(idx);
733 this->utf16_to_utf8.push_back(idx);
735 this->utf16_str.push_back(
'\0');
736 this->utf16_to_utf8.push_back(s - string_base);
738 UText text = UTEXT_INITIALIZER;
739 UErrorCode status = U_ZERO_ERROR;
740 utext_openUChars(&text, this->utf16_str.data(), this->utf16_str.size() - 1, &status);
741 this->char_itr->setText(&text, status);
742 this->word_itr->setText(&text, status);
743 this->char_itr->first();
744 this->word_itr->first();
751 for (uint i = 0; i < this->utf16_to_utf8.size(); i++) {
752 if (this->utf16_to_utf8[i] == pos) {
761 this->char_itr->isBoundary(utf16_pos);
762 return this->utf16_to_utf8[this->char_itr->current()];
770 pos = this->char_itr->next();
774 pos = this->word_itr->following(this->char_itr->current());
778 while (pos != icu::BreakIterator::DONE &&
780 int32_t new_pos = this->word_itr->next();
783 if (new_pos == icu::BreakIterator::DONE)
break;
787 this->char_itr->isBoundary(pos);
794 return pos == icu::BreakIterator::DONE ?
END : this->utf16_to_utf8[pos];
802 pos = this->char_itr->previous();
806 pos = this->word_itr->preceding(this->char_itr->current());
810 while (pos != icu::BreakIterator::DONE &&
812 int32_t new_pos = this->word_itr->previous();
815 if (new_pos == icu::BreakIterator::DONE)
break;
819 this->char_itr->isBoundary(pos);
826 return pos == icu::BreakIterator::DONE ?
END : this->utf16_to_utf8[pos];
845 DefaultStringIterator() : string(nullptr), len(0), cur_pos(0)
852 this->len = strlen(s);
858 assert(this->
string !=
nullptr && pos <= this->len);
860 while (pos > 0 && IsUtf8Part(this->
string[pos])) pos--;
861 return this->cur_pos = pos;
864 virtual size_t Next(IterType what)
866 assert(this->
string !=
nullptr);
869 if (this->cur_pos >= this->len)
return END;
874 this->cur_pos +=
Utf8Decode(&c, this->
string + this->cur_pos);
875 return this->cur_pos;
881 size_t offs =
Utf8Decode(&c, this->
string + this->cur_pos);
883 this->cur_pos += offs;
884 offs =
Utf8Decode(&c, this->
string + this->cur_pos);
888 this->cur_pos += offs;
889 offs =
Utf8Decode(&c, this->
string + this->cur_pos);
892 return this->cur_pos;
902 virtual size_t Prev(IterType what)
904 assert(this->
string !=
nullptr);
907 if (this->cur_pos == 0)
return END;
911 return this->cur_pos =
Utf8PrevChar(this->
string + this->cur_pos) - this->string;
914 const char *s = this->
string + this->cur_pos;
929 return this->cur_pos = s - this->string;
940 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
944 if (i !=
nullptr)
return i;
946 return new DefaultStringIterator();
951 return new DefaultStringIterator();
virtual size_t Prev(IterType what=ITER_CHARACTER)=0
Move the cursor back by one iteration unit.
std::vector< size_t > utf16_to_utf8
Mapping from UTF-16 code unit position to index in the UTF-8 source string.
char32_t WChar
Type for wide characters, i.e.
@ SVS_ALLOW_NEWLINE
Allow newlines.
static uint GB(const T x, const uint8 s, const uint8 n)
Fetch n bits from x, started at bit s.
IterType
Type of the iterator.
void str_validate(char *str, const char *last, StringValidationSettings settings)
Scans the string for valid characters and if it finds invalid ones, replaces them with a question mar...
bool strtolower(char *str)
Convert a given ASCII string to lowercase.
static const size_t END
Sentinel to indicate end-of-iteration.
void str_fix_scc_encoded(char *str, const char *last)
Scan the string for old values of SCC_ENCODED and fix it to its new, static value.
static bool HasBit(const T x, const uint8 y)
Checks if a bit in a value is set.
icu::BreakIterator * word_itr
ICU iterator for words.
std::unique_ptr< icu::Collator > _current_collator
Collator for the language currently in use.
size_t Utf8Encode(T buf, WChar c)
Encode a unicode character and place it in the buffer.
virtual size_t Next(IterType what=ITER_CHARACTER)=0
Advance the cursor by one iteration unit.
@ CS_ALPHA
Only alphabetic values.
String iterator using Uniscribe as a backend.
static WChar Utf16DecodeChar(const uint16 *c)
Decode a UTF-16 character.
virtual void SetString(const char *s)=0
Set a new iteration string.
icu::BreakIterator * char_itr
ICU iterator for characters.
@ ITER_CHARACTER
Iterate over characters (or more exactly grapheme clusters).
static bool IsInsideMM(const T x, const size_t min, const size_t max)
Checks if a value is in an interval.
size_t Next(IterType what) override
Advance the cursor by one iteration unit.
size_t Utf8StringLength(const char *s)
Get the length of a UTF-8 encoded string in number of characters and thus not the number of bytes th...
bool StrValid(const char *str, const char *last)
Checks whether the given string is valid, i.e.
static StringIterator * Create()
Create a new iterator instance.
@ SVS_ALLOW_CONTROL_CODE
Allow the special control codes.
Class for iterating over different kind of parts of a string.
size_t SetCurPosition(size_t pos) override
Change the current string cursor.
virtual size_t SetCurPosition(size_t pos)=0
Change the current string cursor.
#define DEBUG(name, level,...)
Output a line of debugging information.
void str_strip_colours(char *str)
Scans the string for colour codes and strips them.
const LanguageMetadata * _current_language
The currently loaded language.
std::vector< UChar > utf16_str
UTF-16 copy of the string.
bool IsValidChar(WChar key, CharSetFilter afilter)
Only allow certain keys.
static size_t ttd_strnlen(const char *str, size_t maxlen)
Get the length of a string, within a limited buffer.
fluid_settings_t * settings
FluidSynth settings handle.
void ValidateString(const char *str)
Scans the string for valid characters and if it finds invalid ones, replaces them with a question mar...
int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
Safer implementation of vsnprintf; same as vsnprintf except:
@ ITER_WORD
Iterate over words.
@ CS_ALPHANUMERAL
Both numeric and alphabetic and spaces and stuff.
StringValidationSettings
Settings for the string validation.
size_t Utf8Decode(WChar *c, const char *s)
Decode and consume the next UTF-8 encoded character.
char *CDECL str_fmt(const char *str,...)
Format, "printf", into a newly allocated string.
void SetString(const char *s) override
Set a new iteration string.
String iterator using ICU as a backend.
int CDECL seprintf(char *str, const char *last, const char *format,...)
Safer implementation of snprintf; same as snprintf except:
char * stredup(const char *s, const char *last)
Create a duplicate of the given string.
void CDECL error(const char *s,...)
Error handling for fatal non-user errors.
size_t Utf8TrimString(char *s, size_t maxlen)
Properly terminate a UTF-8 string to some maximum length.
@ CS_NUMERAL_SPACE
Only numbers and spaces.
static const char * SkipGarbage(const char *str)
Skip some of the 'garbage' in the string that we don't want to use to sort on.
static bool IsWhitespace(WChar c)
Check whether UNICODE character is whitespace or not, i.e.
int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front)
Compares two strings using case insensitive natural sort.
char * md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
Convert the md5sum to a hexadecimal string representation.
char * strecpy(char *dst, const char *src, const char *last)
Copies characters from one buffer to another.
@ CS_HEXADECIMAL
Only hexadecimal characters.
char * strecat(char *dst, const char *src, const char *last)
Appends characters from one string to another.
int MacOSStringCompare(const char *s1, const char *s2)
Compares two strings using case insensitive natural sort.
#define lastof(x)
Get the last element of a fixed-size array.
@ SVS_REPLACE_WITH_QUESTION_MARK
Replace the unknown/bad bits with question marks.
@ CS_NUMERAL
Only numeric ones.
CharSetFilter
Valid filter types for IsValidChar.
static char * Utf8PrevChar(char *s)
Retrieve the previous UNICODE character in a UTF-8 encoded string.
static int8 Utf8EncodedCharLen(char c)
Return the length of a UTF-8 encoded value based on a single char.
size_t Prev(IterType what) override
Move the cursor back by one iteration unit.