OpenTTD Source
12.0-beta2
|
Go to the documentation of this file.
15 #include "string_base.h"
37 #include <unicode/ustring.h>
42 #if defined(WITH_COCOA)
61 int CDECL
vseprintf(
char *str,
const char *last,
const char *format, va_list ap)
63 ptrdiff_t diff = last - str;
64 if (diff < 0)
return 0;
65 return std::min(
static_cast<int>(diff), vsnprintf(str, diff + 1, format, ap));
84 char *
strecat(
char *dst,
const char *src,
const char *last)
87 while (*dst !=
'\0') {
88 if (dst == last)
return dst;
112 char *
strecpy(
char *dst,
const char *src,
const char *last)
115 while (dst != last && *src !=
'\0') {
120 if (dst == last && *src !=
'\0') {
121 #if defined(STRGEN) || defined(SETTINGSGEN)
122 error(
"String too long for destination buffer");
124 Debug(misc, 0,
"String too long for destination buffer");
137 char *
stredup(
const char *s,
const char *last)
139 size_t len = last ==
nullptr ? strlen(s) :
ttd_strnlen(s, last - s + 1);
140 char *tmp = CallocT<char>(len + 1);
158 char *p = MallocT<char>(len + 1);
159 memcpy(p, buf, len + 1);
171 while (str <= last && *str !=
'\0') {
173 if ((len == 0 && str + 4 > last) || str + len > last)
break;
177 if (c ==
'\0')
break;
179 if (c == 0xE028 || c == 0xE02A) {
193 while (str <= last && *str !=
'\0') {
219 if (len == 0 || str + len > last || len !=
Utf8Decode(&c, str)) {
231 }
while (--len != 0);
283 auto buf = str.data();
284 auto last = buf + str.size();
286 std::ostringstream dst;
287 std::ostreambuf_iterator<char> dst_iter(dst);
304 while (str <= last && *str !=
'\0') {
310 if (len == 0 || str + len > last)
return false;
314 if (!IsPrintable(c) || (c >= SCC_SPRITE_START && c <= SCC_SPRITE_END)) {
332 size_t pos = str.find_first_not_of(
' ');
344 size_t pos = str.find_last_not_of(
' ');
345 if (pos != std::string::npos) str.erase(pos + 1);
367 bool StrStartsWith(
const std::string_view str,
const std::string_view prefix)
369 size_t prefix_len = prefix.size();
370 if (str.size() < prefix_len)
return false;
371 return str.compare(0, prefix_len, prefix, 0, prefix_len) == 0;
380 bool StrEndsWith(
const std::string_view str,
const std::string_view suffix)
382 size_t suffix_len = suffix.size();
383 if (str.size() < suffix_len)
return false;
384 return str.compare(str.size() - suffix_len, suffix_len, suffix, 0, suffix_len) == 0;
396 if (c < SCC_BLUE || c > SCC_BLACK) {
402 }
while (--len != 0);
421 while (Utf8Consume(&t) != 0) len++;
449 bool changed =
false;
450 for (; *str !=
'\0'; str++) {
451 char new_str = tolower(*str);
452 changed |= new_str != *str;
458 bool strtolower(std::string &str, std::string::size_type offs)
460 bool changed =
false;
461 for (
auto ch = str.begin() + offs; ch != str.end(); ++ch) {
462 auto new_ch =
static_cast<char>(tolower(
static_cast<unsigned char>(*ch)));
463 changed |= new_ch != *ch;
480 case CS_NUMERAL:
return (key >=
'0' && key <=
'9');
482 case CS_ALPHA:
return IsPrintable(key) && !(key >=
'0' && key <=
'9');
483 case CS_HEXADECIMAL:
return (key >=
'0' && key <=
'9') || (key >=
'a' && key <=
'f') || (key >=
'A' && key <=
'F');
484 default: NOT_REACHED();
489 #if defined(_MSC_VER) && _MSC_VER < 1900
497 int CDECL vsnprintf(
char *str,
size_t size,
const char *format, va_list ap)
499 if (size == 0)
return 0;
502 int ret = _vsnprintf(str, size, format, ap);
505 if (errno != ERANGE) {
510 }
else if ((
size_t)ret < size) {
519 str[size - 1] =
'\0';
535 int CDECL
seprintf(
char *str,
const char *last,
const char *format, ...)
539 va_start(ap, format);
540 int ret =
vseprintf(str, last, format, ap);
557 for (uint i = 0; i < 16; i++) {
558 p +=
seprintf(p, last,
"%02X", md5sum[i]);
576 assert(c !=
nullptr);
582 }
else if (
GB(s[0], 5, 3) == 6) {
583 if (IsUtf8Part(s[1])) {
585 *c =
GB(s[0], 0, 5) << 6 |
GB(s[1], 0, 6);
586 if (*c >= 0x80)
return 2;
588 }
else if (
GB(s[0], 4, 4) == 14) {
589 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
591 *c =
GB(s[0], 0, 4) << 12 |
GB(s[1], 0, 6) << 6 |
GB(s[2], 0, 6);
592 if (*c >= 0x800)
return 3;
594 }
else if (
GB(s[0], 3, 5) == 30) {
595 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
597 *c =
GB(s[0], 0, 3) << 18 |
GB(s[1], 0, 6) << 12 |
GB(s[2], 0, 6) << 6 |
GB(s[3], 0, 6);
598 if (*c >= 0x10000 && *c <= 0x10FFFF)
return 4;
621 }
else if (c < 0x800) {
622 *buf++ = 0xC0 +
GB(c, 6, 5);
623 *buf = 0x80 +
GB(c, 0, 6);
625 }
else if (c < 0x10000) {
626 *buf++ = 0xE0 +
GB(c, 12, 4);
627 *buf++ = 0x80 +
GB(c, 6, 6);
628 *buf = 0x80 +
GB(c, 0, 6);
630 }
else if (c < 0x110000) {
631 *buf++ = 0xF0 +
GB(c, 18, 3);
632 *buf++ = 0x80 +
GB(c, 12, 6);
633 *buf++ = 0x80 +
GB(c, 6, 6);
634 *buf = 0x80 +
GB(c, 0, 6);
645 return Utf8Encode<char *>(buf, c);
650 return Utf8Encode<std::ostreambuf_iterator<char> &>(buf, c);
664 for (
const char *ptr = strchr(s,
'\0'); *s !=
'\0';) {
667 if (len == 0) len = 1;
671 if (length + len >= maxlen || (s + len > ptr))
break;
680 #ifdef DEFINE_STRCASESTR
681 char *strcasestr(
const char *haystack,
const char *needle)
683 size_t hay_len = strlen(haystack);
684 size_t needle_len = strlen(needle);
685 while (hay_len >= needle_len) {
686 if (strncasecmp(haystack, needle, needle_len) == 0)
return const_cast<char *
>(haystack);
718 int strnatcmp(
const char *s1,
const char *s2,
bool ignore_garbage_at_front)
720 if (ignore_garbage_at_front) {
727 UErrorCode status = U_ZERO_ERROR;
729 if (U_SUCCESS(status))
return result;
733 #if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN)
734 int res = OTTDStringCompare(s1, s2);
735 if (res != 0)
return res - 2;
738 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
740 if (res != 0)
return res - 2;
744 return strcasecmp(s1, s2);
747 #ifdef WITH_UNISCRIBE
754 #elif defined(WITH_ICU_I18N)
756 #include <unicode/utext.h>
757 #include <unicode/brkiter.h>
771 UErrorCode status = U_ZERO_ERROR;
775 this->utf16_str.push_back(
'\0');
776 this->utf16_to_utf8.push_back(0);
787 const char *string_base = s;
793 this->utf16_str.clear();
794 this->utf16_to_utf8.clear();
797 size_t idx = s - string_base;
799 WChar c = Utf8Consume(&s);
801 this->utf16_str.push_back((UChar)c);
804 this->utf16_str.push_back((UChar)(0xD800 + ((c - 0x10000) >> 10)));
805 this->utf16_str.push_back((UChar)(0xDC00 + ((c - 0x10000) & 0x3FF)));
806 this->utf16_to_utf8.push_back(idx);
808 this->utf16_to_utf8.push_back(idx);
810 this->utf16_str.push_back(
'\0');
811 this->utf16_to_utf8.push_back(s - string_base);
813 UText text = UTEXT_INITIALIZER;
814 UErrorCode status = U_ZERO_ERROR;
815 utext_openUChars(&text, this->utf16_str.data(), this->utf16_str.size() - 1, &status);
816 this->char_itr->setText(&text, status);
817 this->word_itr->setText(&text, status);
818 this->char_itr->first();
819 this->word_itr->first();
826 for (uint i = 0; i < this->utf16_to_utf8.size(); i++) {
827 if (this->utf16_to_utf8[i] == pos) {
836 this->char_itr->isBoundary(utf16_pos);
837 return this->utf16_to_utf8[this->char_itr->current()];
845 pos = this->char_itr->next();
849 pos = this->word_itr->following(this->char_itr->current());
853 while (pos != icu::BreakIterator::DONE &&
855 int32_t new_pos = this->word_itr->next();
858 if (new_pos == icu::BreakIterator::DONE)
break;
862 this->char_itr->isBoundary(pos);
869 return pos == icu::BreakIterator::DONE ?
END : this->utf16_to_utf8[pos];
877 pos = this->char_itr->previous();
881 pos = this->word_itr->preceding(this->char_itr->current());
885 while (pos != icu::BreakIterator::DONE &&
887 int32_t new_pos = this->word_itr->previous();
890 if (new_pos == icu::BreakIterator::DONE)
break;
894 this->char_itr->isBoundary(pos);
901 return pos == icu::BreakIterator::DONE ?
END : this->utf16_to_utf8[pos];
920 DefaultStringIterator() : string(nullptr), len(0), cur_pos(0)
927 this->len = strlen(s);
933 assert(this->
string !=
nullptr && pos <= this->len);
935 while (pos > 0 && IsUtf8Part(this->
string[pos])) pos--;
936 return this->cur_pos = pos;
939 virtual size_t Next(IterType what)
941 assert(this->
string !=
nullptr);
944 if (this->cur_pos >= this->len)
return END;
949 this->cur_pos +=
Utf8Decode(&c, this->
string + this->cur_pos);
950 return this->cur_pos;
956 size_t offs =
Utf8Decode(&c, this->
string + this->cur_pos);
958 this->cur_pos += offs;
959 offs =
Utf8Decode(&c, this->
string + this->cur_pos);
963 this->cur_pos += offs;
964 offs =
Utf8Decode(&c, this->
string + this->cur_pos);
967 return this->cur_pos;
977 virtual size_t Prev(IterType what)
979 assert(this->
string !=
nullptr);
982 if (this->cur_pos == 0)
return END;
986 return this->cur_pos =
Utf8PrevChar(this->
string + this->cur_pos) - this->string;
989 const char *s = this->
string + this->cur_pos;
1004 return this->cur_pos = s - this->string;
1015 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
1019 if (i !=
nullptr)
return i;
1021 return new DefaultStringIterator();
1026 return new DefaultStringIterator();
virtual size_t Prev(IterType what=ITER_CHARACTER)=0
Move the cursor back by one iteration unit.
std::vector< size_t > utf16_to_utf8
Mapping from UTF-16 code unit position to index in the UTF-8 source string.
char32_t WChar
Type for wide characters, i.e.
@ SVS_ALLOW_NEWLINE
Allow newlines.
static uint GB(const T x, const uint8 s, const uint8 n)
Fetch n bits from x, started at bit s.
IterType
Type of the iterator.
bool strtolower(char *str)
Convert a given ASCII string to lowercase.
static void StrRightTrimInPlace(std::string &str)
Trim the spaces from the end of given string in place, i.e.
static const size_t END
Sentinel to indicate end-of-iteration.
void str_fix_scc_encoded(char *str, const char *last)
Scan the string for old values of SCC_ENCODED and fix it to its new, static value.
static bool HasBit(const T x, const uint8 y)
Checks if a bit in a value is set.
icu::BreakIterator * word_itr
ICU iterator for words.
std::unique_ptr< icu::Collator > _current_collator
Collator for the language currently in use.
size_t Utf8Encode(T buf, WChar c)
Encode a unicode character and place it in the buffer.
virtual size_t Next(IterType what=ITER_CHARACTER)=0
Advance the cursor by one iteration unit.
@ CS_ALPHA
Only alphabetic values.
String iterator using Uniscribe as a backend.
static WChar Utf16DecodeChar(const uint16 *c)
Decode a UTF-16 character.
virtual void SetString(const char *s)=0
Set a new iteration string.
icu::BreakIterator * char_itr
ICU iterator for characters.
@ ITER_CHARACTER
Iterate over characters (or more exactly grapheme clusters).
static bool IsInsideMM(const T x, const size_t min, const size_t max)
Checks if a value is in an interval.
size_t Next(IterType what) override
Advance the cursor by one iteration unit.
size_t Utf8StringLength(const char *s)
Get the length of an UTF-8 encoded string in number of characters and thus not the number of bytes th...
bool StrValid(const char *str, const char *last)
Checks whether the given string is valid, i.e.
static StringIterator * Create()
Create a new iterator instance.
@ SVS_ALLOW_CONTROL_CODE
Allow the special control codes.
Class for iterating over different kind of parts of a string.
size_t SetCurPosition(size_t pos) override
Change the current string cursor.
virtual size_t SetCurPosition(size_t pos)=0
Change the current string cursor.
void StrMakeValidInPlace(char *str, const char *last, StringValidationSettings settings)
Scans the string for invalid characters and replaces them with a question mark '?' (if not ignored).
void StrTrimInPlace(std::string &str)
Trim the spaces from given string in place, i.e.
void str_strip_colours(char *str)
Scans the string for colour codes and strips them.
bool StrStartsWith(const std::string_view str, const std::string_view prefix)
Check whether the given string starts with the given prefix.
const LanguageMetadata * _current_language
The currently loaded language.
std::vector< UChar > utf16_str
UTF-16 copy of the string.
bool IsValidChar(WChar key, CharSetFilter afilter)
Only allow certain keys.
static size_t ttd_strnlen(const char *str, size_t maxlen)
Get the length of a string, within a limited buffer.
fluid_settings_t * settings
FluidSynth settings handle.
int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
Safer implementation of vsnprintf; same as vsnprintf except:
@ ITER_WORD
Iterate over words.
@ CS_ALPHANUMERAL
Both numeric and alphabetic and spaces and stuff.
StringValidationSettings
Settings for the string validation.
bool StrEndsWith(const std::string_view str, const std::string_view suffix)
Check whether the given string ends with the given suffix.
size_t Utf8Decode(WChar *c, const char *s)
Decode and consume the next UTF-8 encoded character.
std::string StrMakeValid(const std::string &str, StringValidationSettings settings)
Scans the string for invalid characters and replaces them with a question mark '?' (if not ignored).
char *CDECL str_fmt(const char *str,...)
Format, "printf", into a newly allocated string.
void SetString(const char *s) override
Set a new iteration string.
static void StrLeftTrimInPlace(std::string &str)
Trim the spaces from the begin of given string in place, i.e.
String iterator using ICU as a backend.
int CDECL seprintf(char *str, const char *last, const char *format,...)
Safer implementation of snprintf; same as snprintf except:
char * stredup(const char *s, const char *last)
Create a duplicate of the given string.
void CDECL error(const char *s,...)
Error handling for fatal non-user errors.
#define Debug(name, level, format_string,...)
Output a line of debugging information.
size_t Utf8TrimString(char *s, size_t maxlen)
Properly terminate a UTF-8 string to some maximum length.
@ CS_NUMERAL_SPACE
Only numbers and spaces.
static const char * SkipGarbage(const char *str)
Skip some of the 'garbage' in the string that we don't want to use to sort on.
static bool IsWhitespace(WChar c)
Check whether UNICODE character is whitespace or not, i.e.
int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front)
Compares two strings using case insensitive natural sort.
char * md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
Convert the md5sum to a hexadecimal string representation.
char * strecpy(char *dst, const char *src, const char *last)
Copies characters from one buffer to another.
@ CS_HEXADECIMAL
Only hexadecimal characters.
char * strecat(char *dst, const char *src, const char *last)
Appends characters from one string to another.
int MacOSStringCompare(const char *s1, const char *s2)
Compares two strings using case insensitive natural sort.
#define lastof(x)
Get the last element of a fixed-size array.
@ SVS_REPLACE_WITH_QUESTION_MARK
Replace the unknown/bad bits with question marks.
@ CS_NUMERAL
Only numeric ones.
CharSetFilter
Valid filter types for IsValidChar.
static char * Utf8PrevChar(char *s)
Retrieve the previous UNICODE character in a UTF-8 encoded string.
static int8 Utf8EncodedCharLen(char c)
Return the length of a UTF-8 encoded value based on a single char.
size_t Prev(IterType what) override
Move the cursor back by one iteration unit.