OpenTTD Source
1.11.0-beta2
|
Go to the documentation of this file.
15 #include "string_base.h"
37 #include <unicode/ustring.h>
42 #if defined(WITH_COCOA)
61 int CDECL
vseprintf(
char *str,
const char *last,
const char *format, va_list ap)
63 ptrdiff_t diff = last - str;
64 if (diff < 0)
return 0;
65 return std::min(
static_cast<int>(diff), vsnprintf(str, diff + 1, format, ap));
84 char *
strecat(
char *dst,
const char *src,
const char *last)
87 while (*dst !=
'\0') {
88 if (dst == last)
return dst;
112 char *
strecpy(
char *dst,
const char *src,
const char *last)
115 while (dst != last && *src !=
'\0') {
120 if (dst == last && *src !=
'\0') {
121 #if defined(STRGEN) || defined(SETTINGSGEN)
122 error(
"String too long for destination buffer");
124 DEBUG(misc, 0,
"String too long for destination buffer");
137 char *
stredup(
const char *s,
const char *last)
139 size_t len = last ==
nullptr ? strlen(s) :
ttd_strnlen(s, last - s + 1);
140 char *tmp = CallocT<char>(len + 1);
158 char *p = MallocT<char>(len + 1);
159 memcpy(p, buf, len + 1);
171 while (str <= last && *str !=
'\0') {
173 if ((len == 0 && str + 4 > last) || str + len > last)
break;
177 if (c ==
'\0')
break;
179 if (c == 0xE028 || c == 0xE02A) {
193 while (str <= last && *str !=
'\0') {
200 if ((len == 0 && str + 4 > last) || str + len > last)
break;
207 if (c ==
'\0')
break;
215 }
while (--len != 0);
252 auto buf = str.data();
253 auto last = buf + str.size();
255 std::ostringstream dst;
256 std::ostreambuf_iterator<char> dst_iter(dst);
270 str_validate(
const_cast<char *
>(str), str + strlen(str) + 1);
285 while (str <= last && *str !=
'\0') {
291 if (len == 0 || str + len > last)
return false;
295 if (!IsPrintable(c) || (c >= SCC_SPRITE_START && c <= SCC_SPRITE_END)) {
313 if (c < SCC_BLUE || c > SCC_BLACK) {
319 }
while (--len != 0);
338 while (Utf8Consume(&t) != 0) len++;
356 bool changed =
false;
357 for (; *str !=
'\0'; str++) {
358 char new_str = tolower(*str);
359 changed |= new_str != *str;
365 bool strtolower(std::string &str, std::string::size_type offs)
367 bool changed =
false;
368 for (
auto ch = str.begin() + offs; ch != str.end(); ++ch) {
369 auto new_ch =
static_cast<char>(tolower(
static_cast<unsigned char>(*ch)));
370 changed |= new_ch != *ch;
387 case CS_NUMERAL:
return (key >=
'0' && key <=
'9');
389 case CS_ALPHA:
return IsPrintable(key) && !(key >=
'0' && key <=
'9');
390 case CS_HEXADECIMAL:
return (key >=
'0' && key <=
'9') || (key >=
'a' && key <=
'f') || (key >=
'A' && key <=
'F');
391 default: NOT_REACHED();
396 #if defined(_MSC_VER) && _MSC_VER < 1900
404 int CDECL vsnprintf(
char *str,
size_t size,
const char *format, va_list ap)
406 if (size == 0)
return 0;
409 int ret = _vsnprintf(str, size, format, ap);
412 if (errno != ERANGE) {
417 }
else if ((
size_t)ret < size) {
426 str[size - 1] =
'\0';
442 int CDECL
seprintf(
char *str,
const char *last,
const char *format, ...)
446 va_start(ap, format);
447 int ret =
vseprintf(str, last, format, ap);
464 for (uint i = 0; i < 16; i++) {
465 p +=
seprintf(p, last,
"%02X", md5sum[i]);
483 assert(c !=
nullptr);
489 }
else if (
GB(s[0], 5, 3) == 6) {
490 if (IsUtf8Part(s[1])) {
492 *c =
GB(s[0], 0, 5) << 6 |
GB(s[1], 0, 6);
493 if (*c >= 0x80)
return 2;
495 }
else if (
GB(s[0], 4, 4) == 14) {
496 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
498 *c =
GB(s[0], 0, 4) << 12 |
GB(s[1], 0, 6) << 6 |
GB(s[2], 0, 6);
499 if (*c >= 0x800)
return 3;
501 }
else if (
GB(s[0], 3, 5) == 30) {
502 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
504 *c =
GB(s[0], 0, 3) << 18 |
GB(s[1], 0, 6) << 12 |
GB(s[2], 0, 6) << 6 |
GB(s[3], 0, 6);
505 if (*c >= 0x10000 && *c <= 0x10FFFF)
return 4;
528 }
else if (c < 0x800) {
529 *buf++ = 0xC0 +
GB(c, 6, 5);
530 *buf = 0x80 +
GB(c, 0, 6);
532 }
else if (c < 0x10000) {
533 *buf++ = 0xE0 +
GB(c, 12, 4);
534 *buf++ = 0x80 +
GB(c, 6, 6);
535 *buf = 0x80 +
GB(c, 0, 6);
537 }
else if (c < 0x110000) {
538 *buf++ = 0xF0 +
GB(c, 18, 3);
539 *buf++ = 0x80 +
GB(c, 12, 6);
540 *buf++ = 0x80 +
GB(c, 6, 6);
541 *buf = 0x80 +
GB(c, 0, 6);
552 return Utf8Encode<char *>(buf, c);
557 return Utf8Encode<std::ostreambuf_iterator<char> &>(buf, c);
571 for (
const char *ptr = strchr(s,
'\0'); *s !=
'\0';) {
574 if (len == 0) len = 1;
578 if (length + len >= maxlen || (s + len > ptr))
break;
587 #ifdef DEFINE_STRCASESTR
588 char *strcasestr(
const char *haystack,
const char *needle)
590 size_t hay_len = strlen(haystack);
591 size_t needle_len = strlen(needle);
592 while (hay_len >= needle_len) {
593 if (strncasecmp(haystack, needle, needle_len) == 0)
return const_cast<char *
>(haystack);
625 int strnatcmp(
const char *s1,
const char *s2,
bool ignore_garbage_at_front)
627 if (ignore_garbage_at_front) {
634 UErrorCode status = U_ZERO_ERROR;
636 if (U_SUCCESS(status))
return result;
640 #if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN)
641 int res = OTTDStringCompare(s1, s2);
642 if (res != 0)
return res - 2;
645 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
647 if (res != 0)
return res - 2;
651 return strcasecmp(s1, s2);
654 #ifdef WITH_UNISCRIBE
661 #elif defined(WITH_ICU_I18N)
663 #include <unicode/utext.h>
664 #include <unicode/brkiter.h>
678 UErrorCode status = U_ZERO_ERROR;
682 this->utf16_str.push_back(
'\0');
683 this->utf16_to_utf8.push_back(0);
694 const char *string_base = s;
700 this->utf16_str.clear();
701 this->utf16_to_utf8.clear();
704 size_t idx = s - string_base;
706 WChar c = Utf8Consume(&s);
708 this->utf16_str.push_back((UChar)c);
711 this->utf16_str.push_back((UChar)(0xD800 + ((c - 0x10000) >> 10)));
712 this->utf16_str.push_back((UChar)(0xDC00 + ((c - 0x10000) & 0x3FF)));
713 this->utf16_to_utf8.push_back(idx);
715 this->utf16_to_utf8.push_back(idx);
717 this->utf16_str.push_back(
'\0');
718 this->utf16_to_utf8.push_back(s - string_base);
720 UText text = UTEXT_INITIALIZER;
721 UErrorCode status = U_ZERO_ERROR;
722 utext_openUChars(&text, this->utf16_str.data(), this->utf16_str.size() - 1, &status);
723 this->char_itr->setText(&text, status);
724 this->word_itr->setText(&text, status);
725 this->char_itr->first();
726 this->word_itr->first();
733 for (uint i = 0; i < this->utf16_to_utf8.size(); i++) {
734 if (this->utf16_to_utf8[i] == pos) {
743 this->char_itr->isBoundary(utf16_pos);
744 return this->utf16_to_utf8[this->char_itr->current()];
752 pos = this->char_itr->next();
756 pos = this->word_itr->following(this->char_itr->current());
760 while (pos != icu::BreakIterator::DONE &&
762 int32_t new_pos = this->word_itr->next();
765 if (new_pos == icu::BreakIterator::DONE)
break;
769 this->char_itr->isBoundary(pos);
776 return pos == icu::BreakIterator::DONE ?
END : this->utf16_to_utf8[pos];
784 pos = this->char_itr->previous();
788 pos = this->word_itr->preceding(this->char_itr->current());
792 while (pos != icu::BreakIterator::DONE &&
794 int32_t new_pos = this->word_itr->previous();
797 if (new_pos == icu::BreakIterator::DONE)
break;
801 this->char_itr->isBoundary(pos);
808 return pos == icu::BreakIterator::DONE ?
END : this->utf16_to_utf8[pos];
827 DefaultStringIterator() : string(nullptr), len(0), cur_pos(0)
834 this->len = strlen(s);
840 assert(this->
string !=
nullptr && pos <= this->len);
842 while (pos > 0 && IsUtf8Part(this->
string[pos])) pos--;
843 return this->cur_pos = pos;
846 virtual size_t Next(IterType what)
848 assert(this->
string !=
nullptr);
851 if (this->cur_pos >= this->len)
return END;
856 this->cur_pos +=
Utf8Decode(&c, this->
string + this->cur_pos);
857 return this->cur_pos;
863 size_t offs =
Utf8Decode(&c, this->
string + this->cur_pos);
865 this->cur_pos += offs;
866 offs =
Utf8Decode(&c, this->
string + this->cur_pos);
870 this->cur_pos += offs;
871 offs =
Utf8Decode(&c, this->
string + this->cur_pos);
874 return this->cur_pos;
884 virtual size_t Prev(IterType what)
886 assert(this->
string !=
nullptr);
889 if (this->cur_pos == 0)
return END;
893 return this->cur_pos =
Utf8PrevChar(this->
string + this->cur_pos) - this->string;
896 const char *s = this->
string + this->cur_pos;
911 return this->cur_pos = s - this->string;
922 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
926 if (i !=
nullptr)
return i;
928 return new DefaultStringIterator();
933 return new DefaultStringIterator();
virtual size_t Prev(IterType what=ITER_CHARACTER)=0
Move the cursor back by one iteration unit.
std::vector< size_t > utf16_to_utf8
Mapping from UTF-16 code unit position to index in the UTF-8 source string.
char32_t WChar
Type for wide characters, i.e. non-UTF-8 encoded Unicode characters.
@ SVS_ALLOW_NEWLINE
Allow newlines.
static uint GB(const T x, const uint8 s, const uint8 n)
Fetch n bits from x, started at bit s.
IterType
Type of the iterator.
void str_validate(char *str, const char *last, StringValidationSettings settings)
Scans the string for valid characters and if it finds invalid ones, replaces them with a question mark.
bool strtolower(char *str)
Convert a given ASCII string to lowercase.
static const size_t END
Sentinel to indicate end-of-iteration.
void str_fix_scc_encoded(char *str, const char *last)
Scan the string for old values of SCC_ENCODED and replace them with its new, static value.
static bool HasBit(const T x, const uint8 y)
Checks if a bit in a value is set.
icu::BreakIterator * word_itr
ICU iterator for words.
std::unique_ptr< icu::Collator > _current_collator
Collator for the language currently in use.
size_t Utf8Encode(T buf, WChar c)
Encode a unicode character and place it in the buffer.
virtual size_t Next(IterType what=ITER_CHARACTER)=0
Advance the cursor by one iteration unit.
@ CS_ALPHA
Only alphabetic values (as implemented: any printable character that is not a digit).
String iterator using Uniscribe as a backend.
static WChar Utf16DecodeChar(const uint16 *c)
Decode a UTF-16 character.
virtual void SetString(const char *s)=0
Set a new iteration string.
icu::BreakIterator * char_itr
ICU iterator for characters.
@ ITER_CHARACTER
Iterate over characters (or more exactly grapheme clusters).
static bool IsInsideMM(const T x, const size_t min, const size_t max)
Checks if a value is in an interval.
size_t Next(IterType what) override
Advance the cursor by one iteration unit.
size_t Utf8StringLength(const char *s)
Get the length of a UTF-8 encoded string in number of characters, and thus not the number of bytes that the encoded string contains.
bool StrValid(const char *str, const char *last)
Checks whether the given string is valid, i.e. contains only valid (printable) characters and is properly terminated.
static StringIterator * Create()
Create a new iterator instance.
@ SVS_ALLOW_CONTROL_CODE
Allow the special control codes.
Class for iterating over different kind of parts of a string.
size_t SetCurPosition(size_t pos) override
Change the current string cursor.
virtual size_t SetCurPosition(size_t pos)=0
Change the current string cursor.
#define DEBUG(name, level,...)
Output a line of debugging information.
void str_strip_colours(char *str)
Scans the string for colour codes and strips them.
const LanguageMetadata * _current_language
The currently loaded language.
std::vector< UChar > utf16_str
UTF-16 copy of the string.
bool IsValidChar(WChar key, CharSetFilter afilter)
Only allow certain keys.
static size_t ttd_strnlen(const char *str, size_t maxlen)
Get the length of a string, within a limited buffer.
fluid_settings_t * settings
FluidSynth settings handle.
void ValidateString(const char *str)
Scans the string for valid characters and if it finds invalid ones, replaces them with a question mark.
int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
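Safer implementation of vsnprintf; same as vsnprintf except that it takes a pointer to the last element of the buffer instead of a size, and it returns the number of characters actually added rather than the number that would have been added.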
@ ITER_WORD
Iterate over words.
@ CS_ALPHANUMERAL
Both numeric and alphabetic and spaces and stuff.
StringValidationSettings
Settings for the string validation.
size_t Utf8Decode(WChar *c, const char *s)
Decode and consume the next UTF-8 encoded character.
char *CDECL str_fmt(const char *str,...)
Format, "printf", into a newly allocated string.
void SetString(const char *s) override
Set a new iteration string.
String iterator using ICU as a backend.
int CDECL seprintf(char *str, const char *last, const char *format,...)
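Safer implementation of snprintf; same as snprintf except that it takes a pointer to the last element of the buffer instead of a size, and it returns the number of characters actually added rather than the number that would have been added.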
char * stredup(const char *s, const char *last)
Create a duplicate of the given string.
void CDECL error(const char *s,...)
Error handling for fatal non-user errors.
size_t Utf8TrimString(char *s, size_t maxlen)
Properly terminate a UTF-8 string at some maximum length.
@ CS_NUMERAL_SPACE
Only numbers and spaces.
static const char * SkipGarbage(const char *str)
Skip some of the 'garbage' in the string that we don't want to use to sort on.
static bool IsWhitespace(WChar c)
Check whether a Unicode character is whitespace or not, i.e. whether it is a potential line-break character.
int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front)
Compares two strings using case insensitive natural sort.
char * md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
Convert the md5sum to a hexadecimal string representation.
char * strecpy(char *dst, const char *src, const char *last)
Copies characters from one buffer to another.
@ CS_HEXADECIMAL
Only hexadecimal characters.
char * strecat(char *dst, const char *src, const char *last)
Appends characters from one string to another.
int MacOSStringCompare(const char *s1, const char *s2)
Compares two strings using case insensitive natural sort.
#define lastof(x)
Get the last element of a fixed-size array.
@ SVS_REPLACE_WITH_QUESTION_MARK
Replace the unknown/bad bits with question marks.
@ CS_NUMERAL
Only numeric ones.
CharSetFilter
Valid filter types for IsValidChar.
static char * Utf8PrevChar(char *s)
Retrieve the previous UNICODE character in an UTF-8 encoded string.
static int8 Utf8EncodedCharLen(char c)
Return the length of an UTF-8 encoded value based on a single char.
size_t Prev(IterType what) override
Move the cursor back by one iteration unit.