Darkvater@5165: /* $Id$ */ Darkvater@5165: rubidium@9111: /** @file string.cpp Handling of C-type strings (char*). */ belugas@6420: Darkvater@5165: #include "stdafx.h" Darkvater@5165: #include "openttd.h" rubidium@7257: #include "debug.h" rubidium@8130: #include "core/alloc_func.hpp" rubidium@10299: #include "core/math_func.hpp" rubidium@8214: #include "string_func.h" Darkvater@5165: rubidium@8264: #include "table/control_codes.h" rubidium@8264: Darkvater@5165: #include Darkvater@5165: #include // required for tolower() Darkvater@5165: rubidium@10300: /** rubidium@10300: * Safer implementation of vsnprintf; same as vsnprintf except: rubidium@10300: * - last instead of size, i.e. replace sizeof with lastof. rubidium@10300: * - return gives the amount of characters added, not what it would add. rubidium@10300: * @param str buffer to write to up to last rubidium@10300: * @param last last character we may write to rubidium@10300: * @param format the formatting (see snprintf) rubidium@10300: * @param ap the list of arguments for the format rubidium@10300: * @return the number of added characters rubidium@10300: */ rubidium@10300: static int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap) rubidium@10300: { rubidium@10300: if (str >= last) return 0; rubidium@10300: size_t size = last - str; rubidium@10300: return min((int)size, vsnprintf(str, size, format, ap)); rubidium@10300: } rubidium@10300: Darkvater@5165: void ttd_strlcat(char *dst, const char *src, size_t size) Darkvater@5165: { Darkvater@5165: assert(size > 0); skidd13@10303: while (size > 0 && *dst != '\0') { skidd13@10303: size--; skidd13@10303: dst++; skidd13@10303: } skidd13@10303: skidd13@10303: ttd_strlcpy(dst, src, size); Darkvater@5165: } Darkvater@5165: Darkvater@5165: Darkvater@5165: void ttd_strlcpy(char *dst, const char *src, size_t size) Darkvater@5165: { Darkvater@5165: assert(size > 0); skidd13@10303: while (--size > 0 && *src != '\0') { skidd13@10303: *dst++ = *src++; skidd13@10303: } Darkvater@5165: *dst = '\0'; Darkvater@5165: } Darkvater@5165: Darkvater@5165: Darkvater@5165: char* strecat(char* dst, const char* src, const char* last) Darkvater@5165: { Darkvater@5165: assert(dst <= last); skidd13@10303: while (*dst != '\0') { Darkvater@5165: if (dst == last) return dst; skidd13@10303: dst++; skidd13@10303: } skidd13@10303: Darkvater@5165: return strecpy(dst, src, last); Darkvater@5165: } Darkvater@5165: Darkvater@5165: Darkvater@5165: char* strecpy(char* dst, const char* src, const char* last) Darkvater@5165: { Darkvater@5165: assert(dst <= last); skidd13@10303: while (dst != last && *src != '\0') { skidd13@10303: *dst++ = *src++; skidd13@10303: } Darkvater@5165: *dst = '\0'; skidd13@10303: Darkvater@5165: if (dst == last && *src != '\0') { rubidium@7257: #ifdef STRGEN Darkvater@5165: error("String too long for destination buffer"); rubidium@7257: #else /* STRGEN */ rubidium@7257: DEBUG(misc, 0, "String too long for destination buffer"); rubidium@7257: *dst = '\0'; rubidium@7257: #endif /* STRGEN */ Darkvater@5165: } Darkvater@5165: return dst; Darkvater@5165: } Darkvater@5165: Darkvater@5165: rubidium@10299: char *CDECL str_fmt(const char *str, ...) Darkvater@5165: { Darkvater@5165: char buf[4096]; Darkvater@5165: va_list va; Darkvater@5165: Darkvater@5165: va_start(va, str); rubidium@10299: int len = vseprintf(buf, lastof(buf), str, va); Darkvater@5165: va_end(va); rubidium@10299: char *p = MallocT(len + 1); rubidium@10299: memcpy(p, buf, len + 1); Darkvater@5165: return p; Darkvater@5165: } Darkvater@5165: Darkvater@5165: Darkvater@5165: void str_validate(char *str) Darkvater@5165: { Darkvater@5165: char *dst = str; Darkvater@5165: WChar c; Darkvater@5165: size_t len; Darkvater@5165: Darkvater@5165: for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) { Darkvater@5165: if (IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END || Darkvater@5165: IsValidChar(c - SCC_SPRITE_START, CS_ALPHANUMERAL))) { Darkvater@5165: /* Copy the character back. Even if dst is current the same as str Darkvater@5165: * (i.e. no characters have been changed) this is quicker than Darkvater@5165: * moving the pointers ahead by len */ Darkvater@5165: do { Darkvater@5165: *dst++ = *str++; Darkvater@5165: } while (--len != 0); Darkvater@5165: } else { Darkvater@5165: /* Replace the undesirable character with a question mark */ Darkvater@5165: str += len; Darkvater@5165: *dst++ = '?'; Darkvater@5165: } Darkvater@5165: } Darkvater@5165: Darkvater@5165: *dst = '\0'; Darkvater@5165: } Darkvater@5165: Darkvater@5165: Darkvater@5165: void str_strip_colours(char *str) Darkvater@5165: { Darkvater@5165: char *dst = str; Darkvater@5165: WChar c; Darkvater@5165: size_t len; Darkvater@5165: Darkvater@5165: for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) { Darkvater@5165: if (c < SCC_BLUE || c > SCC_BLACK) { Darkvater@5165: /* Copy the character back. Even if dst is current the same as str Darkvater@5165: * (i.e. no characters have been changed) this is quicker than Darkvater@5165: * moving the pointers ahead by len */ Darkvater@5165: do { Darkvater@5165: *dst++ = *str++; Darkvater@5165: } while (--len != 0); Darkvater@5165: } else { Darkvater@5165: /* Just skip (strip) the colour codes */ Darkvater@5165: str += len; Darkvater@5165: } Darkvater@5165: } Darkvater@5165: *dst = '\0'; Darkvater@5165: } Darkvater@5165: Darkvater@5165: /** Convert a given ASCII string to lowercase. Darkvater@5165: * NOTE: only support ASCII characters, no UTF8 fancy. As currently Darkvater@5165: * the function is only used to lowercase data-filenames if they are Darkvater@5165: * not found, this is sufficient. If more, or general functionality is Darkvater@5165: * needed, look to r7271 where it was removed because it was broken when Darkvater@5165: * using certain locales: eg in Turkish the uppercase 'I' was converted to belugas@6420: * '?', so just revert to the old functionality belugas@6420: * @param str string to convert */ Darkvater@5165: void strtolower(char *str) Darkvater@5165: { Darkvater@5165: for (; *str != '\0'; str++) *str = tolower(*str); Darkvater@5165: } Darkvater@5165: Darkvater@5165: /** Darkvater@5165: * Only allow certain keys. You can define the filter to be used. This makes Darkvater@5165: * sure no invalid keys can get into an editbox, like BELL. Darkvater@5165: * @param key character to be checked Darkvater@5165: * @param afilter the filter to use Darkvater@5165: * @return true or false depending if the character is printable/valid or not Darkvater@5165: */ Darkvater@5165: bool IsValidChar(WChar key, CharSetFilter afilter) Darkvater@5165: { Darkvater@5165: switch (afilter) { Darkvater@5165: case CS_ALPHANUMERAL: return IsPrintable(key); Darkvater@5165: case CS_NUMERAL: return (key >= '0' && key <= '9'); Darkvater@5165: case CS_ALPHA: return IsPrintable(key) && !(key >= '0' && key <= '9'); Darkvater@5165: } Darkvater@5165: Darkvater@5165: return false; Darkvater@5165: } Darkvater@5165: Darkvater@5165: #ifdef WIN32 glx@8181: /* Since version 3.14, MinGW Runtime has snprintf() and vsnprintf() conform to C99 but it's not the case for older versions */ glx@8181: #if (__MINGW32_MAJOR_VERSION < 3) || ((__MINGW32_MAJOR_VERSION == 3) && (__MINGW32_MINOR_VERSION < 14)) Darkvater@5165: int CDECL snprintf(char *str, size_t size, const char *format, ...) Darkvater@5165: { Darkvater@5165: va_list ap; Darkvater@5165: int ret; Darkvater@5165: Darkvater@5165: va_start(ap, format); Darkvater@5165: ret = vsnprintf(str, size, format, ap); Darkvater@5165: va_end(ap); Darkvater@5165: return ret; Darkvater@5165: } glx@8181: #endif /* MinGW Runtime < 3.14 */ Darkvater@5165: Darkvater@5165: #ifdef _MSC_VER Darkvater@5630: /* *nprintf broken, not POSIX compliant, MSDN description Darkvater@5630: * - If len < count, then len characters are stored in buffer, a null-terminator is appended, and len is returned. Darkvater@5630: * - If len = count, then len characters are stored in buffer, no null-terminator is appended, and len is returned. Darkvater@5630: * - If len > count, then count characters are stored in buffer, no null-terminator is appended, and a negative value is returned Darkvater@5630: */ Darkvater@5165: int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap) Darkvater@5165: { Darkvater@5165: int ret; Darkvater@5165: ret = _vsnprintf(str, size, format, ap); Darkvater@5631: if (ret < 0 || ret == size) str[size - 1] = '\0'; Darkvater@5165: return ret; Darkvater@5165: } Darkvater@5165: #endif /* _MSC_VER */ Darkvater@5165: Darkvater@5165: #endif /* WIN32 */ Darkvater@5165: rubidium@10299: /** rubidium@10299: * Safer implementation of snprintf; same as snprintf except: rubidium@10299: * - last instead of size, i.e. replace sizeof with lastof. rubidium@10299: * - return gives the amount of characters added, not what it would add. rubidium@10299: * @param str buffer to write to up to last rubidium@10299: * @param last last character we may write to rubidium@10299: * @param format the formatting (see snprintf) rubidium@10299: * @return the number of added characters rubidium@10299: */ rubidium@10299: int CDECL seprintf(char *str, const char *last, const char *format, ...) rubidium@10299: { rubidium@10299: va_list ap; rubidium@10299: rubidium@10299: va_start(ap, format); rubidium@10299: int ret = vseprintf(str, last, format, ap); rubidium@10299: va_end(ap); rubidium@10299: return ret; rubidium@10299: } rubidium@10299: Darkvater@5165: Darkvater@5634: /** Convert the md5sum to a hexadecimal string representation Darkvater@5634: * @param buf buffer to put the md5sum into Darkvater@5634: * @param last last character of buffer (usually lastof(buf)) Darkvater@5634: * @param md5sum the md5sum itself Darkvater@5634: * @return a pointer to the next character after the md5sum */ Darkvater@5634: char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16]) Darkvater@5634: { Darkvater@5634: char *p = buf; Darkvater@5634: Darkvater@5634: for (uint i = 0; i < 16; i++) { rubidium@10299: p += seprintf(p, last, "%02X", md5sum[i]); Darkvater@5634: } Darkvater@5634: Darkvater@5634: return p; Darkvater@5634: } Darkvater@5634: Darkvater@5634: Darkvater@5165: /* UTF-8 handling routines */ Darkvater@5165: Darkvater@5165: Darkvater@5165: /* Decode and consume the next UTF-8 encoded character Darkvater@5165: * @param c Buffer to place decoded character. Darkvater@5165: * @param s Character stream to retrieve character from. Darkvater@5165: * @return Number of characters in the sequence. Darkvater@5165: */ Darkvater@5165: size_t Utf8Decode(WChar *c, const char *s) Darkvater@5165: { Darkvater@5165: assert(c != NULL); Darkvater@5165: skidd13@7928: if (!HasBit(s[0], 7)) { Darkvater@5165: /* Single byte character: 0xxxxxxx */ Darkvater@5165: *c = s[0]; Darkvater@5165: return 1; Darkvater@5165: } else if (GB(s[0], 5, 3) == 6) { Darkvater@5165: if (IsUtf8Part(s[1])) { Darkvater@5165: /* Double byte character: 110xxxxx 10xxxxxx */ Darkvater@5165: *c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6); Darkvater@5165: if (*c >= 0x80) return 2; Darkvater@5165: } Darkvater@5165: } else if (GB(s[0], 4, 4) == 14) { Darkvater@5165: if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) { Darkvater@5165: /* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */ Darkvater@5165: *c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6); Darkvater@5165: if (*c >= 0x800) return 3; Darkvater@5165: } Darkvater@5165: } else if (GB(s[0], 3, 5) == 30) { Darkvater@5165: if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) { Darkvater@5165: /* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ Darkvater@5165: *c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6); Darkvater@5165: if (*c >= 0x10000 && *c <= 0x10FFFF) return 4; Darkvater@5165: } Darkvater@5165: } Darkvater@5165: Darkvater@5380: //DEBUG(misc, 1, "[utf8] invalid UTF-8 sequence"); Darkvater@5165: *c = '?'; Darkvater@5165: return 1; Darkvater@5165: } Darkvater@5165: Darkvater@5165: Darkvater@5165: /* Encode a unicode character and place it in the buffer Darkvater@5165: * @param buf Buffer to place character. Darkvater@5165: * @param c Unicode character to encode. Darkvater@5165: * @return Number of characters in the encoded sequence. Darkvater@5165: */ Darkvater@5165: size_t Utf8Encode(char *buf, WChar c) Darkvater@5165: { Darkvater@5165: if (c < 0x80) { Darkvater@5165: *buf = c; Darkvater@5165: return 1; Darkvater@5165: } else if (c < 0x800) { Darkvater@5165: *buf++ = 0xC0 + GB(c, 6, 5); Darkvater@5165: *buf = 0x80 + GB(c, 0, 6); Darkvater@5165: return 2; Darkvater@5165: } else if (c < 0x10000) { Darkvater@5165: *buf++ = 0xE0 + GB(c, 12, 4); Darkvater@5165: *buf++ = 0x80 + GB(c, 6, 6); Darkvater@5165: *buf = 0x80 + GB(c, 0, 6); Darkvater@5165: return 3; Darkvater@5165: } else if (c < 0x110000) { Darkvater@5165: *buf++ = 0xF0 + GB(c, 18, 3); Darkvater@5165: *buf++ = 0x80 + GB(c, 12, 6); Darkvater@5165: *buf++ = 0x80 + GB(c, 6, 6); Darkvater@5165: *buf = 0x80 + GB(c, 0, 6); Darkvater@5165: return 4; Darkvater@5165: } Darkvater@5165: Darkvater@5380: //DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c); Darkvater@5165: *buf = '?'; Darkvater@5165: return 1; Darkvater@5165: } Darkvater@6215: Darkvater@6215: /** Darkvater@6215: * Properly terminate an UTF8 string to some maximum length Darkvater@6215: * @param s string to check if it needs additional trimming Darkvater@6215: * @param maxlen the maximum length the buffer can have. Darkvater@6215: * @return the new length in bytes of the string (eg. strlen(new_string)) Darkvater@6215: * @NOTE maxlen is the string length _INCLUDING_ the terminating '\0' Darkvater@6215: */ Darkvater@6215: size_t Utf8TrimString(char *s, size_t maxlen) Darkvater@6215: { Darkvater@6215: size_t length = 0; Darkvater@6215: Darkvater@6215: for (const char *ptr = strchr(s, '\0'); *s != '\0';) { Darkvater@6215: size_t len = Utf8EncodedCharLen(*s); Darkvater@6274: /* Silently ignore invalid UTF8 sequences, our only concern trimming */ Darkvater@6274: if (len == 0) len = 1; Darkvater@6215: Darkvater@6215: /* Take care when a hard cutoff was made for the string and Darkvater@6215: * the last UTF8 sequence is invalid */ Darkvater@6215: if (length + len >= maxlen || (s + len > ptr)) break; Darkvater@6215: s += len; Darkvater@6215: length += len; Darkvater@6215: } Darkvater@6215: Darkvater@6215: *s = '\0'; Darkvater@6215: return length; tron@6218: } glx@9997: glx@9997: #ifndef _GNU_SOURCE glx@9997: #include "core/math_func.hpp" glx@9997: char *strndup(const char *s, size_t len) glx@9997: { glx@9997: len = min(strlen(s), len); glx@9997: char *tmp = CallocT(len + 1); glx@9997: memcpy(tmp, s, len); glx@9997: return tmp; glx@9997: } glx@9997: #endif /* !_GNU_SOURCE */