src/string_func.h
author rubidium
Fri, 04 Jul 2008 19:00:11 +0000
changeset 11118 f66e0a4ce878
parent 10751 ebd94f2d6385
child 11178 aa617a8b4f34
permissions -rw-r--r--
(svn r13676) -Fix [FS#2126]: inactive companies from old (TTD) saves could be marked active in some cases, which then loads garbage in their statistics and such.
2186
461a2aff3486 (svn r2701) Insert Id tags into all source files
tron
parents: 1317
diff changeset
     1
/* $Id$ */
461a2aff3486 (svn r2701) Insert Id tags into all source files
tron
parents: 1317
diff changeset
     2
8844
b4f9ff470b85 (svn r11914) -Documentation: fix some @file statement
glx
parents: 8710
diff changeset
     3
/** @file string_func.h Functions related to low-level strings. */
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
     4
8710
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
     5
#ifndef STRING_FUNC_H
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
     6
#define STRING_FUNC_H
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
     7
8609
8c0c3e9dd6a0 (svn r11674) -Codechange: refactor some functions out of macros.h into more logical locations.
rubidium
parents: 8006
diff changeset
     8
#include "core/bitmath_func.hpp"
8710
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
     9
#include "string_type.h"
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    10
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    11
/**
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    12
 * usage ttd_strlcpy(dst, src, lengthof(dst));
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    13
 * @param dst destination buffer
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    14
 * @param src string to copy/concatenate
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    15
 * @param size size of the destination buffer
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    16
 */
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    17
void ttd_strlcat(char *dst, const char *src, size_t size);
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    18
void ttd_strlcpy(char *dst, const char *src, size_t size);
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    19
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    20
/**
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    21
 * usage: strecpy(dst, src, lastof(dst));
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    22
 * @param dst destination buffer
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    23
 * @param src string to copy
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    24
 * @param last pointer to the last element in the dst array
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    25
 *             if NULL no boundary check is performed
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    26
 * @return a pointer to the terminating \0 in the destination buffer
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    27
 */
7814
fe643468ad64 (svn r10673) -Cleanup: some assorted style cleanups. Primarily type* var -> type *var.
rubidium
parents: 6916
diff changeset
    28
char *strecat(char *dst, const char *src, const char *last);
fe643468ad64 (svn r10673) -Cleanup: some assorted style cleanups. Primarily type* var -> type *var.
rubidium
parents: 6916
diff changeset
    29
char *strecpy(char *dst, const char *src, const char *last);
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    30
7814
fe643468ad64 (svn r10673) -Cleanup: some assorted style cleanups. Primarily type* var -> type *var.
rubidium
parents: 6916
diff changeset
    31
char *CDECL str_fmt(const char *str, ...);
2234
d44294cfea36 (svn r2754) Move str_fmt into string.[ch]
tron
parents: 2186
diff changeset
    32
2775
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    33
/** Scans the string for valid characters and if it finds invalid ones,
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    34
 * replaces them with a question mark '?' */
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    35
void str_validate(char *str);
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    36
5101
797a070e5b22 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    37
/** Scans the string for colour codes and strips them */
797a070e5b22 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    38
void str_strip_colours(char *str);
797a070e5b22 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    39
5164
a4fb0ede4ce5 (svn r7271) -Codechange: Revert the strtolower part of r7199 as it can actually become broken due to
Darkvater
parents: 5108
diff changeset
    40
/** Convert the given string to lowercase, only works with ASCII! */
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    41
void strtolower(char *str);
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    42
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    43
8006
947dbf950dc7 (svn r11025) -Fix: safeguard StrEmpty from calls with NULL.
rubidium
parents: 7814
diff changeset
    44
static inline bool StrEmpty(const char *s) { return s == NULL || s[0] == '\0'; }
5889
272800d151ac (svn r8097) Replace strlen() {==,!=,>} 0 by the more concise {,!}StrEmpty(). Additionally the test takes O(1) instead of O(n) now
tron
parents: 5885
diff changeset
    45
272800d151ac (svn r8097) Replace strlen() {==,!=,>} 0 by the more concise {,!}StrEmpty(). Additionally the test takes O(1) instead of O(n) now
tron
parents: 5885
diff changeset
    46
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    47
/** Get the length of a string, within a limited buffer */
10465
0c68afe3d725 (svn r13008) -Fix [FS#1997]: silence some MSVC x64 warnings
glx
parents: 9207
diff changeset
    48
static inline size_t ttd_strnlen(const char *str, size_t maxlen)
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    49
{
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    50
	const char *t;
10465
0c68afe3d725 (svn r13008) -Fix [FS#1997]: silence some MSVC x64 warnings
glx
parents: 9207
diff changeset
    51
	for (t = str; *t != '\0' && (size_t)(t - str) < maxlen; t++) {}
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    52
	return t - str;
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    53
}
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    54
5885
262234e81333 (svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it.
Darkvater
parents: 5726
diff changeset
    55
/** Convert the md5sum number to a 'hexadecimal' string, return next pos in buffer */
262234e81333 (svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it.
Darkvater
parents: 5726
diff changeset
    56
char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16]);
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    57
4300
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    58
/**
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    59
 * Only allow certain keys. You can define the filter to be used. This makes
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    60
 *  sure no invalid keys can get into an editbox, like BELL.
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    61
 * @param key character to be checked
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    62
 * @param afilter the filter to use
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    63
 * @return true or false depending if the character is printable/valid or not
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    64
 */
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    65
bool IsValidChar(WChar key, CharSetFilter afilter);
4300
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    66
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    67
size_t Utf8Decode(WChar *c, const char *s);
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    68
size_t Utf8Encode(char *buf, WChar c);
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    69
size_t Utf8TrimString(char *s, size_t maxlen);
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    70
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    71
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    72
static inline WChar Utf8Consume(const char **s)
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    73
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    74
	WChar c;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    75
	*s += Utf8Decode(&c, *s);
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    76
	return c;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    77
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    78
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    79
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    80
/** Return the length of a UTF-8 encoded character.
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    81
 * @param c Unicode character.
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    82
 * @return Length of UTF-8 encoding for character.
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    83
 */
10751
ebd94f2d6385 (svn r13301) -Fix [FS#1997]: resolve more MSVC 9 x64 warnings.
rubidium
parents: 10465
diff changeset
    84
static inline int8 Utf8CharLen(WChar c)
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    85
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    86
	if (c < 0x80)       return 1;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    87
	if (c < 0x800)      return 2;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    88
	if (c < 0x10000)    return 3;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    89
	if (c < 0x110000)   return 4;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    90
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    91
	/* Invalid valid, we encode as a '?' */
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    92
	return 1;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    93
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    94
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    95
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    96
/**
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    97
 * Return the length of an UTF-8 encoded value based on a single char. This
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    98
 * char should be the first byte of the UTF-8 encoding. If not, or encoding
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    99
 * is invalid, return value is 0
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
   100
 * @param c char to query length of
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
   101
 * @return requested size
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   102
 */
10751
ebd94f2d6385 (svn r13301) -Fix [FS#1997]: resolve more MSVC 9 x64 warnings.
rubidium
parents: 10465
diff changeset
   103
static inline int8 Utf8EncodedCharLen(char c)
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   104
{
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   105
	if (GB(c, 3, 5) == 0x1E) return 4;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   106
	if (GB(c, 4, 4) == 0x0E) return 3;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   107
	if (GB(c, 5, 3) == 0x06) return 2;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   108
	if (GB(c, 7, 1) == 0x00) return 1;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   109
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   110
	/* Invalid UTF8 start encoding */
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   111
	return 0;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   112
}
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   113
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   114
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   115
/* Check if the given character is part of a UTF8 sequence */
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   116
static inline bool IsUtf8Part(char c)
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   117
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   118
	return GB(c, 6, 2) == 2;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   119
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   120
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   121
/**
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   122
 * Retrieve the previous UNICODE character in an UTF-8 encoded string.
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   123
 * @param s char pointer pointing to (the first char of) the next character
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
   124
 * @return a pointer in 's' to the previous UNICODE character's first byte
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   125
 * @note The function should not be used to determine the length of the previous
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   126
 * encoded char because it might be an invalid/corrupt start-sequence
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   127
 */
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   128
static inline char *Utf8PrevChar(const char *s)
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   129
{
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   130
	const char *ret = s;
9191
ae14770c829a (svn r12368) -Codechange: use explicit body for loops and conditions and remove -Wno-empty-body from the configure script
smatz
parents: 8844
diff changeset
   131
	while (IsUtf8Part(*--ret)) {}
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   132
	return (char*)ret;
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   133
}
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   134
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   135
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   136
static inline bool IsPrintable(WChar c)
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   137
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   138
	if (c < 0x20)   return false;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   139
	if (c < 0xE000) return true;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   140
	if (c < 0xE200) return false;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   141
	return true;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   142
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   143
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   144
/**
9207
2b390a312384 (svn r12385) -Fix [FS#ln]: Non-breaking spaces should not be broken.
peter1138
parents: 9191
diff changeset
   145
 * Check whether UNICODE character is whitespace or not, i.e. whether
2b390a312384 (svn r12385) -Fix [FS#ln]: Non-breaking spaces should not be broken.
peter1138
parents: 9191
diff changeset
   146
 * this is a potential line-break character.
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   147
 * @param c UNICODE character to check
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   148
 * @return a boolean value whether 'c' is a whitespace character or not
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   149
 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   150
 */
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   151
static inline bool IsWhitespace(WChar c)
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   152
{
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   153
	return
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   154
	  c == 0x0020 /* SPACE */ ||
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   155
	  c == 0x3000 /* IDEOGRAPHIC SPACE */
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   156
	;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   157
}
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   158
8710
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
   159
#endif /* STRING_FUNC_H */