src/string.h
author belugas
Wed, 04 Apr 2007 01:35:16 +0000
changeset 6420 456c275f3313
parent 6248 e4a2ed7e5613
child 7318 632cd0497770
permissions -rw-r--r--
(svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
2186
db48cf29b983 (svn r2701) Insert Id tags into all source files
tron
parents: 1317
diff changeset
     1
/* $Id$ */
db48cf29b983 (svn r2701) Insert Id tags into all source files
tron
parents: 1317
diff changeset
     2
6420
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
     3
/** @file string.h */
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
     4
1317
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
     5
#ifndef STRING_H
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
     6
#define STRING_H
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
     7
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
     8
#include "macros.h"
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
     9
6420
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    10
/**
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    11
 * usage ttd_strlcpy(dst, src, lengthof(dst));
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    12
 * @param dst destination buffer
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    13
 * @param src string to copy/concatenate
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    14
 * @param size size of the destination buffer
1317
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    15
 */
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    16
void ttd_strlcat(char *dst, const char *src, size_t size);
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    17
void ttd_strlcpy(char *dst, const char *src, size_t size);
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    18
6420
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    19
/**
1317
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    20
 * usage: strecpy(dst, src, lastof(dst));
6420
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    21
 * @param dst destination buffer
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    22
 * @param src string to copy
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    23
 * @param last pointer to the last element in the dst array
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    24
 *             if NULL no boundary check is performed
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    25
 * @return a pointer to the terminating \0 in the destination buffer
1317
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    26
 */
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    27
char* strecat(char* dst, const char* src, const char* last);
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    28
char* strecpy(char* dst, const char* src, const char* last);
3c90086ff34f (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    29
2234
8a9b4c6d9c2a (svn r2754) Move str_fmt into string.[ch]
tron
parents: 2186
diff changeset
    30
char* CDECL str_fmt(const char* str, ...);
8a9b4c6d9c2a (svn r2754) Move str_fmt into string.[ch]
tron
parents: 2186
diff changeset
    31
2775
a18db0ab5e51 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    32
/** Scans the string for valid characters and if it finds invalid ones,
a18db0ab5e51 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    33
 * replaces them with a question mark '?' */
a18db0ab5e51 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    34
void str_validate(char *str);
a18db0ab5e51 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    35
5101
88ee12d71503 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    36
/** Scans the string for colour codes and strips them */
88ee12d71503 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    37
void str_strip_colours(char *str);
88ee12d71503 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    38
4300
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    39
/**
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    40
 * Valid filter types for IsValidChar.
4300
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    41
 */
6248
e4a2ed7e5613 (svn r9051) -Codechange: typedef [enum|struct] Y {} X; -> [enum|struct] X {};
rubidium
parents: 6215
diff changeset
    42
enum CharSetFilter {
6420
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    43
	CS_ALPHANUMERAL,      ///< Both numeric and alphabetic and spaces and stuff
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    44
	CS_NUMERAL,           ///< Only numeric ones
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
    45
	CS_ALPHA,             ///< Only alphabetic values
6248
e4a2ed7e5613 (svn r9051) -Codechange: typedef [enum|struct] Y {} X; -> [enum|struct] X {};
rubidium
parents: 6215
diff changeset
    46
};
4300
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    47
5164
54755bc93577 (svn r7271) -Codechange: Revert the strtolower part of r7199 as it can actually become broken due to
Darkvater
parents: 5108
diff changeset
    48
/** Convert the given string to lowercase, only works with ASCII! */
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    49
void strtolower(char *str);
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    50
5317
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    51
5638
ee1871005c80 (svn r8097) Replace strlen() {==,!=,>} 0 by the more concise {,!}StrEmpty(). Additionally the test takes O(1) instead of O(n) now
tron
parents: 5634
diff changeset
    52
static inline bool StrEmpty(const char* s) { return s[0] == '\0'; }
ee1871005c80 (svn r8097) Replace strlen() {==,!=,>} 0 by the more concise {,!}StrEmpty(). Additionally the test takes O(1) instead of O(n) now
tron
parents: 5634
diff changeset
    53
ee1871005c80 (svn r8097) Replace strlen() {==,!=,>} 0 by the more concise {,!}StrEmpty(). Additionally the test takes O(1) instead of O(n) now
tron
parents: 5634
diff changeset
    54
5317
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    55
/** Get the length of a string, within a limited buffer */
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    56
static inline int ttd_strnlen(const char *str, int maxlen)
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    57
{
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    58
	const char *t;
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    59
	for (t = str; *t != '\0' && t - str < maxlen; t++);
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    60
	return t - str;
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    61
}
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    62
5634
ba6b9ebe197a (svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it.
Darkvater
parents: 5475
diff changeset
    63
/** Convert the md5sum number to a 'hexadecimal' string, return next pos in buffer */
ba6b9ebe197a (svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it.
Darkvater
parents: 5475
diff changeset
    64
char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16]);
5317
a41d1389c092 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    65
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    66
typedef uint32 WChar;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    67
4300
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    68
/**
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    69
 * Only allow certain keys. You can define the filter to be used. This makes
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    70
 *  sure no invalid keys can get into an editbox, like BELL.
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    71
 * @param key character to be checked
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    72
 * @param afilter the filter to use
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    73
 * @return true or false depending if the character is printable/valid or not
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    74
 */
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    75
bool IsValidChar(WChar key, CharSetFilter afilter);
4300
c7e43c47a2b9 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    76
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    77
size_t Utf8Decode(WChar *c, const char *s);
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    78
size_t Utf8Encode(char *buf, WChar c);
6215
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
    79
size_t Utf8TrimString(char *s, size_t maxlen);
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    80
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    81
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    82
static inline WChar Utf8Consume(const char **s)
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    83
{
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    84
	WChar c;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    85
	*s += Utf8Decode(&c, *s);
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    86
	return c;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    87
}
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    88
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    89
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    90
/** Return the length of a UTF-8 encoded character.
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    91
 * @param c Unicode character.
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    92
 * @return Length of UTF-8 encoding for character.
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    93
 */
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    94
static inline size_t Utf8CharLen(WChar c)
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    95
{
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    96
	if (c < 0x80)       return 1;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    97
	if (c < 0x800)      return 2;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    98
	if (c < 0x10000)    return 3;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    99
	if (c < 0x110000)   return 4;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   100
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   101
	/* Invalid valid, we encode as a '?' */
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   102
	return 1;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   103
}
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   104
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   105
6215
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   106
/**
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   107
 * Return the length of an UTF-8 encoded value based on a single char. This
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   108
 * char should be the first byte of the UTF-8 encoding. If not, or encoding
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   109
 * is invalid, return value is 0
6420
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
   110
 * @param c char to query length of
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
   111
 * @return requested size
6215
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   112
 */
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   113
static inline size_t Utf8EncodedCharLen(char c)
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   114
{
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   115
	if (GB(c, 3, 5) == 0x1E) return 4;
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   116
	if (GB(c, 4, 4) == 0x0E) return 3;
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   117
	if (GB(c, 5, 3) == 0x06) return 2;
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   118
	if (GB(c, 7, 1) == 0x00) return 1;
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   119
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   120
	/* Invalid UTF8 start encoding */
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   121
	return 0;
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   122
}
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   123
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   124
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   125
/* Check if the given character is part of a UTF8 sequence */
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   126
static inline bool IsUtf8Part(char c)
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   127
{
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   128
	return GB(c, 6, 2) == 2;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   129
}
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   130
6211
91567df8ebd6 (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5638
diff changeset
   131
/**
6214
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   132
 * Retrieve the previous UNICODE character in an UTF-8 encoded string.
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   133
 * @param s char pointer pointing to (the first char of) the next character
6420
456c275f3313 (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6248
diff changeset
   134
 * @return a pointer in 's' to the previous UNICODE character's first byte
6214
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   135
 * @note The function should not be used to determine the length of the previous
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   136
 * encoded char because it might be an invalid/corrupt start-sequence
6211
91567df8ebd6 (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5638
diff changeset
   137
 */
6214
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   138
static inline char *Utf8PrevChar(const char *s)
6211
91567df8ebd6 (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5638
diff changeset
   139
{
6214
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   140
	const char *ret = s;
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   141
	while (IsUtf8Part(*--ret));
1f361897ed7f (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6211
diff changeset
   142
	return (char*)ret;
6211
91567df8ebd6 (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5638
diff changeset
   143
}
91567df8ebd6 (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5638
diff changeset
   144
5108
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   145
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   146
static inline bool IsPrintable(WChar c)
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   147
{
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   148
	if (c < 0x20)   return false;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   149
	if (c < 0xE000) return true;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   150
	if (c < 0xE200) return false;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   151
	return true;
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   152
}
aeaef6fe53b7 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   153
6215
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   154
/**
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   155
 * Check whether UNICODE character is whitespace or not
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   156
 * @param c UNICODE character to check
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   157
 * @return a boolean value whether 'c' is a whitespace character or not
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   158
 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   159
 */
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   160
static inline bool IsWhitespace(WChar c)
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   161
{
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   162
	return
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   163
	  c == 0x0020 /* SPACE */ ||
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   164
	  c == 0x00A0 /* NO-BREAK SPACE */ ||
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   165
	  c == 0x3000 /* IDEOGRAPHIC SPACE */
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   166
	;
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   167
}
bbd141b026b5 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6214
diff changeset
   168
4200
e6ba94085b81 (svn r5684) - Codechange: create an strtolower() function that uses tolower() on a whole string and apply it in the places this was used.
Darkvater
parents: 2775
diff changeset
   169
2436
7d5df545bd5d (svn r2962) - const correctness for all Get* functions and most Draw* functions that don't change their pointer parameters
Darkvater
parents: 2234
diff changeset
   170
#endif /* STRING_H */