src/string_func.h
author Tero Marttila <terom@fixme.fi>
Sat, 19 Jul 2008 01:38:52 +0300
changeset 11178 aa617a8b4f34
parent 10751 ebd94f2d6385
permissions -rw-r--r--
a working CheckAvailableNewGRFs that does a metadata search/retreival via HTTP for a given list of GRFs
2186
461a2aff3486 (svn r2701) Insert Id tags into all source files
tron
parents: 1317
diff changeset
     1
/* $Id$ */
461a2aff3486 (svn r2701) Insert Id tags into all source files
tron
parents: 1317
diff changeset
     2
8844
b4f9ff470b85 (svn r11914) -Documentation: fix some @file statement
glx
parents: 8710
diff changeset
     3
/** @file string_func.h Functions related to low-level strings. */
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
     4
8710
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
     5
#ifndef STRING_FUNC_H
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
     6
#define STRING_FUNC_H
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
     7
8609
8c0c3e9dd6a0 (svn r11674) -Codechange: refactor some functions out of macros.h into more logical locations.
rubidium
parents: 8006
diff changeset
     8
#include "core/bitmath_func.hpp"
8710
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
     9
#include "string_type.h"
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    10
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    11
/**
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    12
 * usage ttd_strlcpy(dst, src, lengthof(dst));
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    13
 * @param dst destination buffer
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    14
 * @param src string to copy/concatenate
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    15
 * @param size size of the destination buffer
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    16
 */
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    17
void ttd_strlcat(char *dst, const char *src, size_t size);
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    18
void ttd_strlcpy(char *dst, const char *src, size_t size);
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    19
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    20
/**
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    21
 * usage: strecpy(dst, src, lastof(dst));
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    22
 * @param dst destination buffer
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    23
 * @param src string to copy
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    24
 * @param last pointer to the last element in the dst array
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    25
 *             if NULL no boundary check is performed
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
    26
 * @return a pointer to the terminating \0 in the destination buffer
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    27
 */
7814
fe643468ad64 (svn r10673) -Cleanup: some assorted style cleanups. Primarily type* var -> type *var.
rubidium
parents: 6916
diff changeset
    28
char *strecat(char *dst, const char *src, const char *last);
fe643468ad64 (svn r10673) -Cleanup: some assorted style cleanups. Primarily type* var -> type *var.
rubidium
parents: 6916
diff changeset
    29
char *strecpy(char *dst, const char *src, const char *last);
1317
f382f1b439c7 (svn r1821) Move generic string handling functions to string.[ch] and introduce stre{cpy,cat}, see string.h for their semantics
tron
parents:
diff changeset
    30
7814
fe643468ad64 (svn r10673) -Cleanup: some assorted style cleanups. Primarily type* var -> type *var.
rubidium
parents: 6916
diff changeset
    31
char *CDECL str_fmt(const char *str, ...);
2234
d44294cfea36 (svn r2754) Move str_fmt into string.[ch]
tron
parents: 2186
diff changeset
    32
2775
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    33
/** Scans the string for valid characters and if it finds invalid ones,
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    34
 * replaces them with a question mark '?' */
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    35
void str_validate(char *str);
d3ed38a97250 (svn r3322) - Fix: Network window crash when it receives invalid information for example from the integrated nightly, so validate the network-input when it is received
Darkvater
parents: 2436
diff changeset
    36
5101
797a070e5b22 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    37
/** Scans the string for colour codes and strips them */
797a070e5b22 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    38
void str_strip_colours(char *str);
797a070e5b22 (svn r7172) -Fix [r6931]: The console showed '?' characters instead of colours. Now strip all
Darkvater
parents: 4300
diff changeset
    39
5164
a4fb0ede4ce5 (svn r7271) -Codechange: Revert the strtolower part of r7199 as it can actually become broken due to
Darkvater
parents: 5108
diff changeset
    40
/** Convert the given string to lowercase, only works with ASCII! */
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    41
void strtolower(char *str);
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    42
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    43
8006
947dbf950dc7 (svn r11025) -Fix: safeguard StrEmpty from calls with NULL.
rubidium
parents: 7814
diff changeset
    44
static inline bool StrEmpty(const char *s) { return s == NULL || s[0] == '\0'; }
5889
272800d151ac (svn r8097) Replace strlen() {==,!=,>} 0 by the more concise {,!}StrEmpty(). Additionally the test takes O(1) instead of O(n) now
tron
parents: 5885
diff changeset
    45
272800d151ac (svn r8097) Replace strlen() {==,!=,>} 0 by the more concise {,!}StrEmpty(). Additionally the test takes O(1) instead of O(n) now
tron
parents: 5885
diff changeset
    46
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    47
/** Get the length of a string, within a limited buffer */
10465
0c68afe3d725 (svn r13008) -Fix [FS#1997]: silence some MSVC x64 warnings
glx
parents: 9207
diff changeset
    48
static inline size_t ttd_strnlen(const char *str, size_t maxlen)
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    49
{
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    50
	const char *t;
10465
0c68afe3d725 (svn r13008) -Fix [FS#1997]: silence some MSVC x64 warnings
glx
parents: 9207
diff changeset
    51
	for (t = str; *t != '\0' && (size_t)(t - str) < maxlen; t++) {}
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    52
	return t - str;
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    53
}
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    54
5885
262234e81333 (svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it.
Darkvater
parents: 5726
diff changeset
    55
/** Convert the md5sum number to a 'hexadecimal' string, return next pos in buffer */
262234e81333 (svn r8093) -Codechange: Add a function to get a string representation of an MD5SUM and use it.
Darkvater
parents: 5726
diff changeset
    56
char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16]);
11178
aa617a8b4f34 a working CheckAvailableNewGRFs that does a metadata search/retreival via HTTP for a given list of GRFs
Tero Marttila <terom@fixme.fi>
parents: 10751
diff changeset
    57
/** Convert the hexademical string to a binary md5sum, return 0 on success, nonzero on error */
aa617a8b4f34 a working CheckAvailableNewGRFs that does a metadata search/retreival via HTTP for a given list of GRFs
Tero Marttila <terom@fixme.fi>
parents: 10751
diff changeset
    58
int StringToMd5sum(uint8 md5sum[16], const char *buf);
5317
e235a3a573e3 (svn r7475) -Fix (r7348): sanity check NewGRF action 8 strings for null terminator
peter1138
parents: 5164
diff changeset
    59
4300
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    60
/**
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    61
 * Only allow certain keys. You can define the filter to be used. This makes
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    62
 *  sure no invalid keys can get into an editbox, like BELL.
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    63
 * @param key character to be checked
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    64
 * @param afilter the filter to use
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    65
 * @return true or false depending if the character is printable/valid or not
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    66
 */
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    67
bool IsValidChar(WChar key, CharSetFilter afilter);
4300
687a17c9c557 (svn r5946) -Add: merged the TGP branch to mainline. TGP adds:
truelight
parents: 4299
diff changeset
    68
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    69
size_t Utf8Decode(WChar *c, const char *s);
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    70
size_t Utf8Encode(char *buf, WChar c);
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    71
size_t Utf8TrimString(char *s, size_t maxlen);
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    72
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    73
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    74
static inline WChar Utf8Consume(const char **s)
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    75
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    76
	WChar c;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    77
	*s += Utf8Decode(&c, *s);
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    78
	return c;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    79
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    80
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    81
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    82
/** Return the length of a UTF-8 encoded character.
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    83
 * @param c Unicode character.
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    84
 * @return Length of UTF-8 encoding for character.
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    85
 */
10751
ebd94f2d6385 (svn r13301) -Fix [FS#1997]: resolve more MSVC 9 x64 warnings.
rubidium
parents: 10465
diff changeset
    86
static inline int8 Utf8CharLen(WChar c)
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    87
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    88
	if (c < 0x80)       return 1;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    89
	if (c < 0x800)      return 2;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    90
	if (c < 0x10000)    return 3;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    91
	if (c < 0x110000)   return 4;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    92
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    93
	/* Invalid valid, we encode as a '?' */
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    94
	return 1;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    95
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    96
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
    97
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    98
/**
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
    99
 * Return the length of an UTF-8 encoded value based on a single char. This
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   100
 * char should be the first byte of the UTF-8 encoding. If not, or encoding
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   101
 * is invalid, return value is 0
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
   102
 * @param c char to query length of
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
   103
 * @return requested size
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   104
 */
10751
ebd94f2d6385 (svn r13301) -Fix [FS#1997]: resolve more MSVC 9 x64 warnings.
rubidium
parents: 10465
diff changeset
   105
static inline int8 Utf8EncodedCharLen(char c)
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   106
{
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   107
	if (GB(c, 3, 5) == 0x1E) return 4;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   108
	if (GB(c, 4, 4) == 0x0E) return 3;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   109
	if (GB(c, 5, 3) == 0x06) return 2;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   110
	if (GB(c, 7, 1) == 0x00) return 1;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   111
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   112
	/* Invalid UTF8 start encoding */
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   113
	return 0;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   114
}
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   115
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   116
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   117
/* Check if the given character is part of a UTF8 sequence */
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   118
static inline bool IsUtf8Part(char c)
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   119
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   120
	return GB(c, 6, 2) == 2;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   121
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   122
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   123
/**
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   124
 * Retrieve the previous UNICODE character in an UTF-8 encoded string.
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   125
 * @param s char pointer pointing to (the first char of) the next character
6916
e87d54a598ea (svn r9556) -Documentation: doxygen and comment-style changes. 'R', 'S'.. The end of the preliminary work is near
belugas
parents: 6574
diff changeset
   126
 * @return a pointer in 's' to the previous UNICODE character's first byte
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   127
 * @note The function should not be used to determine the length of the previous
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   128
 * encoded char because it might be an invalid/corrupt start-sequence
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   129
 */
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   130
static inline char *Utf8PrevChar(const char *s)
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   131
{
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   132
	const char *ret = s;
9191
ae14770c829a (svn r12368) -Codechange: use explicit body for loops and conditions and remove -Wno-empty-body from the configure script
smatz
parents: 8844
diff changeset
   133
	while (IsUtf8Part(*--ret)) {}
6540
4db000b2cc41 (svn r9011) -Codechange (r9003): Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset
Darkvater
parents: 6537
diff changeset
   134
	return (char*)ret;
6537
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   135
}
dbf4fa55395f (svn r9003) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of an UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence.
Darkvater
parents: 5889
diff changeset
   136
5108
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   137
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   138
static inline bool IsPrintable(WChar c)
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   139
{
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   140
	if (c < 0x20)   return false;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   141
	if (c < 0xE000) return true;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   142
	if (c < 0xE200) return false;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   143
	return true;
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   144
}
dc67d70b5a45 (svn r7182) -Feature: Merge utf8 branch. This brings us support for Unicode/UTF-8 and the option for fonts rendered by FreeType. Language changes to come.
peter1138
parents: 5101
diff changeset
   145
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   146
/**
9207
2b390a312384 (svn r12385) -Fix [FS#ln]: Non-breaking spaces should not be broken.
peter1138
parents: 9191
diff changeset
   147
 * Check whether UNICODE character is whitespace or not, i.e. whether
2b390a312384 (svn r12385) -Fix [FS#ln]: Non-breaking spaces should not be broken.
peter1138
parents: 9191
diff changeset
   148
 * this is a potential line-break character.
6541
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   149
 * @param c UNICODE character to check
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   150
 * @return a boolean value whether 'c' is a whitespace character or not
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   151
 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   152
 */
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   153
static inline bool IsWhitespace(WChar c)
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   154
{
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   155
	return
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   156
	  c == 0x0020 /* SPACE */ ||
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   157
	  c == 0x3000 /* IDEOGRAPHIC SPACE */
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   158
	;
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   159
}
ff575414f0d2 (svn r9012) -Fix/Feature (UTF8): When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (eg IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'.
Darkvater
parents: 6540
diff changeset
   160
8710
52015340050c (svn r11777) -Codechange: split the string header and make do not include it when it's not necessary.
rubidium
parents: 8609
diff changeset
   161
#endif /* STRING_FUNC_H */