|
1 /* $Id$ */ |
|
2 |
|
3 /** @file string_func.h Functions related to low-level strings. */ |
|
4 |
|
5 #ifndef STRING_FUNC_H |
|
6 #define STRING_FUNC_H |
|
7 |
|
8 #include "core/bitmath_func.hpp" |
|
9 #include "string_type.h" |
|
10 |
|
/**
 * usage: ttd_strlcpy(dst, src, lengthof(dst));
 * @param dst destination buffer
 * @param src string to copy/concatenate
 * @param size size of the destination buffer
 */
|
17 void ttd_strlcat(char *dst, const char *src, size_t size); |
|
18 void ttd_strlcpy(char *dst, const char *src, size_t size); |
|
19 |
|
/**
 * usage: strecpy(dst, src, lastof(dst));
 * @param dst destination buffer
 * @param src string to copy
 * @param last pointer to the last element in the dst array;
 *             if NULL, no boundary check is performed
 * @return a pointer to the terminating \0 in the destination buffer
 */
|
28 char *strecat(char *dst, const char *src, const char *last); |
|
29 char *strecpy(char *dst, const char *src, const char *last); |
|
30 |
|
31 char *CDECL str_fmt(const char *str, ...); |
|
32 |
|
/** Scans the string and replaces any invalid characters it finds
 * with a question mark '?' */
|
35 void str_validate(char *str); |
|
36 |
|
37 /** Scans the string for colour codes and strips them */ |
|
38 void str_strip_colours(char *str); |
|
39 |
|
40 /** Convert the given string to lowercase, only works with ASCII! */ |
|
41 void strtolower(char *str); |
|
42 |
|
43 |
|
/**
 * Check whether a string is empty.
 * @param s string to check, may be NULL
 * @return true if \a s is NULL or its first character is the terminating '\0'
 */
static inline bool StrEmpty(const char *s)
{
	if (s == NULL) return true;
	return *s == '\0';
}
|
45 |
|
46 |
|
/**
 * Get the length of a string, within a limited buffer.
 * @param str    string to measure
 * @param maxlen maximum number of characters to inspect
 * @return number of characters before the terminating '\0', or \a maxlen when
 *         no terminator occurs within the first \a maxlen characters
 *         (0 when \a maxlen is zero or negative)
 */
static inline int ttd_strnlen(const char *str, int maxlen)
{
	int len = 0;
	/* Test the bound before dereferencing so that str[maxlen] is never read;
	 * the buffer may be exactly maxlen characters long and unterminated. */
	while (len < maxlen && str[len] != '\0') len++;
	return len;
}
|
54 |
|
55 /** Convert the md5sum number to a 'hexadecimal' string, return next pos in buffer */ |
|
56 char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16]); |
|
57 |
|
/**
 * Only allow certain keys. You can define the filter to be used. This makes
 * sure no invalid keys, such as BELL, can get into an editbox.
 * @param key character to be checked
 * @param afilter the filter to use
 * @return true if the character is printable/valid, false otherwise
 */
|
65 bool IsValidChar(WChar key, CharSetFilter afilter); |
|
66 |
|
67 size_t Utf8Decode(WChar *c, const char *s); |
|
68 size_t Utf8Encode(char *buf, WChar c); |
|
69 size_t Utf8TrimString(char *s, size_t maxlen); |
|
70 |
|
71 |
|
72 static inline WChar Utf8Consume(const char **s) |
|
73 { |
|
74 WChar c; |
|
75 *s += Utf8Decode(&c, *s); |
|
76 return c; |
|
77 } |
|
78 |
|
79 |
|
80 /** Return the length of a UTF-8 encoded character. |
|
81 * @param c Unicode character. |
|
82 * @return Length of UTF-8 encoding for character. |
|
83 */ |
|
84 static inline size_t Utf8CharLen(WChar c) |
|
85 { |
|
86 if (c < 0x80) return 1; |
|
87 if (c < 0x800) return 2; |
|
88 if (c < 0x10000) return 3; |
|
89 if (c < 0x110000) return 4; |
|
90 |
|
91 /* Invalid valid, we encode as a '?' */ |
|
92 return 1; |
|
93 } |
|
94 |
|
95 |
|
/**
 * Return the length of a UTF-8 encoded value based on a single char. This
 * char should be the first byte of the UTF-8 encoding. If it is not, or the
 * encoding is invalid, the return value is 0.
 * @param c char to query length of
 * @return requested size: 4, 3, 2 or 1 for a recognised first byte, otherwise 0
 * @note The comparisons look like the lead-byte patterns 11110xxx, 1110xxxx,
 *       110xxxxx and 0xxxxxxx, assuming GB(x, s, n) extracts n bits starting
 *       at bit s -- confirm against the definition of GB.
 */
static inline size_t Utf8EncodedCharLen(char c)
{
	/* Checked in this order; the first matching pattern decides the length. */
	return (GB(c, 3, 5) == 0x1E) ? 4 :
	       (GB(c, 4, 4) == 0x0E) ? 3 :
	       (GB(c, 5, 3) == 0x06) ? 2 :
	       (GB(c, 7, 1) == 0x00) ? 1 :
	       0; /* Invalid UTF8 start encoding */
}
|
113 |
|
114 |
|
/**
 * Check if the given character is part of a UTF8 sequence.
 * @param c character to check
 * @return true if GB(c, 6, 2) equals 2, false otherwise
 */
static inline bool IsUtf8Part(char c)
{
	const bool is_part = (GB(c, 6, 2) == 2);
	return is_part;
}
|
120 |
|
/**
 * Retrieve the previous UNICODE character in an UTF-8 encoded string.
 * @param s char pointer pointing to (the first char of) the next character
 * @return a pointer in 's' to the previous UNICODE character's first byte
 * @note The function should not be used to determine the length of the
 *       previous encoded char because it might be an invalid/corrupt
 *       start-sequence.
 * @note No start-of-buffer bound is checked: the scan steps backwards from
 *       's' until IsUtf8Part() reports false.
 */
static inline char *Utf8PrevChar(const char *s)
{
	const char *cur = s;

	/* Always step back at least once, then keep going over sequence parts. */
	do {
		cur--;
	} while (IsUtf8Part(*cur));

	return (char *)cur;
}
|
134 |
|
135 |
|
136 static inline bool IsPrintable(WChar c) |
|
137 { |
|
138 if (c < 0x20) return false; |
|
139 if (c < 0xE000) return true; |
|
140 if (c < 0xE200) return false; |
|
141 return true; |
|
142 } |
|
143 |
|
144 /** |
|
145 * Check whether UNICODE character is whitespace or not |
|
146 * @param c UNICODE character to check |
|
147 * @return a boolean value whether 'c' is a whitespace character or not |
|
148 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm |
|
149 */ |
|
150 static inline bool IsWhitespace(WChar c) |
|
151 { |
|
152 return |
|
153 c == 0x0020 /* SPACE */ || |
|
154 c == 0x00A0 /* NO-BREAK SPACE */ || |
|
155 c == 0x3000 /* IDEOGRAPHIC SPACE */ |
|
156 ; |
|
157 } |
|
158 |
|
159 #endif /* STRING_FUNC_H */ |