equal
deleted
inserted
replaced
72 */ |
72 */ |
73 bool IsValidChar(WChar key, CharSetFilter afilter); |
73 bool IsValidChar(WChar key, CharSetFilter afilter); |
74 |
74 |
75 size_t Utf8Decode(WChar *c, const char *s); |
75 size_t Utf8Decode(WChar *c, const char *s); |
76 size_t Utf8Encode(char *buf, WChar c); |
76 size_t Utf8Encode(char *buf, WChar c); |
|
77 size_t Utf8TrimString(char *s, size_t maxlen); |
77 |
78 |
78 |
79 |
79 static inline WChar Utf8Consume(const char **s) |
80 static inline WChar Utf8Consume(const char **s) |
80 { |
81 { |
81 WChar c; |
82 WChar c; |
95 if (c < 0x10000) return 3; |
96 if (c < 0x10000) return 3; |
96 if (c < 0x110000) return 4; |
97 if (c < 0x110000) return 4; |
97 |
98 |
98 /* Invalid valid, we encode as a '?' */ |
99 /* Invalid valid, we encode as a '?' */ |
99 return 1; |
100 return 1; |
|
101 } |
|
102 |
|
103 |
|
104 /** |
|
105 * Return the length of an UTF-8 encoded value based on a single char. This |
|
106 * char should be the first byte of the UTF-8 encoding. If not, or encoding |
|
107 * is invalid, return value is 0 |
|
108 */ |
|
109 static inline size_t Utf8EncodedCharLen(char c) |
|
110 { |
|
111 if (GB(c, 3, 5) == 0x1E) return 4; |
|
112 if (GB(c, 4, 4) == 0x0E) return 3; |
|
113 if (GB(c, 5, 3) == 0x06) return 2; |
|
114 if (GB(c, 7, 1) == 0x00) return 1; |
|
115 |
|
116 /* Invalid UTF8 start encoding */ |
|
117 return 0; |
100 } |
118 } |
101 |
119 |
102 |
120 |
103 /* Check if the given character is part of a UTF8 sequence */ |
121 /* Check if the given character is part of a UTF8 sequence */ |
104 static inline bool IsUtf8Part(char c) |
122 static inline bool IsUtf8Part(char c) |
127 if (c < 0xE000) return true; |
145 if (c < 0xE000) return true; |
128 if (c < 0xE200) return false; |
146 if (c < 0xE200) return false; |
129 return true; |
147 return true; |
130 } |
148 } |
131 |
149 |
|
150 /** |
|
151 * Check whether UNICODE character is whitespace or not |
|
152 * @param c UNICODE character to check |
|
153 * @return a boolean value whether 'c' is a whitespace character or not |
|
154 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm |
|
155 */ |
|
156 static inline bool IsWhitespace(WChar c) |
|
157 { |
|
158 return |
|
159 c == 0x0020 /* SPACE */ || |
|
160 c == 0x00A0 /* NO-BREAK SPACE */ || |
|
161 c == 0x3000 /* IDEOGRAPHIC SPACE */ |
|
162 ; |
|
163 } |
|
164 |
132 |
165 |
133 #endif /* STRING_H */ |
166 #endif /* STRING_H */ |