src/string.h
changeset 6215 bbd141b026b5
parent 6214 1f361897ed7f
child 6248 e4a2ed7e5613
equal deleted inserted replaced
6214:1f361897ed7f 6215:bbd141b026b5
    72  */
    72  */
    73 bool IsValidChar(WChar key, CharSetFilter afilter);
    73 bool IsValidChar(WChar key, CharSetFilter afilter);
    74 
    74 
    75 size_t Utf8Decode(WChar *c, const char *s);
    75 size_t Utf8Decode(WChar *c, const char *s);
    76 size_t Utf8Encode(char *buf, WChar c);
    76 size_t Utf8Encode(char *buf, WChar c);
       
    77 size_t Utf8TrimString(char *s, size_t maxlen);
    77 
    78 
    78 
    79 
    79 static inline WChar Utf8Consume(const char **s)
    80 static inline WChar Utf8Consume(const char **s)
    80 {
    81 {
    81 	WChar c;
    82 	WChar c;
    95 	if (c < 0x10000)    return 3;
    96 	if (c < 0x10000)    return 3;
    96 	if (c < 0x110000)   return 4;
    97 	if (c < 0x110000)   return 4;
    97 
    98 
    98 	/* Invalid value, we encode as a '?' */
    99 	/* Invalid value, we encode as a '?' */
    99 	return 1;
   100 	return 1;
       
   101 }
       
   102 
       
   103 
       
   104 /**
       
   105  * Return the length of an UTF-8 encoded value based on a single char. This
       
   106  * char should be the first byte of the UTF-8 encoding. If not, or encoding
       
   107  * is invalid, return value is 0
       
   108  */
       
   109 static inline size_t Utf8EncodedCharLen(char c)
       
   110 {
       
   111 	if (GB(c, 3, 5) == 0x1E) return 4;
       
   112 	if (GB(c, 4, 4) == 0x0E) return 3;
       
   113 	if (GB(c, 5, 3) == 0x06) return 2;
       
   114 	if (GB(c, 7, 1) == 0x00) return 1;
       
   115 
       
   116 	/* Invalid UTF8 start encoding */
       
   117 	return 0;
   100 }
   118 }
   101 
   119 
   102 
   120 
   103 /* Check if the given character is part of a UTF8 sequence */
   121 /* Check if the given character is part of a UTF8 sequence */
   104 static inline bool IsUtf8Part(char c)
   122 static inline bool IsUtf8Part(char c)
   127 	if (c < 0xE000) return true;
   145 	if (c < 0xE000) return true;
   128 	if (c < 0xE200) return false;
   146 	if (c < 0xE200) return false;
   129 	return true;
   147 	return true;
   130 }
   148 }
   131 
   149 
       
   150 /**
       
   151  * Check whether UNICODE character is whitespace or not
       
   152  * @param c UNICODE character to check
       
   153  * @return a boolean value whether 'c' is a whitespace character or not
       
   154  * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
       
   155  */
       
   156 static inline bool IsWhitespace(WChar c)
       
   157 {
       
   158 	return
       
   159 	  c == 0x0020 /* SPACE */ ||
       
   160 	  c == 0x00A0 /* NO-BREAK SPACE */ ||
       
   161 	  c == 0x3000 /* IDEOGRAPHIC SPACE */
       
   162 	;
       
   163 }
       
   164 
   132 
   165 
   133 #endif /* STRING_H */
   166 #endif /* STRING_H */