/* MIT (BSD) license - see LICENSE file for details */
#ifndef CCAN_UTF8_H
#define CCAN_UTF8_H
#include <stdint.h>
#include <stdbool.h>
#include <string.h>

/* Unicode is limited to 21 bits, so an encoding needs at most 4 bytes. */
#define UTF8_MAX_LEN	4

/* Incremental decoder state, fed one byte at a time to utf8_decode(). */
struct utf8_state {
	/* How many characters we are expecting as part of this Unicode point */
	uint16_t total_len;
	/* How many characters we've already seen. */
	uint16_t used_len;
	/* Compound character, aka Unicode point. */
	uint32_t c;
};

/* Static initializer: every field of struct utf8_state set to zero. */
#define UTF8_STATE_INIT { 0, 0, 0 }

/**
 * utf8_state_init - reset a decoder state to all-zero.
 * @utf8_state - the state to clear.
 *
 * Leaves every field zero, the same field values UTF8_STATE_INIT gives.
 */
static inline void utf8_state_init(struct utf8_state *utf8_state)
{
	memset(utf8_state, 0, sizeof(*utf8_state));
}

/**
 * utf8_decode - continue UTF8 decoding with this character.
 * @utf8_state - initialized UTF8 state.
 * @c - the character.
 *
 * Returns false if it needs another character to give results.
 * Otherwise returns true, @utf8_state can be reused without initialization,
 * and sets errno:
 * 0: success
 * EINVAL: bad encoding (including a NUL character).
 * EFBIG: not a minimal encoding.
 * ERANGE: encoding of invalid character.
 *
 * You can extract the character from @utf8_state->c; @utf8_state->used_len
 * indicates how many characters have been consumed.
 */
bool utf8_decode(struct utf8_state *utf8_state, char c);

/**
 * utf8_encode - encode a point into UTF8.
 * @point - Unicode point to include.
 * @dest - buffer to fill.
 *
 * Returns 0 if point was invalid, otherwise bytes of dest used.
 * Sets errno to ERANGE if point was invalid.
 */
size_t utf8_encode(uint32_t point, char dest[UTF8_MAX_LEN]);

/* Check for valid UTF-8 */
bool utf8_check(const void *vbuf, size_t buflen);
#endif /* CCAN_UTF8_H */