/* MIT (BSD) license - see LICENSE file for details */
#ifndef CCAN_UTF8_H
#define CCAN_UTF8_H
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>

/*
 * Maximum number of chars in the UTF-8 encoding of a single Unicode point.
 * Unicode is limited to 21 bits, which UTF-8 represents in at most 4 bytes.
 * Also the declared size of the @dest buffer taken by utf8_encode().
 */
#define UTF8_MAX_LEN    4

/*
 * Incremental decoder state handed to utf8_decode() on every call.
 * Initialize it with UTF8_STATE_INIT or utf8_state_init() before the first
 * utf8_decode() call.
 */
struct utf8_state {
    /* How many characters (chars of the encoded sequence) we are expecting
     * as part of this Unicode point */
    uint16_t total_len;
    /* How many of those characters we've already seen. */
    uint16_t used_len;
    /* Compound character, aka Unicode point; utf8_decode() callers read
     * the decoded result from here. */
    uint32_t c;
};

/* Static initializer for a struct utf8_state: sets all three members to 0,
 * in declaration order (total_len, used_len, c). */
#define UTF8_STATE_INIT { 0, 0, 0 }

/**
 * utf8_state_init - reset a decoder state to all zeroes.
 * @utf8_state - the state to clear; must point to valid storage.
 *
 * Runtime counterpart of the UTF8_STATE_INIT static initializer.
 */
static inline void utf8_state_init(struct utf8_state *utf8_state)
{
    /* Clear the whole object in one go. */
    memset(utf8_state, 0, sizeof *utf8_state);
}

/**
 * utf8_decode - continue UTF8 decoding with this character.
 * @utf8_state - initialized UTF8 state.
 * @c - the next character (one char of the encoded input).
 *
 * Feed the encoded input one char per call.
 *
 * Returns false if it needs another character to give results.
 * Otherwise returns true, @utf8_state can be reused without
 * reinitialization, and sets errno:
 * 0: success
 * EINVAL: bad encoding (including a NUL character).
 * EFBIG: not a minimal encoding.
 * ERANGE: encoding of invalid character.
 *
 * You can extract the character from @utf8_state->c; @utf8_state->used_len
 * indicates how many characters have been consumed.
 *
 * Note: this header does not include <errno.h>; callers that inspect errno
 * or the E* constants need to include it themselves.
 */
bool utf8_decode(struct utf8_state *utf8_state, char c);

/**
 * utf8_encode - encode a point into UTF8.
 * @point - Unicode point to encode.
 * @dest - buffer to fill; declared with room for UTF8_MAX_LEN chars.
 *
 * Returns 0 if point was invalid, otherwise the number of bytes of dest used.
 * Sets errno to ERANGE if point was invalid.
 *
 * NOTE(review): nothing here promises a NUL terminator in @dest; presumably
 * none is written (a maximum-length encoding would leave no room) - confirm
 * against the implementation.
 */
size_t utf8_encode(uint32_t point, char dest[UTF8_MAX_LEN]);

/**
 * utf8_check - check for valid UTF-8.
 * @vbuf - start of the buffer to check.
 * @buflen - length of @vbuf (presumably in bytes - confirm against the
 *           implementation).
 *
 * NOTE(review): presumably returns true when the whole buffer is valid
 * UTF-8 and false otherwise; whether errno is set, and whether embedded NUL
 * characters are rejected as in utf8_decode(), is not visible from this
 * header - confirm against the implementation.
 */
bool utf8_check(const void *vbuf, size_t buflen);
#endif /* CCAN_UTF8_H */