#include "flatcc_rtconfig.h"
#include "flatcc_json_parser.h"
#include "flatcc_assert.h"

#define uoffset_t flatbuffers_uoffset_t
#define soffset_t flatbuffers_soffset_t
#define voffset_t flatbuffers_voffset_t
#define utype_t flatbuffers_utype_t

#define uoffset_size sizeof(uoffset_t)
#define soffset_size sizeof(soffset_t)
#define voffset_size sizeof(voffset_t)
#define utype_size sizeof(utype_t)

#define offset_size uoffset_size

#if FLATCC_USE_GRISU3 && !defined(PORTABLE_USE_GRISU3)
#define PORTABLE_USE_GRISU3 1
#endif
#include "portable/pparsefp.h"
#include "portable/pbase64.h"

#if FLATCC_USE_SSE4_2
#ifdef __SSE4_2__
#define USE_SSE4_2
#endif
#endif

#ifdef USE_SSE4_2
/* _mm_cmpistri and the _SIDD_* flags are SSE4.2 intrinsics. */
#include <nmmintrin.h>
/*
 * Advances `haystack` (must be an lvalue) in 16 byte steps using
 * _mm_cmpistri against `needle` for as long as the intrinsic returns
 * index 16 and at least 16 bytes remain before `end`. Does nothing
 * when fewer than 16 bytes remain. Wrapped in a single do/while so the
 * macro is safe inside unbraced if/else.
 */
#define cmpistri(end, haystack, needle, flags)                              \
    do {                                                                    \
        if ((end) - (haystack) >= 16) {                                     \
            int i;                                                          \
            __m128i a = _mm_loadu_si128((const __m128i *)(needle));         \
            do {                                                            \
                __m128i b = _mm_loadu_si128((const __m128i *)(haystack));   \
                i = _mm_cmpistri(a, b, flags);                              \
                haystack += i;                                              \
            } while (i == 16 && (end) - (haystack) >= 16);                  \
        }                                                                   \
    } while (0)
#endif

/*
 * Maps a parser error code to its message as listed in
 * FLATCC_JSON_PARSE_ERROR_MAP. Codes not in the map yield "unknown".
 */
const char *flatcc_json_parser_error_string(int err)
{
    switch (err) {
#define XX(no, str)                                                         \
    case flatcc_json_parser_error_##no:                                     \
        return str;
        FLATCC_JSON_PARSE_ERROR_MAP(XX)
#undef XX
    default:
        return "unknown";
    }
}

/*
 * Records `err` at `loc` unless an error is already recorded (the
 * first error wins). `pos` is stored as a 1-based offset from the
 * start of the current line. Always returns `end`, which is how
 * parse functions in this file signal that parsing stopped.
 */
const char *flatcc_json_parser_set_error(flatcc_json_parser_t *ctx, const char *loc, const char *end, int err)
{
    if (!ctx->error) {
        ctx->error = err;
        ctx->pos = (int)(loc - ctx->line_start + 1);
        ctx->error_loc = loc;
    }
    return end;
}

/*
 * Scans the plain part of a string body starting at `buf`.
 *
 * Returns a pointer to the first closing quote or backslash found.
 * Reaching `end` first reports an unterminated string, and a byte
 * below 0x20 reports an invalid character; both return `end`.
 */
const char *flatcc_json_parser_string_part(flatcc_json_parser_t *ctx, const char *buf, const char *end)
{
/*
 * Disabled because it doesn't catch all control characters, but is
 * useful for performance testing.
 */
#if 0
//#ifdef USE_SSE4_2
    cmpistri(end, buf, "\"\\\0\r\n\t\v\f", _SIDD_POSITIVE_POLARITY);
#else
    /*
     * Testing for signed char >= 0x20 would also capture UTF-8
     * encodings that we could verify, and also invalid encodings like
     * 0xff, but we do not want to enforce strict UTF-8.
     */
    while (buf != end && *buf != '\"' && ((unsigned char)*buf) >= 0x20 && *buf != '\\') {
        ++buf;
    }
#endif
    if (buf == end) {
        return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unterminated_string);
    }
    if (*buf == '"') {
        return buf;
    }
    /* The scan only stops on '"', '\\' or a byte below 0x20. */
    if (*buf < 0x20) {
        return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_character);
    }
    return buf;
}

/*
 * Skips spaces, tabs and line breaks starting at `buf` and returns a
 * pointer to the first byte above 0x20, or `end`. Line breaks (LF, CR
 * and CR LF) increment ctx->line and reset ctx->line_start. Any other
 * byte at or below 0x20 reports an unexpected character.
 *
 * The statement order and the interleaved #if sections are
 * deliberate; the wide-space fast paths jump into each other.
 */
const char *flatcc_json_parser_space_ext(flatcc_json_parser_t *ctx, const char *buf, const char *end)
{
again:
#ifdef USE_SSE4_2
    /*
     * We can include line break, but then error reporting suffers and
     * it really makes no big difference.
     * (Variant with line breaks: "\x20\t\v\f\r\n".)
     */
    cmpistri(end, buf, "\x20\t\v\f", _SIDD_NEGATIVE_POLARITY);
#else
#if FLATCC_ALLOW_UNALIGNED_ACCESS
    while (end - buf >= 16) {
        if (*buf > 0x20) {
            return buf;
        }
#if FLATCC_JSON_PARSE_WIDE_SPACE
        if (((uint64_t *)buf)[0] != 0x2020202020202020) {
descend:
            if (((uint32_t *)buf)[0] == 0x20202020) {
                buf += 4;
            }
#endif
            if (((uint16_t *)buf)[0] == 0x2020) {
                buf += 2;
            }
            if (*buf == 0x20) {
                ++buf;
            }
            if (*buf > 0x20) {
                return buf;
            }
            break;
#if FLATCC_JSON_PARSE_WIDE_SPACE
        }
        if (((uint64_t *)buf)[1] != 0x2020202020202020) {
            buf += 8;
            goto descend;
        }
        buf += 16;
#endif
    }
#endif
#endif
    while (buf != end && *buf == 0x20) {
        ++buf;
    }
    while (buf != end && *buf <= 0x20) {
        /* Fall through comments needed to silence gcc 7 warnings. */
        switch (*buf) {
        case 0x0d:
            /* Consume following LF or treating CR as LF. */
            buf += (end - buf > 1 && buf[1] == 0x0a);
            fallthrough;
        case 0x0a:
            ++ctx->line;
            ctx->line_start = ++buf;
            continue;
        case 0x09:
            ++buf;
            continue;
        case 0x20:
            /* Don't consume here, sync with power of 2 spaces. */
            goto again;
        default:
            return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unexpected_character);
        }
    }
    return buf;
}

/*
 * Returns the value 0..15 of hex digit `c` (either case), or -1 if
 * `c` is not a hex digit.
 */
static int hex_digit_value(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    /* Lower case. */
    c |= 0x20;
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    return -1;
}

/*
 * Decodes exactly four hex digits at `buf` into `*result`, most
 * significant digit first. Returns 0 on success. Returns -1 and
 * leaves `*result` untouched if any digit is invalid. The caller must
 * guarantee four readable bytes.
 */
static int decode_hex4(const char *buf, uint32_t *result)
{
    uint32_t u = 0;
    int i, d;

    for (i = 0; i < 4; ++i) {
        d = hex_digit_value(buf[i]);
        if (d < 0) {
            return -1;
        }
        u = (u << 4) | (uint32_t)d;
    }
    *result = u;
    return 0;
}

/*
 * Encodes code point `u` as UTF-8 into `code`: code[0] receives the
 * byte count (1..4) and code[1..] the encoded bytes. Values above
 * 0x10ffff set code[0] = 0 and return -1. Surrogate values are not
 * rejected here.
 */
static int decode_unicode_char(uint32_t u, char *code)
{
    if (u <= 0x7f) {
        code[0] = 1;
        code[1] = (char)u;
    } else if (u <= 0x7ff) {
        code[0] = 2;
        code[1] = (char)(0xc0 | (u >> 6));
        code[2] = (char)(0x80 | (u & 0x3f));
    } else if (u <= 0xffff) {
        code[0] = 3;
        code[1] = (char)(0xe0 | (u >> 12));
        code[2] = (char)(0x80 | ((u >> 6) & 0x3f));
        code[3] = (char)(0x80 | (u & 0x3f));
    } else if (u <= 0x10ffff) {
        code[0] = 4;
        code[1] = (char)(0xf0 | (u >> 18));
        code[2] = (char)(0x80 | ((u >> 12) & 0x3f));
        code[3] = (char)(0x80 | ((u >> 6) & 0x3f));
        code[4] = (char)(0x80 | (u & 0x3f));
    } else {
        code[0] = 0;
        return -1;
    }
    return 0;
}

/* Combines a high and a low UTF-16 surrogate into one code point. */
static inline uint32_t combine_utf16_surrogate_pair(uint32_t high, uint32_t low)
{
    return (high - 0xd800) * 0x400 + (low - 0xdc00) + 0x10000;
}

/* Encodes a UTF-16 surrogate pair as UTF-8, see decode_unicode_char. */
static inline int decode_utf16_surrogate_pair(uint32_t high, uint32_t low, char *code)
{
    return decode_unicode_char(combine_utf16_surrogate_pair(high, low), code);
}

/*
 * UTF-8 code points can have up to 4 bytes but JSON can only
 * encode up to 3 bytes via the \uXXXX syntax.
 * To handle the range U+10000..U+10FFFF two UTF-16 surrogate
 * pairs must be used. If this is not detected, the pairs
 * survive in the output which is not valid but often tolerated.
 * Emojis generally require such a pair, unless encoded
 * unescaped in UTF-8.
 *
 * If a high surrogate pair is detected and a low surrogate pair
 * follows, the combined sequence is decoded as a 4 byte
 * UTF-8 sequence. Unpaired surrogate halves are decoded as is
 * despite being an invalid UTF-8 value.
 *
 * On success code[0] holds the number of decoded bytes stored in
 * code[1..] and the return value points past the escape sequence.
 * On failure code[0] is 0, an invalid escape error is recorded and
 * `end` is returned.
 */
const char *flatcc_json_parser_string_escape(flatcc_json_parser_t *ctx, const char *buf, const char *end, flatcc_json_parser_escape_buffer_t code)
{
    int hi, lo;
    uint32_t u, u2;

    if (end - buf < 2 || buf[0] != '\\') {
        code[0] = 0;
        return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape);
    }
    switch (buf[1]) {
    case 'x':
        /* Non-standard \xHH escape producing a single byte. */
        if (end - buf < 4) {
            code[0] = 0;
            return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape);
        }
        hi = hex_digit_value(buf[2]);
        lo = hi < 0 ? -1 : hex_digit_value(buf[3]);
        if (lo < 0) {
            code[0] = 0;
            return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape);
        }
        code[0] = 1;
        code[1] = (char)((hi << 4) | lo);
        return buf + 4;
    case 'u':
        if (end - buf < 6) {
            code[0] = 0;
            return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape);
        }
        if (decode_hex4(buf + 2, &u)) {
            code[0] = 0;
            return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape);
        }
        /* If a high UTF-16 surrogate half pair was detected */
        if (u >= 0xd800 && u <= 0xdbff &&
                /* and there is space for a matching low half pair */
                end - buf >= 12 &&
                /* and there is a second escape following immediately */
                buf[6] == '\\' && buf[7] == 'u' &&
                /* and it is valid hex */
                decode_hex4(buf + 8, &u2) == 0 &&
                /* and it is a low UTF-16 surrogate pair */
                u2 >= 0xdc00 && u2 <= 0xdfff) {
            /* then decode the pair into a single 4 byte utf-8 sequence. */
            if (decode_utf16_surrogate_pair(u, u2, code)) {
                code[0] = 0;
                return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape);
            }
            return buf + 12;
            /*
             * Otherwise decode unmatched surrogate pairs as is any
             * other UTF-8. Some systems might depend on these surviving.
             * Leave ignored errors for the next parse step.
             */
        }
        /* Cannot fail: four hex digits never exceed 0xffff. */
        decode_unicode_char(u, code);
        return buf + 6;
    case 't':
        code[0] = 1;
        code[1] = '\t';
        return buf + 2;
    case 'n':
        code[0] = 1;
        code[1] = '\n';
        return buf + 2;
    case 'r':
        code[0] = 1;
        code[1] = '\r';
        return buf + 2;
    case 'b':
        code[0] = 1;
        code[1] = '\b';
        return buf + 2;
    case 'f':
        code[0] = 1;
        code[1] = '\f';
        return buf + 2;
    case '\"':
        code[0] = 1;
        code[1] = '\"';
        return buf + 2;
    case '\\':
        code[0] = 1;
        code[1] = '\\';
        return buf + 2;
    case '/':
        code[0] = 1;
        code[1] = '/';
        return buf + 2;
    default:
        code[0] = 0;
        return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape);
    }
}

/* Only applies to unquoted constants during generic parsing, otherwise it is skipped as a string.
*/ const char *flatcc_json_parser_skip_constant(flatcc_json_parser_t *ctx, const char *buf, const char *end) { char c; const char *k; while (buf != end) { c = *buf; if ((c & 0x80) || (c == '_') || (c >= '0' && c <= '9') || c == '.') { ++buf; continue; } /* Upper case. */ c |= 0x20; if (c >= 'a' && c <= 'z') { ++buf; continue; } buf = flatcc_json_parser_space(ctx, (k = buf), end); if (buf == k) { return buf; } } return buf; } const char *flatcc_json_parser_match_constant(flatcc_json_parser_t *ctx, const char *buf, const char *end, int pos, int *more) { const char *mark = buf, *k = buf + pos; if (end - buf <= pos) { *more = 0; return buf; } #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED if (ctx->unquoted) { buf = flatcc_json_parser_space(ctx, k, end); if (buf == end) { /* * We cannot make a decision on more. * Just return end and let parser handle sync point in * case it is able to resume parse later on. * For the same reason we do not lower ctx->unquoted. */ *more = 0; return buf; } if (buf != k) { char c = *buf; /* * Space was seen - and thus we have a valid match. * If the next char is an identifier start symbol * we raise the more flag to support syntax like: * * `flags: Hungry Sleepy Awake, ...` */ if (c == '_' || (c & 0x80)) { *more = 1; return buf; } c |= 0x20; if (c >= 'a' && c <= 'z') { *more = 1; return buf; } } /* * Space was not seen, so the match is only valid if followed * by a JSON separator symbol, and there cannot be more values * following so `more` is lowered. */ *more = 0; if (*buf == ',' || *buf == '}' || *buf == ']') { return buf; } return mark; } #endif buf = k; if (*buf == 0x20) { ++buf; while (buf != end && *buf == 0x20) { ++buf; } if (buf == end) { *more = 0; return buf; } /* We accept untrimmed space like " Green Blue ". 
*/ if (*buf != '\"') { *more = 1; return buf; } } switch (*buf) { case '\\': *more = 0; return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_escape); case '\"': buf = flatcc_json_parser_space(ctx, buf + 1, end); *more = 0; return buf; } *more = 0; return mark; } const char *flatcc_json_parser_unmatched_symbol(flatcc_json_parser_t *ctx, const char *buf, const char *end) { if (ctx->flags & flatcc_json_parser_f_skip_unknown) { buf = flatcc_json_parser_symbol_end(ctx, buf, end); buf = flatcc_json_parser_space(ctx, buf, end); if (buf != end && *buf == ':') { ++buf; buf = flatcc_json_parser_space(ctx, buf, end); } else { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_expected_colon); } return flatcc_json_parser_generic_json(ctx, buf, end); } else { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unknown_symbol); } } static const char *__flatcc_json_parser_number(flatcc_json_parser_t *ctx, const char *buf, const char *end) { if (buf == end) { return buf; } if (*buf == '-') { ++buf; if (buf == end) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } } if (*buf == '0') { ++buf; } else { if (*buf < '1' || *buf > '9') { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } ++buf; while (buf != end && *buf >= '0' && *buf <= '9') { ++buf; } } if (buf != end) { if (*buf == '.') { ++buf; if (*buf < '0' || *buf > '9') { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } ++buf; while (buf != end && *buf >= '0' && *buf <= '9') { ++buf; } } } if (buf != end && (*buf == 'e' || *buf == 'E')) { ++buf; if (buf == end) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } if (*buf == '+' || *buf == '-') { ++buf; } if (buf == end || *buf < '0' || *buf > '9') { return flatcc_json_parser_set_error(ctx, buf, end, 
flatcc_json_parser_error_invalid_numeric); } ++buf; while (buf != end && *buf >= '0' && *buf <= '9') { ++buf; } } /* * For strtod termination we must ensure the tail is not valid * including non-json exponent types. The simplest approach is * to accept anything that could be valid json successor * characters and reject end of buffer since we expect a closing * '}'. * * The ',' is actually not safe if strtod uses a non-POSIX locale. */ if (buf != end) { switch (*buf) { case ',': case ':': case ']': case '}': case ' ': case '\r': case '\t': case '\n': case '\v': return buf; } } return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } const char *flatcc_json_parser_double(flatcc_json_parser_t *ctx, const char *buf, const char *end, double *v) { const char *next, *k; *v = 0.0; if (buf == end) { return buf; } k = buf; if (*buf == '-') ++k; if (end - k > 1 && (k[0] == '.' || (k[0] == '0' && k[1] == '0'))) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } next = parse_double(buf, (size_t)(end - buf), v); if (next == 0 || next == buf) { if (parse_double_isinf(*v)) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_overflow); } return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } return next; } const char *flatcc_json_parser_float(flatcc_json_parser_t *ctx, const char *buf, const char *end, float *v) { const char *next, *k; *v = 0.0; if (buf == end) { return buf; } k = buf; if (*buf == '-') ++k; if (end - k > 1 && (k[0] == '.' 
|| (k[0] == '0' && k[1] == '0'))) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } next = parse_float(buf, (size_t)(end - buf), v); if (next == 0 || next == buf) { if (parse_float_isinf(*v)) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_overflow); } return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_invalid_numeric); } return next; } const char *flatcc_json_parser_generic_json(flatcc_json_parser_t *ctx, const char *buf, const char *end) { char stack[FLATCC_JSON_PARSE_GENERIC_MAX_NEST]; char *sp, *spend; const char *k; flatcc_json_parser_escape_buffer_t code; int more = 0; sp = stack; spend = sp + FLATCC_JSON_PARSE_GENERIC_MAX_NEST; again: if (buf == end) { return buf; } if (sp != stack && sp[-1] == '}') { /* Inside an object, about to read field name. */ buf = flatcc_json_parser_symbol_start(ctx, buf, end); buf = flatcc_json_parser_symbol_end(ctx, buf, end); buf = flatcc_json_parser_space(ctx, buf, end); if (buf == end) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unbalanced_object); } if (*buf != ':') { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_expected_colon); } buf = flatcc_json_parser_space(ctx, buf + 1, end); } switch (*buf) { case '\"': buf = flatcc_json_parser_string_start(ctx, buf, end); while (buf != end && *buf != '\"') { buf = flatcc_json_parser_string_part(ctx, buf, end); if (buf != end && *buf == '\"') { break; } buf = flatcc_json_parser_string_escape(ctx, buf, end, code); } buf = flatcc_json_parser_string_end(ctx, buf, end); break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': buf = __flatcc_json_parser_number(ctx, buf, end); break; #if !FLATCC_JSON_PARSE_ALLOW_UNQUOTED case 't': case 'f': { uint8_t v; buf = flatcc_json_parser_bool(ctx, (k = buf), end, &v); if (k == buf) { return flatcc_json_parser_set_error(ctx, buf, 
end, flatcc_json_parser_error_unexpected_character); } } break; case 'n': buf = flatcc_json_parser_null((k = buf), end); if (k == buf) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unexpected_character); } break; #endif case '[': if (sp == spend) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_deep_nesting); } *sp++ = ']'; buf = flatcc_json_parser_space(ctx, buf + 1, end); if (buf != end && *buf == ']') { break; } goto again; case '{': if (sp == spend) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_deep_nesting); } *sp++ = '}'; buf = flatcc_json_parser_space(ctx, buf + 1, end); if (buf != end && *buf == '}') { break; } goto again; default: #if FLATCC_JSON_PARSE_ALLOW_UNQUOTED buf = flatcc_json_parser_skip_constant(ctx, (k = buf), end); if (k == buf) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unexpected_character); } break; #else return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unexpected_character); #endif } while (buf != end && sp != stack) { --sp; if (*sp == ']') { buf = flatcc_json_parser_array_end(ctx, buf, end, &more); } else { buf = flatcc_json_parser_object_end(ctx, buf, end, &more); } if (more) { ++sp; goto again; } } if (buf == end && sp != stack) { return flatcc_json_parser_set_error(ctx, buf, end, sp[-1] == ']' ? flatcc_json_parser_error_unbalanced_array : flatcc_json_parser_error_unbalanced_object); } /* Any ',', ']', or '}' belongs to parent context. */ return buf; } const char *flatcc_json_parser_integer(flatcc_json_parser_t *ctx, const char *buf, const char *end, int *value_sign, uint64_t *value) { uint64_t x0, x = 0; const char *k; if (buf == end) { return buf; } k = buf; *value_sign = *buf == '-'; buf += *value_sign; while (buf != end && *buf >= '0' && *buf <= '9') { x0 = x; x = x * 10 + (uint64_t)(*buf - '0'); if (x0 > x) { return flatcc_json_parser_set_error(ctx, buf, end, value_sign ? 
flatcc_json_parser_error_underflow : flatcc_json_parser_error_overflow); } ++buf; } if (buf == k) { /* Give up, but don't fail the parse just yet, it might be a valid symbol. */ return buf; } if (buf != end && (*buf == 'e' || *buf == 'E' || *buf == '.')) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_float_unexpected); } *value = x; return buf; } /* Array Creation - depends on flatcc builder. */ const char *flatcc_json_parser_build_uint8_vector_base64(flatcc_json_parser_t *ctx, const char *buf, const char *end, flatcc_builder_ref_t *ref, int urlsafe) { const char *mark; uint8_t *pval; size_t max_len; size_t decoded_len, src_len; int mode; int ret; mode = urlsafe ? base64_mode_url : base64_mode_rfc4648; buf = flatcc_json_parser_string_start(ctx, buf, end); buf = flatcc_json_parser_string_part(ctx, (mark = buf), end); if (buf == end || *buf != '\"') { goto base64_failed; } max_len = base64_decoded_size((size_t)(buf - mark)); if (flatcc_builder_start_vector(ctx->ctx, 1, 1, FLATBUFFERS_COUNT_MAX((utype_size)))) { goto failed; } if (!(pval = flatcc_builder_extend_vector(ctx->ctx, max_len))) { goto failed; } src_len = (size_t)(buf - mark); decoded_len = max_len; if ((ret = base64_decode(pval, (const uint8_t *)mark, &decoded_len, &src_len, mode))) { buf = mark + src_len; goto base64_failed; } if (src_len != (size_t)(buf - mark)) { buf = mark + src_len; goto base64_failed; } if (decoded_len < max_len) { if (flatcc_builder_truncate_vector(ctx->ctx, max_len - decoded_len)) { goto failed; } } if (!(*ref = flatcc_builder_end_vector(ctx->ctx))) { goto failed; } return flatcc_json_parser_string_end(ctx, buf, end); failed: *ref = 0; return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_runtime); base64_failed: *ref = 0; return flatcc_json_parser_set_error(ctx, buf, end, urlsafe ? 
flatcc_json_parser_error_base64url : flatcc_json_parser_error_base64); } const char *flatcc_json_parser_char_array(flatcc_json_parser_t *ctx, const char *buf, const char *end, char *s, size_t n) { flatcc_json_parser_escape_buffer_t code; const char *mark; size_t k = 0; buf = flatcc_json_parser_string_start(ctx, buf, end); if (buf != end) while (*buf != '\"') { buf = flatcc_json_parser_string_part(ctx, (mark = buf), end); if (buf == end) return end; k = (size_t)(buf - mark); if (k > n) { if (!(ctx->flags & flatcc_json_parser_f_skip_array_overflow)) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_array_overflow); } k = n; /* Might truncate UTF-8. */ } memcpy(s, mark, k); s += k; n -= k; if (*buf == '\"') break; buf = flatcc_json_parser_string_escape(ctx, buf, end, code); if (buf == end) return end; k = (size_t)code[0]; mark = code + 1; if (k > n) { if (!(ctx->flags & flatcc_json_parser_f_skip_array_overflow)) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_array_overflow); } k = n; /* Might truncate UTF-8. */ } memcpy(s, mark, k); s += k; n -= k; } if (n != 0) { if (ctx->flags & flatcc_json_parser_f_reject_array_underflow) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_array_underflow); } memset(s, 0, n - k); } return flatcc_json_parser_string_end(ctx, buf, end); } /* String Creation - depends on flatcc builder. 
*/
/*
 * Parses a quoted JSON string at `buf` and creates a builder string
 * from it, storing the reference in `*ref`.
 *
 * A string without escapes is created in one call directly from the
 * input. Otherwise the string is assembled piecewise from plain parts
 * and decoded escapes. If a builder call on the piecewise path fails,
 * `*ref` is set to 0 and the current position is returned without
 * recording an error here.
 */
const char *flatcc_json_parser_build_string(flatcc_json_parser_t *ctx,
        const char *buf, const char *end, flatcc_builder_ref_t *ref)
{
    flatcc_json_parser_escape_buffer_t code;
    const char *chunk;

    buf = flatcc_json_parser_string_start(ctx, buf, end);
    chunk = buf;
    buf = flatcc_json_parser_string_part(ctx, chunk, end);

    if (buf != end && *buf == '\"') {
        /* Fast path: the whole body is one plain part. */
        *ref = flatcc_builder_create_string(ctx->ctx, chunk, (size_t)(buf - chunk));
        return flatcc_json_parser_string_end(ctx, buf, end);
    }

    if (flatcc_builder_start_string(ctx->ctx)) {
        goto failed;
    }
    if (!flatcc_builder_append_string(ctx->ctx, chunk, (size_t)(buf - chunk))) {
        goto failed;
    }
    for (;;) {
        if (buf == end || *buf == '\"') {
            break;
        }
        /* Append the decoded escape, then the plain part after it. */
        buf = flatcc_json_parser_string_escape(ctx, buf, end, code);
        if (!flatcc_builder_append_string(ctx->ctx, code + 1, (size_t)code[0])) {
            goto failed;
        }
        chunk = buf;
        buf = flatcc_json_parser_string_part(ctx, chunk, end);
        if (buf == end) {
            continue;
        }
        if (!flatcc_builder_append_string(ctx->ctx, chunk, (size_t)(buf - chunk))) {
            goto failed;
        }
    }
    *ref = flatcc_builder_end_string(ctx->ctx);
    return flatcc_json_parser_string_end(ctx, buf, end);

failed:
    *ref = 0;
    return buf;
}

/* UNIONS */

/*
 * Unions are difficult to parse because the type field may appear after
 * the union table and because having two fields opens up for many more
 * possible error scenarios. We must store each union of a table
 * temporarily - this cannot be in the generated table parser function
 * because there could be many unions (about 2^15 with default voffsets)
 * although usually there will be only a few. We can also not store the
 * data encoded in the existing table buffer in builder because we may
 * have to remove it due to schema forwarding and removing it messes up
 * the table layout. We also cannot naively allocate it dynamically for
 * performance reasons. Instead we place the temporary union data in a
 * separate frame from the table buffer, but on a similar stack. This is
 * called the user stack and we manage one frame per table that is known
 * to contain unions.
*
 * Even with the temporary structures in place we still cannot parse a
 * union before we know its type. Due to JSON typically sorting fields
 * alphabetically in various pretty printers, we are likely to receive
 * the type late (with `<union_name>_type` following `<union_name>`).
 * To deal with this we store a backtracking pointer and parse the
 * table generically in a first pass and reparse the table once the type
 * is known. This can happen recursively with nested tables containing
 * unions which is why we need to have a stack frame.
 *
 * If the type field is stored first we just store the type in the
 * custom frame and immediately parse the table with the right type
 * once we see it. The parse will be much faster and we can strongly
 * recommend that flatbuffer serializers do this, but we cannot require
 * it.
 *
 * The actual overhead of dealing with the custom stack frame is fairly
 * cheap once we get past the first custom stack allocation.
 *
 * We cannot update the builder before both the table and table type
 * have been parsed because the type might have to be ignored due
 * to schema forwarding. Therefore the union type must be cached or
 * reread. This happens trivially by calling the union parser with the
 * type as argument, but it is important to be aware of before
 * refactoring the code.
 *
 * The user frame is created at table start and remains valid until
 * table exit, but we cannot assume the pointers to the frame remain
 * valid. Specifically we cannot use frame pointers after calling
 * the union parser. This means the union type must be cached or reread
 * so it can be added to the table. Because the type is passed to
 * the union parser this caching happens automatically but it is still
 * important to be aware that it is required.
 *
 * The frame reserves temporary information for all unions the table
 * holds, enumerated 0 <= `union_index` < `union_total`
 * where the `union_total` is a fixed, type specific number.
* * The `type_present` is needed because union types range from 0..255 * and we need an extra bit do distinguish not present from union type * `NONE = 0`. */ typedef struct { const char *backtrace; const char *line_start; int line; uint8_t type_present; uint8_t type; /* Union vectors: */ uoffset_t count; size_t h_types; } __flatcc_json_parser_union_entry_t; typedef struct { size_t union_total; size_t union_count; __flatcc_json_parser_union_entry_t unions[1]; } __flatcc_json_parser_union_frame_t; const char *flatcc_json_parser_prepare_unions(flatcc_json_parser_t *ctx, const char *buf, const char *end, size_t union_total, size_t *handle) { __flatcc_json_parser_union_frame_t *f; if (!(*handle = flatcc_builder_enter_user_frame(ctx->ctx, sizeof(__flatcc_json_parser_union_frame_t) + (union_total - 1) * sizeof(__flatcc_json_parser_union_entry_t)))) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_runtime); } f = flatcc_builder_get_user_frame_ptr(ctx->ctx, *handle); /* Frames have zeroed memory. 
*/ f->union_total = union_total; return buf; } const char *flatcc_json_parser_finalize_unions(flatcc_json_parser_t *ctx, const char *buf, const char *end, size_t handle) { __flatcc_json_parser_union_frame_t *f = flatcc_builder_get_user_frame_ptr(ctx->ctx, handle); if (f->union_count) { buf = flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_union_incomplete); } flatcc_builder_exit_user_frame_at(ctx->ctx, handle); return buf; } const char *flatcc_json_parser_union(flatcc_json_parser_t *ctx, const char *buf, const char *end, size_t union_index, flatbuffers_voffset_t id, size_t handle, flatcc_json_parser_union_f *union_parser) { __flatcc_json_parser_union_frame_t *f = flatcc_builder_get_user_frame_ptr(ctx->ctx, handle); __flatcc_json_parser_union_entry_t *e = &f->unions[union_index]; flatcc_builder_union_ref_t uref; if (e->backtrace) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_duplicate); } if (!e->type_present) { /* If we supported table: null, we should not count it, but we don't. 
*/ ++f->union_count; e->line = ctx->line; e->line_start = ctx->line_start; buf = flatcc_json_parser_generic_json(ctx, (e->backtrace = buf), end); } else { uref.type = e->type; if (e->type == 0) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_union_none_present); } --f->union_count; buf = union_parser(ctx, buf, end, e->type, &uref.value); if (buf != end) { if (flatcc_builder_table_add_union(ctx->ctx, id, uref)) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_duplicate); } } } return buf; } const char *flatcc_json_parser_union_type(flatcc_json_parser_t *ctx, const char *buf, const char *end, size_t union_index, flatbuffers_voffset_t id, size_t handle, flatcc_json_parser_integral_symbol_f *type_parsers[], flatcc_json_parser_union_f *union_parser) { __flatcc_json_parser_union_frame_t *f = flatcc_builder_get_user_frame_ptr(ctx->ctx, handle); __flatcc_json_parser_union_entry_t *e = f->unions + union_index; flatcc_builder_union_ref_t uref; const char *mark; int line; const char *line_start; if (e->type_present) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_duplicate); } e->type_present = 1; buf = flatcc_json_parser_uint8(ctx, (mark = buf), end, &e->type); if (mark == buf) { buf = flatcc_json_parser_symbolic_uint8(ctx, buf, end, type_parsers, &e->type); } /* Only count the union if the type is not NONE. */ if (e->backtrace == 0) { f->union_count += e->type != 0; return buf; } FLATCC_ASSERT(f->union_count); --f->union_count; /* * IMPORTANT: we cannot access any value in the frame or entry * pointer after calling union parse because it might cause the * stack to reallocate. We should read the frame pointer again if * needed - we don't but remember it if refactoring code. * * IMPORTANT 2: Do not assign buf here. We are backtracking. 
*/ line = ctx->line; line_start = ctx->line_start; ctx->line = e->line; ctx->line_start = e->line_start; uref.type = e->type; if (end == union_parser(ctx, e->backtrace, end, e->type, &uref.value)) { return end; } if (flatcc_builder_table_add_union(ctx->ctx, id, uref)) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_duplicate); } ctx->line = line; ctx->line_start = line_start; return buf; } static const char *_parse_union_vector(flatcc_json_parser_t *ctx, const char *buf, const char *end, size_t h_types, uoffset_t count, flatbuffers_voffset_t id, flatcc_json_parser_union_f *union_parser) { flatcc_builder_ref_t ref = 0, *pref; utype_t *types; int more; size_t i; if (flatcc_builder_start_offset_vector(ctx->ctx)) goto failed; buf = flatcc_json_parser_array_start(ctx, buf, end, &more); i = 0; while (more) { if (i == count) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_union_vector_length); } /* Frame must be restored between calls to table parser. */ types = flatcc_builder_get_user_frame_ptr(ctx->ctx, h_types); buf = union_parser(ctx, buf, end, types[i], &ref); if (buf == end) { return buf; } if (!(pref = flatcc_builder_extend_offset_vector(ctx->ctx, 1))) goto failed; *pref = ref; buf = flatcc_json_parser_array_end(ctx, buf, end, &more); ++i; } if (i != count) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_union_vector_length); } /* Frame must be restored between calls to table parser. 
*/ types = flatcc_builder_get_user_frame_ptr(ctx->ctx, h_types); if (!(ref = flatcc_builder_end_offset_vector_for_unions(ctx->ctx, types))) goto failed; if (!(pref = flatcc_builder_table_add_offset(ctx->ctx, id))) goto failed; *pref = ref; return buf; failed: return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_runtime); } const char *flatcc_json_parser_union_vector(flatcc_json_parser_t *ctx, const char *buf, const char *end, size_t union_index, flatbuffers_voffset_t id, size_t handle, flatcc_json_parser_union_f *union_parser) { __flatcc_json_parser_union_frame_t *f = flatcc_builder_get_user_frame_ptr(ctx->ctx, handle); __flatcc_json_parser_union_entry_t *e = f->unions + union_index; if (e->backtrace) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_duplicate); } if (!e->type_present) { ++f->union_count; e->line = ctx->line; e->line_start = ctx->line_start; buf = flatcc_json_parser_generic_json(ctx, (e->backtrace = buf), end); } else { --f->union_count; buf = _parse_union_vector(ctx, buf, end, e->h_types, e->count, id, union_parser); } return buf; } const char *flatcc_json_parser_union_type_vector(flatcc_json_parser_t *ctx, const char *buf, const char *end, size_t union_index, flatbuffers_voffset_t id, size_t handle, flatcc_json_parser_integral_symbol_f *type_parsers[], flatcc_json_parser_union_f *union_parser, flatcc_json_parser_is_known_type_f accept_type) { __flatcc_json_parser_union_frame_t *f = flatcc_builder_get_user_frame_ptr(ctx->ctx, handle); __flatcc_json_parser_union_entry_t *e = f->unions + union_index; const char *mark; int line; const char *line_start; int more; utype_t val; void *pval; flatcc_builder_ref_t ref, *pref; utype_t *types; size_t size; size_t h_types; uoffset_t count; #if FLATBUFFERS_UTYPE_MAX != UINT8_MAX #error "Update union vector parser to support current union type definition." 
#endif if (e->type_present) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_duplicate); } e->type_present = 1; if (flatcc_builder_start_vector(ctx->ctx, 1, 1, FLATBUFFERS_COUNT_MAX((utype_size)))) goto failed; buf = flatcc_json_parser_array_start(ctx, buf, end, &more); while (more) { if (!(pval = flatcc_builder_extend_vector(ctx->ctx, 1))) goto failed; buf = flatcc_json_parser_uint8(ctx, (mark = buf), end, &val); if (mark == buf) { buf = flatcc_json_parser_symbolic_uint8(ctx, (mark = buf), end, type_parsers, &val); if (buf == mark || buf == end) goto failed; } /* Parse unknown types as NONE */ if (!accept_type(val)) { if (!(ctx->flags & flatcc_json_parser_f_skip_unknown)) { return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unknown_union); } val = 0; } flatbuffers_uint8_write_to_pe(pval, val); buf = flatcc_json_parser_array_end(ctx, buf, end, &more); } count = (uoffset_t)flatcc_builder_vector_count(ctx->ctx); e->count = count; size = count * utype_size; /* Store type vector so it is accessible to the table vector parser. */ h_types = flatcc_builder_enter_user_frame(ctx->ctx, size); types = flatcc_builder_get_user_frame_ptr(ctx->ctx, h_types); memcpy(types, flatcc_builder_vector_edit(ctx->ctx), size); if (!((ref = flatcc_builder_end_vector(ctx->ctx)))) goto failed; if (!(pref = flatcc_builder_table_add_offset(ctx->ctx, id - 1))) goto failed; *pref = ref; /* Restore union frame after possible invalidation due to types frame allocation. */ f = flatcc_builder_get_user_frame_ptr(ctx->ctx, handle); e = f->unions + union_index; e->h_types = h_types; if (e->backtrace == 0) { ++f->union_count; return buf; } FLATCC_ASSERT(f->union_count); --f->union_count; line = ctx->line; line_start = ctx->line_start; ctx->line = e->line; ctx->line_start = e->line_start; /* We must not assign buf here because we are backtracking. 
*/ if (end == _parse_union_vector(ctx, e->backtrace, end, h_types, count, id, union_parser)) return end; /* * NOTE: We do not need the user frame anymore, but if we did, it * would have to be restored from its handle due to the above parse. */ ctx->line = line; ctx->line_start = line_start; return buf; failed: return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_runtime); } int flatcc_json_parser_table_as_root(flatcc_builder_t *B, flatcc_json_parser_t *ctx, const char *buf, size_t bufsiz, int flags, const char *fid, flatcc_json_parser_table_f *parser) { flatcc_json_parser_t _ctx; flatcc_builder_ref_t root; int builder_flags = flags & flatcc_json_parser_f_with_size ? flatcc_builder_with_size : 0; ctx = ctx ? ctx : &_ctx; flatcc_json_parser_init(ctx, B, buf, buf + bufsiz, flags); if (flatcc_builder_start_buffer(B, fid, 0, builder_flags)) return -1; buf = parser(ctx, buf, buf + bufsiz, &root); if (ctx->error) { return ctx->error; } if (!flatcc_builder_end_buffer(B, root)) return -1; ctx->end_loc = buf; return 0; } int flatcc_json_parser_struct_as_root(flatcc_builder_t *B, flatcc_json_parser_t *ctx, const char *buf, size_t bufsiz, int flags, const char *fid, flatcc_json_parser_table_f *parser) { flatcc_json_parser_t _ctx; flatcc_builder_ref_t root; int builder_flags = flags & flatcc_json_parser_f_with_size ? flatcc_builder_with_size : 0; ctx = ctx ? ctx : &_ctx; flatcc_json_parser_init(ctx, B, buf, buf + bufsiz, flags); if (flatcc_builder_start_buffer(B, fid, 0, builder_flags)) return -1; buf = parser(ctx, buf, buf + bufsiz, &root); if (ctx->error) { return ctx->error; } if (!flatcc_builder_end_buffer(B, root)) return -1; ctx->end_loc = buf; return 0; }