#include "nostrdb.h"
#include "jsmn.h"
#include "hex.h"
#include "cursor.h"
#include <stdlib.h>
#include <limits.h>
#include <string.h>

/// State needed to turn a JSON document into an ndb_note.
struct ndb_json_parser {
	const char *json;             // input text (not owned)
	int json_len;                 // number of bytes in json
	struct ndb_builder builder;   // note under construction
	jsmn_parser json_parser;      // jsmn tokenizer state
	jsmntok_t *toks, *toks_end;   // token array carved out of the caller's buffer
	int num_tokens;               // result of the last jsmn_parse call
};

/// Write a tag header (its element count) at the cursor.
static inline int cursor_push_tag(struct cursor *cur, struct ndb_tag *tag)
{
	return cursor_push_u16(cur, tag->count);
}

/// Initialise a builder over caller-provided memory.
///
/// The buffer is split into three regions: the note itself, the string
/// table, and a small scratch area (1/32 of the buffer) holding string
/// offsets used for de-duplication while building.
///
/// NOTE(review): buf is cast directly to struct ndb_note *, so it is
/// assumed to be suitably aligned for that type -- confirm at call sites.
///
/// Returns 1 on success, 0 if the buffer is too small or cannot be sliced.
int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf,
		    int bufsize)
{
	struct ndb_note *note;
	struct cursor mem;
	int half, size, str_indices_size;

	// come on bruh
	//
	// The negative check is explicit: comparing a negative int against a
	// size_t converts it to a huge unsigned value, which would slip past
	// the minimum-size test.
	if (bufsize < 0 || (size_t)bufsize < sizeof(struct ndb_note) * 2)
		return 0;

	str_indices_size = bufsize / 32;
	size = bufsize - str_indices_size;
	half = size / 2;

	// The note header is written at the start of the note slice, so that
	// slice has to be able to hold it.
	if ((size_t)half < sizeof(struct ndb_note))
		return 0;

	//debug("size %d half %d str_indices %d\n", size, half, str_indices_size);

	// make a safe cursor of our available memory
	make_cursor(buf, buf + bufsize, &mem);

	note = builder->note = (struct ndb_note *)buf;

	// take slices of the memory into subcursors
	if (!(cursor_slice(&mem, &builder->note_cur, half) &&
	      cursor_slice(&mem, &builder->strings, half) &&
	      cursor_slice(&mem, &builder->str_indices, str_indices_size))) {
		return 0;
	}

	memset(note, 0, sizeof(*note));
	builder->note_cur.p += sizeof(*note);

	note->version = 1;

	return 1;
}

/// Check for small strings to pack
///
/// Strings of length 0, 1 or 2 are encoded directly into *pstr instead of
/// being written to the string table. Returns 1 if the string was packed,
/// 0 if it is too long (in which case *pstr is left untouched).
static inline int ndb_builder_try_compact_str(struct ndb_builder *builder,
					      const char *str, int len,
					      union packed_str *pstr)
{
	if (len == 0) {
		*pstr = ndb_char_to_packed_str(0);
		return 1;
	} else if (len == 1) {
		*pstr = ndb_char_to_packed_str(str[0]);
		return 1;
	} else if (len == 2) {
		*pstr = ndb_chars_to_packed_str(str[0], str[1]);
		return 1;
	}

	return 0;
}

/// Set up a JSON parser over caller-provided memory.
///
/// Returns 1 on success, 0 if the buffer is unusable. On failure the token
/// array and jsmn state are still left in a defined state.
///
/// NOTE(review): the token array starts at buf + bufsize/2 and is accessed
/// as jsmntok_t, so that address is assumed to be suitably aligned --
/// confirm at call sites.
static inline int ndb_json_parser_init(struct ndb_json_parser *p,
				       const char *json, int json_len,
				       unsigned char *buf, int bufsize)
{
	int half;

	p->num_tokens = 0;
	p->json = json;
	p->json_len = json_len;

	// Initialise the tokenizer up front so its state is defined even when
	// we bail out below.
	jsmn_init(&p->json_parser);

	if (bufsize < 0) {
		// Avoid pointer arithmetic with a negative size; expose an
		// empty token array instead.
		p->toks = p->toks_end = (jsmntok_t *)buf;
		return 0;
	}

	half = bufsize / 2;
	p->toks = (jsmntok_t *)(buf + half);
	p->toks_end = (jsmntok_t *)(buf + bufsize);

	// ndb_builder gets the first half of the buffer, and jsmn gets the
	// second half. I like this way of allocating memory (without actually
	// dynamically allocating memory). You get one big chunk upfront and
	// then submodules can recursively subdivide it. Maybe you could do
	// something even more clever like golden-ratio style subdivision where
	// the more important stuff gets a larger chunk and then it spirals
	// downward into smaller chunks. Thanks for coming to my TED talk.
	if (!ndb_builder_new(&p->builder, buf, half))
		return 0;

	return 1;
}

/// Tokenize the JSON text into the parser's token array.
///
/// Stores and returns the jsmn_parse result: the number of tokens on
/// success, or a negative jsmn error code.
static inline int ndb_json_parser_parse(struct ndb_json_parser *p)
{
	// capacity of the token array, in tokens
	int cap = ((unsigned char *)p->toks_end - (unsigned char*)p->toks)/sizeof(*p->toks);
	p->num_tokens =
		jsmn_parse(&p->json_parser, p->json, p->json_len, p->toks, cap);

	return p->num_tokens;
}

/// Finish building: pack the string table directly after the note data.
///
/// Stores the builder's note in *note and returns the total size in bytes
/// of the note data followed by its strings.
int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note)
{
	int strings_len = builder->strings.p - builder->strings.start;
	unsigned char *end = builder->note_cur.p + strings_len;
	int total_size = end - builder->note_cur.start;

	// move the strings buffer next to the end of our ndb_note
	memmove(builder->note_cur.p, builder->strings.start, strings_len);

	// set the strings location
	builder->note->strings = builder->note_cur.p - builder->note_cur.start;

	// record the total size
	//builder->note->size = total_size;

	*note = builder->note;

	return total_size;
}

/// Access the note currently being built.
struct ndb_note * ndb_builder_note(struct ndb_builder *builder)
{
	return builder->note;
}

/// find an existing string via str_indices. these indices only exist in the
/// builder phase just for this purpose.
static inline int ndb_builder_find_str(struct ndb_builder *builder, const char *str, int len, union packed_str *pstr) { // find existing matching string to avoid duplicate strings int indices = cursor_count(&builder->str_indices, sizeof(uint32_t)); for (int i = 0; i < indices; i++) { uint32_t index = ((uint32_t*)builder->str_indices.start)[i]; const char *some_str = (const char*)builder->strings.start + index; if (!strncmp(some_str, str, len)) { // found an existing matching str, use that index *pstr = ndb_offset_str(index); return 1; } } return 0; } static int ndb_builder_push_str(struct ndb_builder *builder, const char *str, int len, union packed_str *pstr) { uint32_t loc; // no string found, push a new one loc = builder->strings.p - builder->strings.start; if (!(cursor_push(&builder->strings, (unsigned char*)str, len) && cursor_push_byte(&builder->strings, '\0'))) { return 0; } *pstr = ndb_offset_str(loc); // record in builder indices. ignore return value, if we can't cache it // then whatever cursor_push_u32(&builder->str_indices, loc); return 1; } static int ndb_builder_push_unpacked_str(struct ndb_builder *builder, const char *str, int len, union packed_str *pstr) { if (ndb_builder_find_str(builder, str, len, pstr)) return 1; return ndb_builder_push_str(builder, str, len, pstr); } int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len, union packed_str *pstr) { if (ndb_builder_try_compact_str(builder, str, len, pstr)) return 1; return ndb_builder_push_unpacked_str(builder, str, len, pstr); } int ndb_builder_set_content(struct ndb_builder *builder, const char *content, int len) { builder->note->content_length = len; return ndb_builder_make_str(builder, content, len, &builder->note->content); } static inline int jsoneq(const char *json, jsmntok_t *tok, int tok_len, const char *s) { if (tok->type == JSMN_STRING && (int)strlen(s) == tok_len && memcmp(json + tok->start, s, tok_len) == 0) { return 1; } return 0; } static inline int 
toksize(jsmntok_t *tok) { return tok->end - tok->start; } static int ndb_builder_finalize_tag(struct ndb_builder *builder, union packed_str offset) { if (!cursor_push_u32(&builder->note_cur, offset.offset)) return 0; builder->current_tag->count++; return 1; } /// Unescape and push json strings static int ndb_builder_make_json_str(struct ndb_builder *builder, const char *str, int len, union packed_str *pstr, int *written) { // let's not care about de-duping these. we should just unescape // in-place directly into the strings table. const char *p, *end, *start; unsigned char *builder_start; // always try compact strings first if (ndb_builder_try_compact_str(builder, str, len, pstr)) return 1; end = str + len; start = str; // Initialize start to the beginning of the string *pstr = ndb_offset_str(builder->strings.p - builder->strings.start); builder_start = builder->strings.p; for (p = str; p < end; p++) { if (*p == '\\' && p+1 < end) { // Push the chunk of unescaped characters before this escape sequence if (start < p && !cursor_push(&builder->strings, (unsigned char *)start, p - start)) { return 0; } switch (*(p+1)) { case 't': if (!cursor_push_byte(&builder->strings, '\t')) return 0; break; case 'n': if (!cursor_push_byte(&builder->strings, '\n')) return 0; break; case 'r': if (!cursor_push_byte(&builder->strings, '\r')) return 0; break; case 'b': if (!cursor_push_byte(&builder->strings, '\b')) return 0; break; case 'f': if (!cursor_push_byte(&builder->strings, '\f')) return 0; break; case '\\': if (!cursor_push_byte(&builder->strings, '\\')) return 0; break; case '"': if (!cursor_push_byte(&builder->strings, '"')) return 0; break; case 'u': // these aren't handled yet return 0; default: if (!cursor_push_byte(&builder->strings, *p) || !cursor_push_byte(&builder->strings, *(p+1))) return 0; break; } p++; // Skip the character following the backslash start = p + 1; // Update the start pointer to the next character } } // Handle the last chunk after the last escape 
sequence (or if there are no escape sequences at all) if (start < p && !cursor_push(&builder->strings, (unsigned char *)start, p - start)) { return 0; } if (written) *written = builder->strings.p - builder_start; // TODO: dedupe these!? return cursor_push_byte(&builder->strings, '\0'); } static int ndb_builder_push_json_tag(struct ndb_builder *builder, const char *str, int len) { union packed_str pstr; if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL)) return 0; return ndb_builder_finalize_tag(builder, pstr); } // Push a json array into an ndb tag ["p", "abcd..."] -> struct ndb_tag static int ndb_builder_tag_from_json_array(struct ndb_json_parser *p, jsmntok_t *array) { jsmntok_t *str_tok; const char *str; if (array->size == 0) return 0; if (!ndb_builder_new_tag(&p->builder)) return 0; for (int i = 0; i < array->size; i++) { str_tok = &array[i+1]; str = p->json + str_tok->start; if (!ndb_builder_push_json_tag(&p->builder, str, toksize(str_tok))) { return 0; } } return 1; } // Push json tags into ndb data // [["t", "hashtag"], ["p", "abcde..."]] -> struct ndb_tags static inline int ndb_builder_process_json_tags(struct ndb_json_parser *p, jsmntok_t *array) { jsmntok_t *tag = array; if (array->size == 0) return 1; for (int i = 0; i < array->size; i++) { if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) return 0; tag += tag[i+1].size; } return 1; } static int parse_unsigned_int(const char *start, int len, unsigned int *num) { unsigned int number = 0; const char *p = start, *end = start + len; int digits = 0; while (p < end) { char c = *p; if (c < '0' || c > '9') break; // Check for overflow char digit = c - '0'; if (number > (UINT_MAX - digit) / 10) return 0; // Overflow detected number = number * 10 + digit; p++; digits++; } if (digits == 0) return 0; *num = number; return 1; } int ndb_note_from_json(const char *json, int len, struct ndb_note **note, unsigned char *buf, int bufsize) { jsmntok_t *tok = NULL; unsigned char hexbuf[64]; int i, tok_len, res; 
const char *start; struct ndb_json_parser parser; ndb_json_parser_init(&parser, json, len, buf, bufsize); res = ndb_json_parser_parse(&parser); if (res < 0) return res; if (parser.num_tokens < 1 || parser.toks[0].type != JSMN_OBJECT) return 0; for (i = 1; i < parser.num_tokens; i++) { tok = &parser.toks[i]; start = json + tok->start; tok_len = toksize(tok); //printf("toplevel %.*s %d\n", tok_len, json + tok->start, tok->type); if (tok_len == 0 || i + 1 >= parser.num_tokens) continue; if (start[0] == 'p' && jsoneq(json, tok, tok_len, "pubkey")) { // pubkey tok = &parser.toks[i+1]; hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf)); ndb_builder_set_pubkey(&parser.builder, hexbuf); } else if (tok_len == 2 && start[0] == 'i' && start[1] == 'd') { // id tok = &parser.toks[i+1]; hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf)); // TODO: validate id ndb_builder_set_id(&parser.builder, hexbuf); } else if (tok_len == 3 && start[0] == 's' && start[1] == 'i' && start[2] == 'g') { // sig tok = &parser.toks[i+1]; hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf)); ndb_builder_set_signature(&parser.builder, hexbuf); } else if (start[0] == 'k' && jsoneq(json, tok, tok_len, "kind")) { // kind tok = &parser.toks[i+1]; start = json + tok->start; if (tok->type != JSMN_PRIMITIVE || tok_len <= 0) return 0; if (!parse_unsigned_int(start, toksize(tok), &parser.builder.note->kind)) return 0; } else if (start[0] == 'c') { if (jsoneq(json, tok, tok_len, "created_at")) { // created_at tok = &parser.toks[i+1]; start = json + tok->start; if (tok->type != JSMN_PRIMITIVE || tok_len <= 0) return 0; if (!parse_unsigned_int(start, toksize(tok), &parser.builder.note->created_at)) return 0; } else if (jsoneq(json, tok, tok_len, "content")) { // content tok = &parser.toks[i+1]; union packed_str pstr; tok_len = toksize(tok); int written; if (!ndb_builder_make_json_str(&parser.builder, json + tok->start, tok_len, &pstr, &written)) { return 0; } 
parser.builder.note->content_length = written; parser.builder.note->content = pstr; } } else if (start[0] == 't' && jsoneq(json, tok, tok_len, "tags")) { tok = &parser.toks[i+1]; ndb_builder_process_json_tags(&parser, tok); i += tok->size; } } return ndb_builder_finalize(&parser.builder, note); } void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey) { memcpy(builder->note->pubkey, pubkey, 32); } void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id) { memcpy(builder->note->id, id, 32); } void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature) { memcpy(builder->note->signature, signature, 64); } void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind) { builder->note->kind = kind; } int ndb_builder_new_tag(struct ndb_builder *builder) { builder->note->tags.count++; struct ndb_tag tag = {0}; builder->current_tag = (struct ndb_tag *)builder->note_cur.p; return cursor_push_tag(&builder->note_cur, &tag); } /// Push an element to the current tag /// /// Basic idea is to call ndb_builder_new_tag inline int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len) { union packed_str pstr; if (!ndb_builder_make_str(builder, str, len, &pstr)) return 0; return ndb_builder_finalize_tag(builder, pstr); }