1
0
mirror of git://jb55.com/damus synced 2024-10-04 19:00:42 +00:00

ndb: update lib

This commit is contained in:
William Casarin 2023-07-22 16:57:16 -07:00
parent c74993366b
commit 4da23390f8
2 changed files with 237 additions and 31 deletions

View File

@ -4,6 +4,7 @@
#include "hex.h" #include "hex.h"
#include "cursor.h" #include "cursor.h"
#include <stdlib.h> #include <stdlib.h>
#include <limits.h>
struct ndb_json_parser { struct ndb_json_parser {
const char *json; const char *json;
@ -56,6 +57,26 @@ int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf,
return 1; return 1;
} }
/// Pack strings of at most two bytes directly into `pstr`.
///
/// Returns 1 when `str` was packed (len is 0, 1 or 2) and 0 when it was not,
/// in which case `pstr` is left untouched. A zero-length string is packed as
/// the single byte 0. `builder` is accepted but not consulted here.
static inline int ndb_builder_try_compact_str(struct ndb_builder *builder,
					      const char *str, int len,
					      union packed_str *pstr)
{
	switch (len) {
	case 0:
		*pstr = ndb_char_to_packed_str(0);
		break;
	case 1:
		*pstr = ndb_char_to_packed_str(str[0]);
		break;
	case 2:
		*pstr = ndb_chars_to_packed_str(str[0], str[1]);
		break;
	default:
		// anything else cannot be packed by this helper
		return 0;
	}

	return 1;
}
static inline int ndb_json_parser_init(struct ndb_json_parser *p, static inline int ndb_json_parser_init(struct ndb_json_parser *p,
const char *json, int json_len, const char *json, int json_len,
unsigned char *buf, int bufsize) unsigned char *buf, int bufsize)
@ -121,41 +142,40 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder)
return builder->note; return builder->note;
} }
int ndb_builder_make_string(struct ndb_builder *builder, const char *str, /// find an existing string via str_indices. these indices only exist in the
int len, union packed_str *pstr) /// builder phase just for this purpose.
static inline int ndb_builder_find_str(struct ndb_builder *builder,
const char *str, int len,
union packed_str *pstr)
{ {
uint32_t loc;
if (len == 0) {
*pstr = ndb_char_to_packed_str(0);
return 1;
} else if (len == 1) {
*pstr = ndb_char_to_packed_str(str[0]);
return 1;
} else if (len == 2) {
*pstr = ndb_chars_to_packed_str(str[0], str[1]);
return 1;
}
// find existing matching string to avoid duplicate strings // find existing matching string to avoid duplicate strings
int indices = cursor_count(&builder->str_indices, sizeof(uint32_t)); int indices = cursor_count(&builder->str_indices, sizeof(uint32_t));
for (int i = 0; i < indices; i++) { for (int i = 0; i < indices; i++) {
uint32_t index = ((uint32_t*)builder->str_indices.start)[i]; uint32_t index = ((uint32_t*)builder->str_indices.start)[i];
const char *some_str = (const char*)builder->strings.start + index; const char *some_str = (const char*)builder->strings.start + index;
if (!strcmp(some_str, str)) { if (!strncmp(some_str, str, len)) {
// found an existing matching str, use that index // found an existing matching str, use that index
*pstr = ndb_offset_str(index); *pstr = ndb_offset_str(index);
return 1; return 1;
} }
} }
return 0;
}
static int ndb_builder_push_str(struct ndb_builder *builder, const char *str,
int len, union packed_str *pstr)
{
uint32_t loc;
// no string found, push a new one // no string found, push a new one
loc = builder->strings.p - builder->strings.start; loc = builder->strings.p - builder->strings.start;
if (!(cursor_push(&builder->strings, (unsigned char*)str, len) && if (!(cursor_push(&builder->strings, (unsigned char*)str, len) &&
cursor_push_byte(&builder->strings, '\0'))) { cursor_push_byte(&builder->strings, '\0'))) {
return 0; return 0;
} }
*pstr = ndb_offset_str(loc); *pstr = ndb_offset_str(loc);
// record in builder indices. ignore return value, if we can't cache it // record in builder indices. ignore return value, if we can't cache it
@ -165,10 +185,30 @@ int ndb_builder_make_string(struct ndb_builder *builder, const char *str,
return 1; return 1;
} }
/// Store a string by offset: reuse the offset reported by
/// ndb_builder_find_str when it finds one, otherwise push a new copy with
/// ndb_builder_push_str.
///
/// Returns 1 when an existing string was found, else whatever
/// ndb_builder_push_str returns.
static int ndb_builder_push_unpacked_str(struct ndb_builder *builder,
					 const char *str, int len,
					 union packed_str *pstr)
{
	int found = ndb_builder_find_str(builder, str, len, pstr);

	return found ? 1 : ndb_builder_push_str(builder, str, len, pstr);
}
/// Turn (`str`, `len`) into a packed_str stored in `pstr`.
///
/// Strings that ndb_builder_try_compact_str can pack are packed in place;
/// everything else goes through ndb_builder_push_unpacked_str. Returns 1 when
/// packed, otherwise the result of ndb_builder_push_unpacked_str.
int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len,
			 union packed_str *pstr)
{
	// too long to pack: fall back to the offset-based path
	if (!ndb_builder_try_compact_str(builder, str, len, pstr))
		return ndb_builder_push_unpacked_str(builder, str, len, pstr);

	return 1;
}
int ndb_builder_set_content(struct ndb_builder *builder, const char *content, int ndb_builder_set_content(struct ndb_builder *builder, const char *content,
int len) int len)
{ {
return ndb_builder_make_string(builder, content, len, &builder->note->content); builder->note->content_length = len;
return ndb_builder_make_str(builder, content, len, &builder->note->content);
} }
@ -187,9 +227,115 @@ static inline int toksize(jsmntok_t *tok)
return tok->end - tok->start; return tok->end - tok->start;
} }
/// Append a tag element to the note being built.
///
/// Pushes `offset.offset` onto the builder's note cursor and, only when that
/// push succeeds, increments the current tag's element count. Returns 1 on
/// success and 0 when the push fails.
static int ndb_builder_finalize_tag(struct ndb_builder *builder,
				    union packed_str offset)
{
	int pushed = cursor_push_u32(&builder->note_cur, offset.offset);

	if (pushed)
		builder->current_tag->count++;

	return pushed ? 1 : 0;
}
/// Unescape a JSON string body and push it into the builder's strings table.
///
/// `str`/`len` describe the raw (still escaped) bytes of the JSON string.
/// On success returns 1, stores the resulting packed or offset string in
/// `pstr` and, when `written` is non-NULL, stores the number of bytes in the
/// resulting string (not counting the trailing NUL) in `*written`.
///
/// Returns 0 when a push onto the strings cursor fails or when a `\u` escape
/// is encountered (those aren't handled yet). On failure `*pstr` may already
/// have been assigned and part of the string may already have been pushed.
///
/// These strings are not de-duplicated; they are unescaped directly into the
/// strings table.
static int ndb_builder_make_json_str(struct ndb_builder *builder,
				     const char *str, int len,
				     union packed_str *pstr,
				     int *written)
{
	const char *p, *end, *start;
	unsigned char *builder_start;
	unsigned char unescaped;

	// A two-byte input starting with a backslash is a single escape
	// sequence (e.g. \n). Packing it raw would store the backslash and the
	// letter instead of the character they stand for, so send it through
	// the unescape loop below.
	int is_escape_pair = (len == 2 && str[0] == '\\');

	// always try compact strings first
	if (!is_escape_pair &&
	    ndb_builder_try_compact_str(builder, str, len, pstr)) {
		// Nothing was unescaped, so the stored length is the input
		// length. Callers read *written right after a successful
		// return, so it must be set on this path too.
		if (written)
			*written = len;
		return 1;
	}

	end = str + len;
	start = str; // start of the pending run of literal (unescaped) bytes

	*pstr = ndb_offset_str(builder->strings.p - builder->strings.start);
	builder_start = builder->strings.p;

	for (p = str; p < end; p++) {
		// a backslash that is the very last byte is kept as a literal
		if (*p != '\\' || p+1 >= end)
			continue;

		// flush the literal bytes that precede this escape sequence
		if (start < p && !cursor_push(&builder->strings,
					      (unsigned char *)start,
					      p - start)) {
			return 0;
		}

		switch (*(p+1)) {
		case 't':  unescaped = '\t'; break;
		case 'n':  unescaped = '\n'; break;
		case 'r':  unescaped = '\r'; break;
		case 'b':  unescaped = '\b'; break;
		case 'f':  unescaped = '\f'; break;
		case '\\': unescaped = '\\'; break;
		case '"':  unescaped = '"';  break;
		case 'u':
			// these aren't handled yet
			return 0;
		default:
			// Unknown escape: keep the backslash and the byte
			// after it exactly as they appeared.
			// NOTE(review): JSON's "\/" escape lands here and is
			// stored as two bytes -- confirm that is intended.
			if (!cursor_push_byte(&builder->strings, *p))
				return 0;
			unescaped = *(p+1);
			break;
		}

		if (!cursor_push_byte(&builder->strings, unescaped))
			return 0;

		p++;           // skip the byte following the backslash
		start = p + 1; // the next literal run begins after the escape
	}

	// flush whatever follows the last escape sequence (or the whole input
	// when it contained no escapes)
	if (start < p && !cursor_push(&builder->strings, (unsigned char *)start,
				      p - start)) {
		return 0;
	}

	if (written)
		*written = builder->strings.p - builder_start;

	// TODO: dedupe these!?
	return cursor_push_byte(&builder->strings, '\0');
}
static int ndb_builder_push_json_tag(struct ndb_builder *builder,
const char *str, int len)
{
union packed_str pstr;
if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL))
return 0;
return ndb_builder_finalize_tag(builder, pstr);
}
// Push a json array into an ndb tag ["p", "abcd..."] -> struct ndb_tag // Push a json array into an ndb tag ["p", "abcd..."] -> struct ndb_tag
static inline int ndb_builder_tag_from_json_array(struct ndb_json_parser *p, static int ndb_builder_tag_from_json_array(struct ndb_json_parser *p,
jsmntok_t *array) jsmntok_t *array)
{ {
jsmntok_t *str_tok; jsmntok_t *str_tok;
const char *str; const char *str;
@ -204,8 +350,10 @@ static inline int ndb_builder_tag_from_json_array(struct ndb_json_parser *p,
str_tok = &array[i+1]; str_tok = &array[i+1];
str = p->json + str_tok->start; str = p->json + str_tok->start;
if (!ndb_builder_push_tag_str(&p->builder, str, toksize(str_tok))) if (!ndb_builder_push_json_tag(&p->builder, str,
toksize(str_tok))) {
return 0; return 0;
}
} }
return 1; return 1;
@ -222,14 +370,44 @@ static inline int ndb_builder_process_json_tags(struct ndb_json_parser *p,
return 1; return 1;
for (int i = 0; i < array->size; i++) { for (int i = 0; i < array->size; i++) {
if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) if (!ndb_builder_tag_from_json_array(p, &tag[i+1]))
return 0; return 0;
tag += tag[i+1].size; tag += tag[i+1].size;
} }
return 1; return 1;
} }
/// Parse the leading run of ASCII decimal digits in `start[0..len)`.
///
/// Parsing stops at the first non-digit byte or after `len` bytes, whichever
/// comes first. Returns 1 and stores the value in `*num` when at least one
/// digit was consumed and the value fits in an unsigned int. Returns 0, with
/// `*num` untouched, when there is no leading digit or the value would
/// exceed UINT_MAX.
static int parse_unsigned_int(const char *start, int len, unsigned int *num)
{
	unsigned int value = 0;
	int consumed;

	for (consumed = 0; consumed < len; consumed++) {
		char c = start[consumed];

		if (c < '0' || c > '9')
			break;

		char digit = c - '0';

		// value * 10 + digit would not fit in an unsigned int
		if (value > (UINT_MAX - digit) / 10)
			return 0;

		value = value * 10 + digit;
	}

	if (consumed == 0)
		return 0;

	*num = value;
	return 1;
}
int ndb_note_from_json(const char *json, int len, struct ndb_note **note, int ndb_note_from_json(const char *json, int len, struct ndb_note **note,
@ -278,17 +456,36 @@ int ndb_note_from_json(const char *json, int len, struct ndb_note **note,
} else if (start[0] == 'k' && jsoneq(json, tok, tok_len, "kind")) { } else if (start[0] == 'k' && jsoneq(json, tok, tok_len, "kind")) {
// kind // kind
tok = &parser.toks[i+1]; tok = &parser.toks[i+1];
printf("json_kind %.*s\n", toksize(tok), json + tok->start); start = json + tok->start;
if (tok->type != JSMN_PRIMITIVE || tok_len <= 0)
return 0;
if (!parse_unsigned_int(start, toksize(tok),
&parser.builder.note->kind))
return 0;
} else if (start[0] == 'c') { } else if (start[0] == 'c') {
if (jsoneq(json, tok, tok_len, "created_at")) { if (jsoneq(json, tok, tok_len, "created_at")) {
// created_at // created_at
tok = &parser.toks[i+1]; tok = &parser.toks[i+1];
printf("json_created_at %.*s\n", toksize(tok), json + tok->start); start = json + tok->start;
if (tok->type != JSMN_PRIMITIVE || tok_len <= 0)
return 0;
if (!parse_unsigned_int(start, toksize(tok),
&parser.builder.note->created_at))
return 0;
} else if (jsoneq(json, tok, tok_len, "content")) { } else if (jsoneq(json, tok, tok_len, "content")) {
// content // content
tok = &parser.toks[i+1]; tok = &parser.toks[i+1];
if (!ndb_builder_set_content(&parser.builder, json + tok->start, toksize(tok))) union packed_str pstr;
tok_len = toksize(tok);
int written;
if (!ndb_builder_make_json_str(&parser.builder,
json + tok->start,
tok_len, &pstr,
&written)) {
return 0; return 0;
}
parser.builder.note->content_length = written;
parser.builder.note->content = pstr;
} }
} else if (start[0] == 't' && jsoneq(json, tok, tok_len, "tags")) { } else if (start[0] == 't' && jsoneq(json, tok, tok_len, "tags")) {
tok = &parser.toks[i+1]; tok = &parser.toks[i+1];
@ -336,10 +533,7 @@ inline int ndb_builder_push_tag_str(struct ndb_builder *builder,
const char *str, int len) const char *str, int len)
{ {
union packed_str pstr; union packed_str pstr;
if (!ndb_builder_make_string(builder, str, len, &pstr)) if (!ndb_builder_make_str(builder, str, len, &pstr))
return 0; return 0;
if (!cursor_push_u32(&builder->note_cur, pstr.offset)) return ndb_builder_finalize_tag(builder, pstr);
return 0;
builder->current_tag->count++;
return 1;
} }

View File

@ -40,6 +40,7 @@ struct ndb_note {
uint32_t created_at; uint32_t created_at;
uint32_t kind; uint32_t kind;
uint32_t content_length;
union packed_str content; union packed_str content;
uint32_t strings; uint32_t strings;
uint32_t json; uint32_t json;
@ -84,6 +85,7 @@ static inline int ndb_str_is_packed(union packed_str str)
return (str.offset >> 31) & 0x1; return (str.offset >> 31) & 0x1;
} }
static inline const char * ndb_note_str(struct ndb_note *note, static inline const char * ndb_note_str(struct ndb_note *note,
union packed_str *str) union packed_str *str)
{ {
@ -99,7 +101,7 @@ static inline const char * ndb_tag_str(struct ndb_note *note,
return ndb_note_str(note, &tag->strs[ind]); return ndb_note_str(note, &tag->strs[ind]);
} }
static inline int ndb_tag_matches_char(struct ndb_note *note, static int ndb_tag_matches_char(struct ndb_note *note,
struct ndb_tag *tag, int ind, char c) struct ndb_tag *tag, int ind, char c)
{ {
const char *str = ndb_tag_str(note, tag, ind); const char *str = ndb_tag_str(note, tag, ind);
@ -136,11 +138,21 @@ static inline uint32_t ndb_note_created_at(struct ndb_note *note)
return note->created_at; return note->created_at;
} }
/// Read the `kind` field stored in `note`.
static inline uint32_t ndb_note_kind(struct ndb_note *note)
{
	uint32_t kind = note->kind;

	return kind;
}
static inline const char * ndb_note_content(struct ndb_note *note) static inline const char * ndb_note_content(struct ndb_note *note)
{ {
return ndb_note_str(note, &note->content); return ndb_note_str(note, &note->content);
} }
/// Read the `content_length` field stored in `note`.
static inline uint32_t ndb_note_content_length(struct ndb_note *note)
{
	uint32_t length = note->content_length;

	return length;
}
static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes) static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes)
{ {
struct ndb_note *note = (struct ndb_note *)bytes; struct ndb_note *note = (struct ndb_note *)bytes;