From c31a213704c8149cfc50e729d82ae2b74369aef0 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 11 Jan 2023 16:23:18 -0500 Subject: [PATCH] fixed size arrays in NostrIndex, where possible - shrinks records by 16 bytes, and 16 bytes for every e or p tag --- fbs/nostr-index.fbs | 18 +++++++++++++---- golpe.yaml | 13 +++++++++++- src/ActiveMonitors.h | 23 ++++++++++++++++----- src/events.cpp | 48 ++++++++++++++++++++++++++++---------------- src/filters.h | 12 ++++++++++- 5 files changed, 86 insertions(+), 28 deletions(-) diff --git a/fbs/nostr-index.fbs b/fbs/nostr-index.fbs index e5d5858..73680a1 100644 --- a/fbs/nostr-index.fbs +++ b/fbs/nostr-index.fbs @@ -1,16 +1,26 @@ namespace NostrIndex; -table Tag { +struct Fixed32Bytes { + val: [ubyte:32]; +} + +table TagGeneral { key: uint8; val: [ubyte]; } +table TagFixed32 { + key: uint8; + val: Fixed32Bytes; +} + table Event { - id: [ubyte]; - pubkey: [ubyte]; + id: Fixed32Bytes; + pubkey: Fixed32Bytes; created_at: uint64; kind: uint64; - tags: [Tag]; + tagsGeneral: [TagGeneral]; + tagsFixed32: [TagFixed32]; } table Empty {} diff --git a/golpe.yaml b/golpe.yaml index d0b6d18..bba1300 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -5,6 +5,11 @@ quadrable: true flatBuffers: | include "../fbs/nostr-index.fbs"; +includes: | + inline std::string_view sv(const NostrIndex::Fixed32Bytes *f) { + return std::string_view((const char *)f->val()->data(), 32); + } + tables: Event: tableId: 1 @@ -45,7 +50,13 @@ tables: kind = makeKey_Uint64Uint64(flat->kind(), indexTime); pubkeyKind = makeKey_StringUint64Uint64(sv(flat->pubkey()), flat->kind(), indexTime); - for (const auto &tagPair : *(flat->tags())) { + for (const auto &tagPair : *(flat->tagsGeneral())) { + auto tagName = (char)tagPair->key(); + auto tagVal = sv(tagPair->val()); + tag.push_back(makeKey_StringUint64(std::string(1, tagName) + std::string(tagVal), indexTime)); + } + + for (const auto &tagPair : *(flat->tagsFixed32())) { auto tagName = (char)tagPair->key(); auto tagVal = sv(tagPair->val()); tag.push_back(makeKey_StringUint64(std::string(1, tagName) + std::string(tagVal), indexTime)); diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h index 6de6996..e192c15 100644 --- a/src/ActiveMonitors.h +++ b/src/ActiveMonitors.h @@ -30,6 +30,14 @@ struct ActiveMonitors : NonCopyable { std::map allKinds; MonitorSet allOthers; + std::string tagSpecBuf = std::string(256, '\0'); + const std::string &getTagSpec(uint8_t k, std::string_view val) { + tagSpecBuf.clear(); + tagSpecBuf += (char)k; + tagSpecBuf += val; + return tagSpecBuf; + } + public: void addSub(lmdb::txn &txn, Subscription &&sub, uint64_t currEventId) { @@ -124,10 +132,15 @@ struct ActiveMonitors : NonCopyable { })); } - for (const auto &tag : *flat->tags()) { - // FIXME: can avoid this allocation: - auto tagSpec = std::string(1, (char)tag->key()) + std::string(sv(tag->val())); + for (const auto &tag : *flat->tagsFixed32()) { + auto &tagSpec = getTagSpec(tag->key(), sv(tag->val())); + processMonitorsExact(allTags, tagSpec, static_cast>([&](const std::string &val){ + return tagSpec == val; + })); + } + for (const auto &tag : *flat->tagsGeneral()) { + auto &tagSpec = getTagSpec(tag->key(), sv(tag->val())); processMonitorsExact(allTags, tagSpec, static_cast>([&](const std::string &val){ return tagSpec == val; })); @@ -174,7 +187,7 @@ struct ActiveMonitors : NonCopyable { } else if (f.tags.size()) { for (const auto &[tagName, filterSet] : f.tags) { for (size_t i = 0; i < filterSet.size(); i++) { - std::string tagSpec = std::string(1, tagName) + filterSet.at(i); + auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); auto res = allTags.try_emplace(tagSpec); res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); } @@ -207,7 +220,7 @@ struct ActiveMonitors : NonCopyable { } else if (f.tags.size()) { for (const auto &[tagName, filterSet] : f.tags) { for (size_t i = 0; i < filterSet.size(); i++) { - std::string tagSpec = std::string(1, tagName) + filterSet.at(i); + auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); auto &monSet = allTags.at(tagSpec); monSet.erase(&f); if (monSet.empty()) allTags.erase(tagSpec); diff --git a/src/events.cpp b/src/events.cpp index 81bfec1..4e68df6 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -8,18 +8,17 @@ std::string nostrJsonToFlat(const tao::json::value &v) { // Extract values from JSON, add strings to builder - auto loadHexStr = [&](std::string_view k, uint64_t size){ - auto s = from_hex(v.at(k).get_string(), false); - if (s.size() != size) throw herr("unexpected size of hex data"); - return builder.CreateVector((uint8_t*)s.data(), s.size()); - }; - - auto idPtr = loadHexStr("id", 32); - auto pubkeyPtr = loadHexStr("pubkey", 32); + auto id = from_hex(v.at("id").get_string(), false); + auto pubkey = from_hex(v.at("pubkey").get_string(), false); uint64_t created_at = v.at("created_at").get_unsigned(); uint64_t kind = v.at("kind").get_unsigned(); - std::vector> tagPtrs; + if (id.size() != 32) throw herr("unexpected id size"); + if (pubkey.size() != 32) throw herr("unexpected pubkey size"); + + std::vector> tagsGeneral; + std::vector> tagsFixed32; + if (v.at("tags").get_array().size() > cfg().events__maxNumTags) throw herr("too many tags: ", v.at("tags").get_array().size()); for (auto &tagArr : v.at("tags").get_array()) { auto &tag = tagArr.get_array(); @@ -29,20 +28,35 @@ std::string nostrJsonToFlat(const tao::json::value &v) { if (tagName.size() != 1) continue; // only single-char tags need indexing auto tagVal = tag.at(1).get_string(); - if (tagVal.size() < 1 || tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too small/large: ", tagVal.size()); + if (tagName == "e" || tagName == "p") { tagVal = from_hex(tagVal, false); - if (tagVal.size() != 32) throw herr("unexpected size for e/p tag"); - } - auto tagValPtr = builder.CreateVector((uint8_t*)tagVal.data(), tagVal.size()); + if (tagVal.size() != 32) throw herr("unexpected size for fixed-size tag"); - tagPtrs.push_back(NostrIndex::CreateTag(builder, (uint8_t)tagName[0], tagValPtr)); + tagsFixed32.emplace_back(NostrIndex::CreateTagFixed32(builder, + (uint8_t)tagName[0], + (NostrIndex::Fixed32Bytes*)tagVal.data() + )); + } else { + if (tagVal.size() < 1 || tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too small/large: ", tagVal.size()); + + tagsGeneral.emplace_back(NostrIndex::CreateTagGeneral(builder, + (uint8_t)tagName[0], + builder.CreateVector((uint8_t*)tagVal.data(), tagVal.size()) + )); + } } - auto tagsPtr = builder.CreateVector>(tagPtrs); // Create flatbuffer - auto eventPtr = NostrIndex::CreateEvent(builder, idPtr, pubkeyPtr, created_at, kind, tagsPtr); + auto eventPtr = NostrIndex::CreateEvent(builder, + (NostrIndex::Fixed32Bytes*)id.data(), + (NostrIndex::Fixed32Bytes*)pubkey.data(), + created_at, + kind, + builder.CreateVector>(tagsGeneral), + builder.CreateVector>(tagsFixed32) + ); builder.Finish(eventPtr); @@ -212,7 +226,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorkind() == 5) { // Deletion event, delete all referenced events - for (const auto &tagPair : *(flat->tags())) { + for (const auto &tagPair : *(flat->tagsFixed32())) { if (tagPair->key() == 'e') { auto otherEv = lookupEventById(txn, sv(tagPair->val())); if (otherEv && sv(otherEv->flat_nested()->pubkey()) == sv(flat->pubkey())) { diff --git a/src/filters.h b/src/filters.h index bf7ea7b..02f121d 100644 --- a/src/filters.h +++ b/src/filters.h @@ -190,7 +190,7 @@ struct NostrFilter { for (const auto &[tag, filt] : tags) { bool foundMatch = false; - for (const auto &tagPair : *(ev->tags())) { + for (const auto &tagPair : *(ev->tagsFixed32())) { auto eventTag = tagPair->key(); if (eventTag == tag && filt.doesMatch(sv(tagPair->val()))) { foundMatch = true; @@ -198,6 +198,16 @@ struct NostrFilter { } } + if (!foundMatch) { + for (const auto &tagPair : *(ev->tagsGeneral())) { + auto eventTag = tagPair->key(); + if (eventTag == tag && filt.doesMatch(sv(tagPair->val()))) { + foundMatch = true; + break; + } + } + } + if (!foundMatch) return false; }