From c31a213704c8149cfc50e729d82ae2b74369aef0 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 11 Jan 2023 16:23:18 -0500 Subject: [PATCH 01/51] fixed size arrays in NostrIndex, where possible - shrinks records by 16 bytes, and 16 bytes for every e or p tag --- fbs/nostr-index.fbs | 18 +++++++++++++---- golpe.yaml | 13 +++++++++++- src/ActiveMonitors.h | 23 ++++++++++++++++----- src/events.cpp | 48 ++++++++++++++++++++++++++++---------------- src/filters.h | 12 ++++++++++- 5 files changed, 86 insertions(+), 28 deletions(-) diff --git a/fbs/nostr-index.fbs b/fbs/nostr-index.fbs index e5d5858..73680a1 100644 --- a/fbs/nostr-index.fbs +++ b/fbs/nostr-index.fbs @@ -1,16 +1,26 @@ namespace NostrIndex; -table Tag { +struct Fixed32Bytes { + val: [ubyte:32]; +} + +table TagGeneral { key: uint8; val: [ubyte]; } +table TagFixed32 { + key: uint8; + val: Fixed32Bytes; +} + table Event { - id: [ubyte]; - pubkey: [ubyte]; + id: Fixed32Bytes; + pubkey: Fixed32Bytes; created_at: uint64; kind: uint64; - tags: [Tag]; + tagsGeneral: [TagGeneral]; + tagsFixed32: [TagFixed32]; } table Empty {} diff --git a/golpe.yaml b/golpe.yaml index d0b6d18..bba1300 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -5,6 +5,11 @@ quadrable: true flatBuffers: | include "../fbs/nostr-index.fbs"; +includes: | + inline std::string_view sv(const NostrIndex::Fixed32Bytes *f) { + return std::string_view((const char *)f->val()->data(), 32); + } + tables: Event: tableId: 1 @@ -45,7 +50,13 @@ tables: kind = makeKey_Uint64Uint64(flat->kind(), indexTime); pubkeyKind = makeKey_StringUint64Uint64(sv(flat->pubkey()), flat->kind(), indexTime); - for (const auto &tagPair : *(flat->tags())) { + for (const auto &tagPair : *(flat->tagsGeneral())) { + auto tagName = (char)tagPair->key(); + auto tagVal = sv(tagPair->val()); + tag.push_back(makeKey_StringUint64(std::string(1, tagName) + std::string(tagVal), indexTime)); + } + + for (const auto &tagPair : *(flat->tagsFixed32())) { auto tagName = (char)tagPair->key(); auto 
tagVal = sv(tagPair->val()); tag.push_back(makeKey_StringUint64(std::string(1, tagName) + std::string(tagVal), indexTime)); diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h index 6de6996..e192c15 100644 --- a/src/ActiveMonitors.h +++ b/src/ActiveMonitors.h @@ -30,6 +30,14 @@ struct ActiveMonitors : NonCopyable { std::map allKinds; MonitorSet allOthers; + std::string tagSpecBuf = std::string(256, '\0'); + const std::string &getTagSpec(uint8_t k, std::string_view val) { + tagSpecBuf.clear(); + tagSpecBuf += (char)k; + tagSpecBuf += val; + return tagSpecBuf; + } + public: void addSub(lmdb::txn &txn, Subscription &&sub, uint64_t currEventId) { @@ -124,10 +132,15 @@ struct ActiveMonitors : NonCopyable { })); } - for (const auto &tag : *flat->tags()) { - // FIXME: can avoid this allocation: - auto tagSpec = std::string(1, (char)tag->key()) + std::string(sv(tag->val())); + for (const auto &tag : *flat->tagsFixed32()) { + auto &tagSpec = getTagSpec(tag->key(), sv(tag->val())); + processMonitorsExact(allTags, tagSpec, static_cast>([&](const std::string &val){ + return tagSpec == val; + })); + } + for (const auto &tag : *flat->tagsGeneral()) { + auto &tagSpec = getTagSpec(tag->key(), sv(tag->val())); processMonitorsExact(allTags, tagSpec, static_cast>([&](const std::string &val){ return tagSpec == val; })); @@ -174,7 +187,7 @@ struct ActiveMonitors : NonCopyable { } else if (f.tags.size()) { for (const auto &[tagName, filterSet] : f.tags) { for (size_t i = 0; i < filterSet.size(); i++) { - std::string tagSpec = std::string(1, tagName) + filterSet.at(i); + auto &tagSpec = getTagSpec(tagName, filterSet.at(i)); auto res = allTags.try_emplace(tagSpec); res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); } @@ -207,7 +220,7 @@ struct ActiveMonitors : NonCopyable { } else if (f.tags.size()) { for (const auto &[tagName, filterSet] : f.tags) { for (size_t i = 0; i < filterSet.size(); i++) { - std::string tagSpec = std::string(1, tagName) + filterSet.at(i); + auto 
&tagSpec = getTagSpec(tagName, filterSet.at(i)); auto &monSet = allTags.at(tagSpec); monSet.erase(&f); if (monSet.empty()) allTags.erase(tagSpec); diff --git a/src/events.cpp b/src/events.cpp index 81bfec1..4e68df6 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -8,18 +8,17 @@ std::string nostrJsonToFlat(const tao::json::value &v) { // Extract values from JSON, add strings to builder - auto loadHexStr = [&](std::string_view k, uint64_t size){ - auto s = from_hex(v.at(k).get_string(), false); - if (s.size() != size) throw herr("unexpected size of hex data"); - return builder.CreateVector((uint8_t*)s.data(), s.size()); - }; - - auto idPtr = loadHexStr("id", 32); - auto pubkeyPtr = loadHexStr("pubkey", 32); + auto id = from_hex(v.at("id").get_string(), false); + auto pubkey = from_hex(v.at("pubkey").get_string(), false); uint64_t created_at = v.at("created_at").get_unsigned(); uint64_t kind = v.at("kind").get_unsigned(); - std::vector> tagPtrs; + if (id.size() != 32) throw herr("unexpected id size"); + if (pubkey.size() != 32) throw herr("unexpected pubkey size"); + + std::vector> tagsGeneral; + std::vector> tagsFixed32; + if (v.at("tags").get_array().size() > cfg().events__maxNumTags) throw herr("too many tags: ", v.at("tags").get_array().size()); for (auto &tagArr : v.at("tags").get_array()) { auto &tag = tagArr.get_array(); @@ -29,20 +28,35 @@ std::string nostrJsonToFlat(const tao::json::value &v) { if (tagName.size() != 1) continue; // only single-char tags need indexing auto tagVal = tag.at(1).get_string(); - if (tagVal.size() < 1 || tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too small/large: ", tagVal.size()); + if (tagName == "e" || tagName == "p") { tagVal = from_hex(tagVal, false); - if (tagVal.size() != 32) throw herr("unexpected size for e/p tag"); - } - auto tagValPtr = builder.CreateVector((uint8_t*)tagVal.data(), tagVal.size()); + if (tagVal.size() != 32) throw herr("unexpected size for fixed-size tag"); - 
tagPtrs.push_back(NostrIndex::CreateTag(builder, (uint8_t)tagName[0], tagValPtr)); + tagsFixed32.emplace_back(NostrIndex::CreateTagFixed32(builder, + (uint8_t)tagName[0], + (NostrIndex::Fixed32Bytes*)tagVal.data() + )); + } else { + if (tagVal.size() < 1 || tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too small/large: ", tagVal.size()); + + tagsGeneral.emplace_back(NostrIndex::CreateTagGeneral(builder, + (uint8_t)tagName[0], + builder.CreateVector((uint8_t*)tagVal.data(), tagVal.size()) + )); + } } - auto tagsPtr = builder.CreateVector>(tagPtrs); // Create flatbuffer - auto eventPtr = NostrIndex::CreateEvent(builder, idPtr, pubkeyPtr, created_at, kind, tagsPtr); + auto eventPtr = NostrIndex::CreateEvent(builder, + (NostrIndex::Fixed32Bytes*)id.data(), + (NostrIndex::Fixed32Bytes*)pubkey.data(), + created_at, + kind, + builder.CreateVector>(tagsGeneral), + builder.CreateVector>(tagsFixed32) + ); builder.Finish(eventPtr); @@ -212,7 +226,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorkind() == 5) { // Deletion event, delete all referenced events - for (const auto &tagPair : *(flat->tags())) { + for (const auto &tagPair : *(flat->tagsFixed32())) { if (tagPair->key() == 'e') { auto otherEv = lookupEventById(txn, sv(tagPair->val())); if (otherEv && sv(otherEv->flat_nested()->pubkey()) == sv(flat->pubkey())) { diff --git a/src/filters.h b/src/filters.h index bf7ea7b..02f121d 100644 --- a/src/filters.h +++ b/src/filters.h @@ -190,7 +190,7 @@ struct NostrFilter { for (const auto &[tag, filt] : tags) { bool foundMatch = false; - for (const auto &tagPair : *(ev->tags())) { + for (const auto &tagPair : *(ev->tagsFixed32())) { auto eventTag = tagPair->key(); if (eventTag == tag && filt.doesMatch(sv(tagPair->val()))) { foundMatch = true; @@ -198,6 +198,16 @@ struct NostrFilter { } } + if (!foundMatch) { + for (const auto &tagPair : *(ev->tagsGeneral())) { + auto eventTag = tagPair->key(); + if (eventTag == tag && 
filt.doesMatch(sv(tagPair->val()))) { + foundMatch = true; + break; + } + } + } + if (!foundMatch) return false; } From 27398fe54ae9434f7ec5ca4517d5ce853ae29cc7 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Fri, 13 Jan 2023 18:54:04 -0500 Subject: [PATCH 02/51] detect old DB versions that need to upgrade --- golpe.yaml | 11 +++++++++- src/onAppStartup.cpp | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 src/onAppStartup.cpp diff --git a/golpe.yaml b/golpe.yaml index bba1300..d551cb0 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -1,6 +1,6 @@ appName: strfry - quadrable: true +onAppStartup: true flatBuffers: | include "../fbs/nostr-index.fbs"; @@ -11,6 +11,15 @@ includes: | } tables: + ## DB meta-data. Single entry, with id = 1 + Meta: + tableId: 2 + + fields: + - name: dbVersion + - name: endianness + + ## Stored nostr events Event: tableId: 1 diff --git a/src/onAppStartup.cpp b/src/onAppStartup.cpp new file mode 100644 index 0000000..60c0cd6 --- /dev/null +++ b/src/onAppStartup.cpp @@ -0,0 +1,49 @@ +#include "golpe.h" + +const size_t CURR_DB_VERSION = 1; + +void onAppStartup(lmdb::txn &txn, const std::string &cmd) { + auto dbTooOld = [&](uint64_t ver) { + LE << "Database version too old: " << ver << ". Expected version " << CURR_DB_VERSION; + LE << "You should 'strfry export' your events, delete (or move) the DB files, and 'strfry import' them"; + throw herr("aborting: DB too old"); + }; + + auto dbTooNew = [&](uint64_t ver) { + LE << "Database version too new: " << ver << ". 
Expected version " << CURR_DB_VERSION; + LE << "You should upgrade your version of 'strfry'"; + throw herr("aborting: DB too new"); + }; + + auto s = env.lookup_Meta(txn, 1); + + if (!s) { + { + // The first version of the DB didn't use a Meta entry -- we consider this version 0 + + bool eventFound = false; + + env.foreach_Event(txn, [&](auto &ev){ + eventFound = true; + return false; + }); + + if (cmd == "export") return; + if (eventFound) dbTooOld(0); + } + + env.insert_Meta(txn, CURR_DB_VERSION, 1); + return; + } + + if (s->endianness() != 1) throw herr("DB was created on a machine with different endianness"); + + if (s->dbVersion() < CURR_DB_VERSION) { + if (cmd == "export") return; + dbTooOld(s->dbVersion()); + } + + if (s->dbVersion() > CURR_DB_VERSION) { + dbTooNew(s->dbVersion()); + } +} From ec9161ce0846de11ea3f6a907a37ddd285fdd023 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 22 Jan 2023 03:22:28 -0500 Subject: [PATCH 03/51] DB schema refactor --- golpe | 2 +- golpe.yaml | 24 ++++++++++++++------- src/DBScan.h | 18 ++++++++-------- src/RelayCron.cpp | 38 +++++++++++++++------------------ src/RelayReqMonitor.cpp | 6 +++--- src/RelayReqWorker.cpp | 6 +++--- src/RelayWriter.cpp | 2 +- src/RelayYesstr.cpp | 12 +++++------ src/cmd_monitor.cpp | 4 ++-- src/cmd_scan.cpp | 4 ++-- src/cmd_stream.cpp | 2 +- src/cmd_sync.cpp | 12 +++++------ src/events.cpp | 47 +++++++++++++++++++++++------------------ src/events.h | 6 +++--- 14 files changed, 97 insertions(+), 86 deletions(-) diff --git a/golpe b/golpe index 7705174..511e70d 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit 770517466971574ed0ac9e821ef0db3b43b9f6fb +Subproject commit 511e70d7b9a295d0861884ee0b368605bfe460c3 diff --git a/golpe.yaml b/golpe.yaml index d551cb0..0babc3d 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -13,20 +13,14 @@ includes: | tables: ## DB meta-data. 
Single entry, with id = 1 Meta: - tableId: 2 - fields: - name: dbVersion - name: endianness - ## Stored nostr events + ## Meta-info of nostr events, suitable for indexing + ## Primary key is auto-incremented, called "levId" for Local EVent ID Event: - tableId: 1 - - primaryKey: quadId - fields: - - name: quadId - name: receivedAt # microseconds - name: flat type: ubytes @@ -72,6 +66,20 @@ tables: if (flat->kind() == 5 && tagName == 'e') deletion.push_back(std::string(tagVal) + std::string(sv(flat->pubkey()))); } + CompressionDictionary: + fields: + - name: dict + type: ubytes + +tablesRaw: + ## Raw nostr event JSON, possibly compressed + ## keys are levIds + ## vals are prefixed with a type byte: + ## 0: no compression, payload follows + ## 1: zstd compression. Followed by Dictionary ID (native endian uint32) then compressed payload + EventPayload: + flags: 'MDB_INTEGERKEY' + config: - name: db desc: "Directory that contains strfry database" diff --git a/src/DBScan.h b/src/DBScan.h index 00fb479..a6ebd86 100644 --- a/src/DBScan.h +++ b/src/DBScan.h @@ -232,18 +232,18 @@ struct DBScan { } bool sent = false; - uint64_t quadId = lmdb::from_sv(v); + uint64_t levId = lmdb::from_sv(v); if (f.indexOnlyScans) { if (f.doesMatchTimes(created)) { - handleEvent(quadId); + handleEvent(levId); sent = true; } } else { - auto view = env.lookup_Event(txn, quadId); + auto view = env.lookup_Event(txn, levId); if (!view) throw herr("missing event from index, corrupt DB?"); if (f.doesMatch(view->flat_nested())) { - handleEvent(quadId); + handleEvent(levId); sent = true; } } @@ -298,16 +298,16 @@ struct DBScanQuery : NonCopyable { while (filterGroupIndex < sub.filterGroup.size()) { if (!scanner) scanner = std::make_unique(sub.filterGroup.filters[filterGroupIndex]); - bool complete = scanner->scan(txn, [&](uint64_t quadId){ + bool complete = scanner->scan(txn, [&](uint64_t levId){ // If this event came in after our query began, don't send it. It will be sent after the EOSE. 
- if (quadId > sub.latestEventId) return; + if (levId > sub.latestEventId) return; // We already sent this event - if (alreadySentEvents.find(quadId) != alreadySentEvents.end()) return; - alreadySentEvents.insert(quadId); + if (alreadySentEvents.find(levId) != alreadySentEvents.end()) return; + alreadySentEvents.insert(levId); currScanRecordsFound++; - cb(sub, quadId); + cb(sub, levId); }, [&]{ currScanRecordsTraversed++; return hoytech::curr_time_us() - startTime > timeBudgetMicroseconds; diff --git a/src/RelayCron.cpp b/src/RelayCron.cpp index c0da185..1f11cd0 100644 --- a/src/RelayCron.cpp +++ b/src/RelayCron.cpp @@ -2,17 +2,12 @@ void RelayServer::cleanupOldEvents() { - struct EventDel { - uint64_t nodeId; - uint64_t deletedNodeId; - }; - - std::vector expiredEvents; + std::vector expiredLevIds; { auto txn = env.txn_ro(); - auto mostRecent = getMostRecentEventId(txn); + auto mostRecent = getMostRecentLevId(txn); uint64_t cutoff = hoytech::curr_time_s() - cfg().events__ephemeralEventsLifetimeSeconds; uint64_t currKind = 20'000; @@ -31,10 +26,10 @@ void RelayServer::cleanupOldEvents() { return false; } - uint64_t nodeId = lmdb::from_sv(v); + uint64_t levId = lmdb::from_sv(v); - if (nodeId != mostRecent) { // prevent nodeId re-use - expiredEvents.emplace_back(nodeId, 0); + if (levId != mostRecent) { // prevent levId re-use + expiredLevIds.emplace_back(levId); } return true; @@ -44,29 +39,30 @@ void RelayServer::cleanupOldEvents() { } } - if (expiredEvents.size() > 0) { - LI << "Deleting " << expiredEvents.size() << " ephemeral events"; - + if (expiredLevIds.size() > 0) { auto txn = env.txn_rw(); quadrable::Quadrable qdb; qdb.init(txn); qdb.checkout("events"); + uint64_t numDeleted = 0; auto changes = qdb.change(); - for (auto &e : expiredEvents) { - auto view = env.lookup_Event(txn, e.nodeId); - if (!view) throw herr("missing event from index, corrupt DB?"); - changes.del(flatEventToQuadrableKey(view->flat_nested()), &e.deletedNodeId); + for (auto levId : 
expiredLevIds) { + auto view = env.lookup_Event(txn, levId); + if (!view) continue; // Deleted in between transactions + + numDeleted++; + changes.del(flatEventToQuadrableKey(view->flat_nested())); + env.delete_Event(txn, levId); + env.dbi_EventPayload.del(txn, lmdb::to_sv(levId)); } changes.apply(txn); - for (auto &e : expiredEvents) { - if (e.deletedNodeId) env.delete_Event(txn, e.nodeId); - } - txn.commit(); + + if (numDeleted) LI << "Deleted " << numDeleted << " ephemeral events"; } } diff --git a/src/RelayReqMonitor.cpp b/src/RelayReqMonitor.cpp index 717cb9c..7c513ab 100644 --- a/src/RelayReqMonitor.cpp +++ b/src/RelayReqMonitor.cpp @@ -22,7 +22,7 @@ void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { auto txn = env.txn_ro(); - uint64_t latestEventId = getMostRecentEventId(txn); + uint64_t latestEventId = getMostRecentLevId(txn); if (currEventId > latestEventId) currEventId = latestEventId; for (auto &newMsg : newMsgs) { @@ -44,8 +44,8 @@ void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { monitors.closeConn(msg->connId); } else if (std::get_if(&newMsg.msg)) { env.foreach_Event(txn, [&](auto &ev){ - monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t quadId){ - sendEventToBatch(std::move(recipients), std::string(getEventJson(txn, quadId))); + monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t levId){ + sendEventToBatch(std::move(recipients), std::string(getEventJson(txn, levId))); }); return true; }, false, currEventId + 1); diff --git a/src/RelayReqWorker.cpp b/src/RelayReqWorker.cpp index 16391ea..32e1a3b 100644 --- a/src/RelayReqWorker.cpp +++ b/src/RelayReqWorker.cpp @@ -9,7 +9,7 @@ struct ActiveQueries : NonCopyable { std::deque running; void addSub(lmdb::txn &txn, Subscription &&sub) { - sub.latestEventId = getMostRecentEventId(txn); + sub.latestEventId = getMostRecentLevId(txn); { auto *existing = findQuery(sub.connId, sub.subId); @@ -63,8 +63,8 @@ struct ActiveQueries : NonCopyable { return; } - bool 
complete = q->process(txn, cfg().relay__queryTimesliceBudgetMicroseconds, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t quadId){ - server->sendEvent(sub.connId, sub.subId, getEventJson(txn, quadId)); + bool complete = q->process(txn, cfg().relay__queryTimesliceBudgetMicroseconds, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t levId){ + server->sendEvent(sub.connId, sub.subId, getEventJson(txn, levId)); }); if (complete) { diff --git a/src/RelayWriter.cpp b/src/RelayWriter.cpp index d241552..10c60f5 100644 --- a/src/RelayWriter.cpp +++ b/src/RelayWriter.cpp @@ -37,7 +37,7 @@ void RelayServer::runWriter(ThreadPool::Thread &thr) { bool written = false; if (newEvent.status == EventWriteStatus::Written) { - LI << "Inserted event. id=" << eventIdHex << " qdbNodeId=" << newEvent.nodeId; + LI << "Inserted event. id=" << eventIdHex << " levId=" << newEvent.levId; written = true; } else if (newEvent.status == EventWriteStatus::Duplicate) { message = "duplicate: have this event"; diff --git a/src/RelayYesstr.cpp b/src/RelayYesstr.cpp index 9eabc4c..a81cf7c 100644 --- a/src/RelayYesstr.cpp +++ b/src/RelayYesstr.cpp @@ -53,20 +53,20 @@ void RelayServer::runYesstr(ThreadPool::Thread &thr) { // FIXME: The following blocks the whole thread for the query duration. Should interleave it // with other requests like RelayReqWorker does. 
- std::vector quadEventIds; + std::vector levIds; auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr)); Subscription sub(1, "junkSub", filterGroup); DBScanQuery query(sub); while (1) { - bool complete = query.process(txn, MAX_U64, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t quadId){ - quadEventIds.push_back(quadId); + bool complete = query.process(txn, MAX_U64, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t levId){ + levIds.push_back(levId); }); if (complete) break; } - LI << "Filter matched " << quadEventIds.size() << " local events"; + LI << "Filter matched " << levIds.size() << " local events"; qdb->withMemStore(s.m, [&]{ qdb->writeToMemStore = true; @@ -74,8 +74,8 @@ void RelayServer::runYesstr(ThreadPool::Thread &thr) { auto changes = qdb->change(); - for (auto id : quadEventIds) { - changes.putReuse(txn, id); + for (auto levId : levIds) { + changes.putReuse(txn, levId); } changes.apply(txn); diff --git a/src/cmd_monitor.cpp b/src/cmd_monitor.cpp index 03d3c00..91ad0c8 100644 --- a/src/cmd_monitor.cpp +++ b/src/cmd_monitor.cpp @@ -54,10 +54,10 @@ void cmd_monitor(const std::vector &subArgs) { } env.foreach_Event(txn, [&](auto &ev){ - monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t quadId){ + monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t levId){ for (auto &r : recipients) { if (r.connId == interestConnId && r.subId.str() == interestSubId) { - std::cout << getEventJson(txn, quadId) << "\n"; + std::cout << getEventJson(txn, levId) << "\n"; } } }); diff --git a/src/cmd_scan.cpp b/src/cmd_scan.cpp index 96b0700..10efce0 100644 --- a/src/cmd_scan.cpp +++ b/src/cmd_scan.cpp @@ -35,8 +35,8 @@ void cmd_scan(const std::vector &subArgs) { auto txn = env.txn_ro(); while (1) { - bool complete = query.process(txn, pause ? 
pause : MAX_U64, metrics, [&](const auto &sub, uint64_t quadId){ - std::cout << getEventJson(txn, quadId) << "\n"; + bool complete = query.process(txn, pause ? pause : MAX_U64, metrics, [&](const auto &sub, uint64_t levId){ + std::cout << getEventJson(txn, levId) << "\n"; }); if (complete) break; diff --git a/src/cmd_stream.cpp b/src/cmd_stream.cpp index 49f6ff1..ef54212 100644 --- a/src/cmd_stream.cpp +++ b/src/cmd_stream.cpp @@ -80,7 +80,7 @@ void cmd_stream(const std::vector &subArgs) { { auto txn = env.txn_ro(); - currEventId = getMostRecentEventId(txn); + currEventId = getMostRecentLevId(txn); } ws.onTrigger = [&]{ diff --git a/src/cmd_sync.cpp b/src/cmd_sync.cpp index d6723f1..0bc3e85 100644 --- a/src/cmd_sync.cpp +++ b/src/cmd_sync.cpp @@ -148,7 +148,7 @@ void cmd_sync(const std::vector &subArgs) { if (filterStr.size()) { - std::vector quadEventIds; + std::vector levIds; tao::json::value filterJson; @@ -167,14 +167,14 @@ void cmd_sync(const std::vector &subArgs) { auto txn = env.txn_ro(); while (1) { - bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t quadId){ - quadEventIds.push_back(quadId); + bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t levId){ + levIds.push_back(levId); }); if (complete) break; } - LI << "Filter matched " << quadEventIds.size() << " local events"; + LI << "Filter matched " << levIds.size() << " local events"; controller = std::make_unique(&qdb, &ws); @@ -184,8 +184,8 @@ void cmd_sync(const std::vector &subArgs) { auto changes = qdb.change(); - for (auto id : quadEventIds) { - changes.putReuse(txn, id); + for (auto levId : levIds) { + changes.putReuse(txn, levId); } changes.apply(txn); diff --git a/src/events.cpp b/src/events.cpp index 4e68df6..52dc3e5 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -158,22 +158,22 @@ std::optional lookupEventById(lmdb::txn &txn return output; } -uint64_t getMostRecentEventId(lmdb::txn &txn) { - uint64_t output = 0; +uint64_t 
getMostRecentLevId(lmdb::txn &txn) { + uint64_t levId = 0; env.foreach_Event(txn, [&](auto &ev){ - output = ev.primaryKeyId; + levId = ev.primaryKeyId; return false; }, true); - return output; + return levId; } -std::string_view getEventJson(lmdb::txn &txn, uint64_t quadId) { +std::string_view getEventJson(lmdb::txn &txn, uint64_t levId) { std::string_view raw; - bool found = env.dbiQuadrable_nodesLeaf.get(txn, lmdb::to_sv(quadId), raw); + bool found = env.dbi_EventPayload.get(txn, lmdb::to_sv(levId), raw); if (!found) throw herr("couldn't find leaf node in quadrable, corrupted DB?"); - return raw.substr(8 + 32 + 32); + return raw.substr(1); } @@ -183,7 +183,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector eventIdsToDelete; + std::vector levIdsToDelete; for (size_t i = 0; i < evs.size(); i++) { auto &ev = evs[i]; @@ -202,13 +202,13 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorkind())) { auto searchKey = makeKey_StringUint64Uint64(sv(flat->pubkey()), flat->kind(), MAX_U64); - uint64_t otherEventId = 0; + uint64_t otherLevId = 0; env.generic_foreachFull(txn, env.dbi_Event__pubkeyKind, searchKey, lmdb::to_sv(MAX_U64), [&](auto k, auto v) { ParsedKey_StringUint64Uint64 parsedKey(k); if (parsedKey.s == sv(flat->pubkey()) && parsedKey.n1 == flat->kind()) { if (parsedKey.n2 < flat->created_at()) { - otherEventId = lmdb::from_sv(v); + otherLevId = lmdb::from_sv(v); } else { ev.status = EventWriteStatus::Replaced; } @@ -216,11 +216,11 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorflat_nested())); - eventIdsToDelete.push_back(otherEventId); + levIdsToDelete.push_back(otherLevId); } } @@ -232,26 +232,33 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorflat_nested()->pubkey()) == sv(flat->pubkey())) { LI << "Deleting event. 
id=" << to_hex(sv(tagPair->val())); changes.del(flatEventToQuadrableKey(otherEv->flat_nested())); - eventIdsToDelete.push_back(otherEv->primaryKeyId); + levIdsToDelete.push_back(otherEv->primaryKeyId); } } } } - if (ev.status == EventWriteStatus::Pending) { - changes.put(ev.quadKey, ev.jsonStr, &ev.nodeId); - } + if (ev.status == EventWriteStatus::Pending) changes.put(ev.quadKey, ""); } changes.apply(txn); - for (auto eventId : eventIdsToDelete) { - env.delete_Event(txn, eventId); + for (auto levId : levIdsToDelete) { + env.delete_Event(txn, levId); + env.dbi_EventPayload.del(txn, lmdb::to_sv(levId)); } + std::string tmpBuf; + for (auto &ev : evs) { if (ev.status == EventWriteStatus::Pending) { - env.insert_Event(txn, ev.nodeId, ev.receivedAt, ev.flatStr); + ev.levId = env.insert_Event(txn, ev.receivedAt, ev.flatStr); + + tmpBuf.clear(); + tmpBuf += '\x00'; + tmpBuf += ev.jsonStr; + env.dbi_EventPayload.put(txn, lmdb::to_sv(ev.levId), tmpBuf); + ev.status = EventWriteStatus::Written; } } diff --git a/src/events.h b/src/events.h index 8d73531..d6529b8 100644 --- a/src/events.h +++ b/src/events.h @@ -45,8 +45,8 @@ inline const NostrIndex::Event *flatStrToFlatEvent(std::string_view flatStr) { std::optional lookupEventById(lmdb::txn &txn, std::string_view id); -uint64_t getMostRecentEventId(lmdb::txn &txn); -std::string_view getEventJson(lmdb::txn &txn, uint64_t quadId); +uint64_t getMostRecentLevId(lmdb::txn &txn); +std::string_view getEventJson(lmdb::txn &txn, uint64_t levId); inline quadrable::Key flatEventToQuadrableKey(const NostrIndex::Event *flat) { return quadrable::Key::fromIntegerAndHash(flat->created_at(), sv(flat->id()).substr(0, 23)); @@ -72,8 +72,8 @@ struct EventToWrite { uint64_t receivedAt; void *userData = nullptr; quadrable::Key quadKey; - uint64_t nodeId = 0; EventWriteStatus status = EventWriteStatus::Pending; + uint64_t levId = 0; EventToWrite() {} From 60628d18c370dabc1269b20307c3d3f6c52af521 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 
25 Jan 2023 00:26:03 -0500 Subject: [PATCH 04/51] DB compression --- Makefile | 2 +- src/Decompressor.cpp | 5 + src/Decompressor.h | 69 ++++++++++++ src/RelayReqMonitor.cpp | 5 +- src/RelayReqWorker.cpp | 7 +- src/RelayServer.h | 17 +-- src/RelayWebsocket.cpp | 62 ++--------- src/cmd_dict.cpp | 236 ++++++++++++++++++++++++++++++++++++++++ src/cmd_export.cpp | 4 +- src/cmd_monitor.cpp | 3 +- src/cmd_scan.cpp | 9 +- src/cmd_stream.cpp | 3 +- src/events.cpp | 41 ++++++- src/events.h | 4 +- src/filters.h | 12 +- src/render.h | 44 ++++++++ test/filterFuzzTest.pl | 8 +- test/strfry.conf | 6 - 18 files changed, 446 insertions(+), 91 deletions(-) create mode 100644 src/Decompressor.cpp create mode 100644 src/Decompressor.h create mode 100644 src/cmd_dict.cpp create mode 100644 src/render.h delete mode 100644 test/strfry.conf diff --git a/Makefile b/Makefile index 708da0b..87d74c9 100644 --- a/Makefile +++ b/Makefile @@ -3,4 +3,4 @@ OPT = -O3 -g include golpe/rules.mk -LDLIBS += -lsecp256k1 -lb2 +LDLIBS += -lsecp256k1 -lb2 -lzstd diff --git a/src/Decompressor.cpp b/src/Decompressor.cpp new file mode 100644 index 0000000..7455ddb --- /dev/null +++ b/src/Decompressor.cpp @@ -0,0 +1,5 @@ +#include "golpe.h" + +#include "Decompressor.h" + +DictionaryBroker globalDictionaryBroker; diff --git a/src/Decompressor.h b/src/Decompressor.h new file mode 100644 index 0000000..0a7a6ef --- /dev/null +++ b/src/Decompressor.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include + +#include +#include + +#include "golpe.h" + + +struct DictionaryBroker { + std::mutex mutex; + std::unordered_map dicts; + + ZSTD_DDict *getDict(lmdb::txn &txn, uint32_t dictId) { + std::lock_guard guard(mutex); + + auto it = dicts.find(dictId); + if (it != dicts.end()) return it->second; + + auto view = env.lookup_CompressionDictionary(txn, dictId); + if (!view) throw herr("couldn't find dictId ", dictId); + auto dictBuffer = view->dict(); + + auto *dict = dicts[dictId] = ZSTD_createDDict(dictBuffer.data(), 
dictBuffer.size()); + + return dict; + } +}; + +extern DictionaryBroker globalDictionaryBroker; + + +struct Decompressor { + ZSTD_DCtx *dctx; + std::unordered_map dicts; + std::string buffer; + + Decompressor() { + dctx = ZSTD_createDCtx(); + } + + ~Decompressor() { + ZSTD_freeDCtx(dctx); + } + + void reserve(size_t n) { + buffer.resize(n); + } + + // Return result only valid until one of: a) next call to decompress()/reserve(), or Decompressor destroyed + + std::string_view decompress(lmdb::txn &txn, uint32_t dictId, std::string_view src) { + auto it = dicts.find(dictId); + ZSTD_DDict *dict; + + if (it == dicts.end()) { + dict = dicts[dictId] = globalDictionaryBroker.getDict(txn, dictId); + } else { + dict = it->second; + } + + auto ret = ZSTD_decompress_usingDDict(dctx, buffer.data(), buffer.size(), src.data(), src.size(), dict); + if (ZDICT_isError(ret)) throw herr("zstd decompression failed: ", ZSTD_getErrorName(ret)); + + return std::string_view(buffer.data(), ret); + } +}; diff --git a/src/RelayReqMonitor.cpp b/src/RelayReqMonitor.cpp index 7c513ab..d9feb80 100644 --- a/src/RelayReqMonitor.cpp +++ b/src/RelayReqMonitor.cpp @@ -14,6 +14,7 @@ void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { }); + Decompressor decomp; ActiveMonitors monitors; uint64_t currEventId = MAX_U64; @@ -29,7 +30,7 @@ void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { if (auto msg = std::get_if(&newMsg.msg)) { env.foreach_Event(txn, [&](auto &ev){ if (msg->sub.filterGroup.doesMatch(ev.flat_nested())) { - sendEvent(msg->sub.connId, msg->sub.subId, getEventJson(txn, ev.primaryKeyId)); + sendEvent(msg->sub.connId, msg->sub.subId, getEventJson(txn, decomp, ev.primaryKeyId)); } return true; @@ -45,7 +46,7 @@ void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { } else if (std::get_if(&newMsg.msg)) { env.foreach_Event(txn, [&](auto &ev){ monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t levId){ - sendEventToBatch(std::move(recipients), 
std::string(getEventJson(txn, levId))); + sendEventToBatch(std::move(recipients), std::string(getEventJson(txn, decomp, levId))); }); return true; }, false, currEventId + 1); diff --git a/src/RelayReqWorker.cpp b/src/RelayReqWorker.cpp index 32e1a3b..5b00d03 100644 --- a/src/RelayReqWorker.cpp +++ b/src/RelayReqWorker.cpp @@ -4,6 +4,7 @@ struct ActiveQueries : NonCopyable { + Decompressor decomp; using ConnQueries = std::map; std::map conns; // connId -> subId -> DBScanQuery* std::deque running; @@ -63,8 +64,12 @@ struct ActiveQueries : NonCopyable { return; } + auto cursor = lmdb::cursor::open(txn, env.dbi_EventPayload); + bool complete = q->process(txn, cfg().relay__queryTimesliceBudgetMicroseconds, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t levId){ - server->sendEvent(sub.connId, sub.subId, getEventJson(txn, levId)); + std::string_view key = lmdb::to_sv(levId), val; + if (!cursor.get(key, val, MDB_SET_KEY)) throw herr("couldn't find event in EventPayload, corrupted DB?"); + server->sendEvent(sub.connId, sub.subId, decodeEventPayload(txn, decomp, val, nullptr, nullptr)); }); if (complete) { diff --git a/src/RelayServer.h b/src/RelayServer.h index bcad465..00b1ea1 100644 --- a/src/RelayServer.h +++ b/src/RelayServer.h @@ -168,23 +168,24 @@ struct RelayServer { hubTrigger->send(); } - void sendToConn(uint64_t connId, std::string &payload) { - tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::Send{connId, std::move(payload)}}); - hubTrigger->send(); - } - void sendToConnBinary(uint64_t connId, std::string &&payload) { tpWebsocket.dispatch(0, MsgWebsocket{MsgWebsocket::SendBinary{connId, std::move(payload)}}); hubTrigger->send(); } void sendEvent(uint64_t connId, const SubId &subId, std::string_view evJson) { - std::string reply = std::string("[\"EVENT\",\""); - reply += subId.sv(); + auto subIdSv = subId.sv(); + + std::string reply; + reply.reserve(13 + subIdSv.size() + evJson.size()); + + reply += "[\"EVENT\",\""; + reply += subIdSv; reply += 
"\","; reply += evJson; reply += "]"; - sendToConn(connId, reply); + + sendToConn(connId, std::move(reply)); } void sendEventToBatch(RecipientList &&list, std::string &&evJson) { diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp index cea338c..0e856a8 100644 --- a/src/RelayWebsocket.cpp +++ b/src/RelayWebsocket.cpp @@ -1,6 +1,5 @@ -#include - #include "RelayServer.h" +#include "render.h" #include "app_git_version.h" @@ -19,46 +18,6 @@ static std::string preGenerateHttpResponse(const std::string &contentType, const }; -static std::string renderSize(uint64_t si) { - if (si < 1024) return std::to_string(si) + "b"; - - double s = si; - char buf[128]; - char unit; - - do { - s /= 1024; - if (s < 1024) { - unit = 'K'; - break; - } - - s /= 1024; - if (s < 1024) { - unit = 'M'; - break; - } - - s /= 1024; - if (s < 1024) { - unit = 'G'; - break; - } - - s /= 1024; - unit = 'T'; - } while(0); - - ::snprintf(buf, sizeof(buf), "%.2f%c", s, unit); - return std::string(buf); -} - -static std::string renderPercent(double p) { - char buf[128]; - ::snprintf(buf, sizeof(buf), "%.1f%%", p * 100); - return std::string(buf); -} - void RelayServer::runWebsocket(ThreadPool::Thread &thr) { struct Connection { @@ -198,15 +157,18 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { } else if (auto msg = std::get_if(&newMsg.msg)) { doSend(msg->connId, msg->payload, uWS::OpCode::BINARY); } else if (auto msg = std::get_if(&newMsg.msg)) { - for (auto &item : msg->list) { - tempBuf.clear(); - tempBuf += "[\"EVENT\",\""; - tempBuf += item.subId.sv(); - tempBuf += "\","; - tempBuf += msg->evJson; - tempBuf += "]"; + tempBuf.reserve(13 + MAX_SUBID_SIZE + msg->evJson.size()); + tempBuf.resize(10 + MAX_SUBID_SIZE); + tempBuf += "\","; + tempBuf += msg->evJson; + tempBuf += "]"; - doSend(item.connId, tempBuf, uWS::OpCode::TEXT); + for (auto &item : msg->list) { + auto subIdSv = item.subId.sv(); + auto *p = tempBuf.data() + MAX_SUBID_SIZE - subIdSv.size(); + memcpy(p, 
"[\"EVENT\",\"", 10); + memcpy(p + 10, subIdSv.data(), subIdSv.size()); + doSend(item.connId, std::string_view(p, 13 + subIdSv.size() + msg->evJson.size()), uWS::OpCode::TEXT); } } } diff --git a/src/cmd_dict.cpp b/src/cmd_dict.cpp new file mode 100644 index 0000000..fdf841d --- /dev/null +++ b/src/cmd_dict.cpp @@ -0,0 +1,236 @@ +#include +#include + +#include +#include + +#include +#include "golpe.h" + +#include "DBScan.h" +#include "events.h" +#include "render.h" + + +static const char USAGE[] = +R"( + Usage: + dict stats [--filter=] + dict train [--filter=] [--limit=] [--dictSize=] + dict compress [--filter=] [--dictId=] [--level=] + dict decompress [--filter=] +)"; + + +void cmd_dict(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + std::string filterStr; + if (args["--filter"]) filterStr = args["--filter"].asString(); + else filterStr = "{}"; + + uint64_t limit = MAX_U64; + if (args["--limit"]) limit = args["--limit"].asLong(); + + uint64_t dictSize = 100'000; + if (args["--dictSize"]) dictSize = args["--dictSize"].asLong(); + + uint64_t dictId = 0; + if (args["--dictId"]) dictId = args["--dictId"].asLong(); + + int level = 3; + if (args["--level"]) level = args["--level"].asLong(); + + + Decompressor decomp; + std::vector levIds; + + { + auto txn = env.txn_ro(); + + auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), MAX_U64); + Subscription sub(1, "junkSub", filterGroup); + DBScanQuery query(sub); + + while (1) { + bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t levId){ + levIds.push_back(levId); + }); + + if (complete) break; + } + + LI << "Filter matched " << levIds.size() << " records"; + } + + + if (args["stats"].asBool()) { + uint64_t totalSize = 0; + uint64_t totalCompressedSize = 0; + uint64_t numCompressed = 0; + + auto txn = env.txn_ro(); + + std::map dicts; + + env.foreach_CompressionDictionary(txn, [&](auto &view){ + auto dictId = 
view.primaryKeyId; + if (!dicts.contains(dictId)) dicts[dictId] = 0; + return true; + }); + + for (auto levId : levIds) { + std::string_view raw; + + bool found = env.dbi_EventPayload.get(txn, lmdb::to_sv(levId), raw); + if (!found) throw herr("couldn't find event in EventPayload, corrupted DB?"); + + uint32_t dictId; + size_t outCompressedSize; + + auto json = decodeEventPayload(txn, decomp, raw, &dictId, &outCompressedSize); + + totalSize += json.size(); + totalCompressedSize += dictId ? outCompressedSize : json.size(); + + if (dictId) { + numCompressed++; + dicts[dictId]++; + } + } + + auto ratio = renderPercent(1.0 - (double)totalCompressedSize / totalSize); + + std::cout << "Num compressed: " << numCompressed << " / " << levIds.size() << "\n"; + std::cout << "Uncompressed size: " << renderSize(totalSize) << "\n"; + std::cout << "Compressed size: " << renderSize(totalCompressedSize) << " (" << ratio << ")" << "\n"; + std::cout << "\ndictId : events\n"; + + for (auto &[dictId, n] : dicts) { + std::cout << " " << dictId << " : " << n << "\n"; + } + } else if (args["train"].asBool()) { + std::string trainingBuf; + std::vector trainingSizes; + + { + auto txn = env.txn_ro(); + + if (levIds.size() > limit) { + LI << "Randomly selecting " << limit << " records"; + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(levIds.begin(), levIds.end(), g); + levIds.resize(limit); + } + + for (auto levId : levIds) { + std::string json = std::string(getEventJson(txn, decomp, levId)); + trainingBuf += json; + trainingSizes.emplace_back(json.size()); + } + } + + std::string dict(dictSize, '\0'); + + LI << "Performing zstd training..."; + + auto ret = ZDICT_trainFromBuffer(dict.data(), dict.size(), trainingBuf.data(), trainingSizes.data(), trainingSizes.size()); + if (ZDICT_isError(ret)) throw herr("zstd training failed: ", ZSTD_getErrorName(ret)); + + { + auto txn = env.txn_rw(); + + uint64_t newDictId = env.insert_CompressionDictionary(txn, dict); + + std::cout << 
"Saved new dictionary, dictId = " << newDictId << std::endl; + + txn.commit(); + } + } else if (args["compress"].asBool()) { + if (dictId == 0) throw herr("specify --dictId or --decompress"); + + auto txn = env.txn_rw(); + + auto view = env.lookup_CompressionDictionary(txn, dictId); + if (!view) throw herr("couldn't find dictId ", dictId); + auto dict = view->dict(); + + auto *cctx = ZSTD_createCCtx(); + auto *cdict = ZSTD_createCDict(dict.data(), dict.size(), level); + + uint64_t origSizes = 0; + uint64_t compressedSizes = 0; + uint64_t pendingFlush = 0; + uint64_t processed = 0; + + std::string compressedData(500'000, '\0'); + + for (auto levId : levIds) { + auto orig = getEventJson(txn, decomp, levId); + auto ret = ZSTD_compress_usingCDict(cctx, compressedData.data(), compressedData.size(), orig.data(), orig.size(), cdict); + if (ZDICT_isError(ret)) throw herr("zstd compression failed: ", ZSTD_getErrorName(ret)); + + origSizes += orig.size(); + compressedSizes += ret; + + std::string newVal; + + if (ret + 4 < orig.size()) { + newVal += '\x01'; + newVal += lmdb::to_sv(dictId); + newVal += std::string_view(compressedData.data(), ret); + } else { + newVal += '\x00'; + newVal += orig; + } + + env.dbi_EventPayload.put(txn, lmdb::to_sv(levId), newVal); + + pendingFlush++; + processed++; + if (pendingFlush > 10'000) { + txn.commit(); + + LI << "Progress: " << processed << "/" << levIds.size(); + pendingFlush = 0; + + txn = env.txn_rw(); + } + } + + txn.commit(); + + LI << "Original event sizes: " << origSizes; + LI << "New event sizes: " << compressedSizes; + } else if (args["decompress"].asBool()) { + auto txn = env.txn_rw(); + + uint64_t pendingFlush = 0; + uint64_t processed = 0; + + for (auto levId : levIds) { + auto orig = getEventJson(txn, decomp, levId); + + std::string newVal; + + newVal += '\x00'; + newVal += orig; + + env.dbi_EventPayload.put(txn, lmdb::to_sv(levId), newVal); + + pendingFlush++; + processed++; + if (pendingFlush > 10'000) { + txn.commit(); + 
+ LI << "Progress: " << processed << "/" << levIds.size(); + pendingFlush = 0; + + txn = env.txn_rw(); + } + } + + txn.commit(); + } +} diff --git a/src/cmd_export.cpp b/src/cmd_export.cpp index 5228d41..90e94fd 100644 --- a/src/cmd_export.cpp +++ b/src/cmd_export.cpp @@ -20,6 +20,8 @@ void cmd_export(const std::vector &subArgs) { if (args["--since"]) since = args["--since"].asLong(); if (args["--until"]) until = args["--until"].asLong(); + Decompressor decomp; + auto txn = env.txn_ro(); env.generic_foreachFull(txn, env.dbi_Event__created_at, lmdb::to_sv(since), lmdb::to_sv(0), [&](auto k, auto v) { @@ -32,7 +34,7 @@ void cmd_export(const std::vector &subArgs) { if (isEphemeralEvent(view->flat_nested()->kind())) return true; } - std::cout << getEventJson(txn, view->primaryKeyId) << "\n"; + std::cout << getEventJson(txn, decomp, view->primaryKeyId) << "\n"; return true; }); diff --git a/src/cmd_monitor.cpp b/src/cmd_monitor.cpp index 91ad0c8..6c7304f 100644 --- a/src/cmd_monitor.cpp +++ b/src/cmd_monitor.cpp @@ -21,6 +21,7 @@ void cmd_monitor(const std::vector &subArgs) { auto txn = env.txn_ro(); + Decompressor decomp; ActiveMonitors monitors; std::string line; @@ -57,7 +58,7 @@ void cmd_monitor(const std::vector &subArgs) { monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t levId){ for (auto &r : recipients) { if (r.connId == interestConnId && r.subId.str() == interestSubId) { - std::cout << getEventJson(txn, levId) << "\n"; + std::cout << getEventJson(txn, decomp, levId) << "\n"; } } }); diff --git a/src/cmd_scan.cpp b/src/cmd_scan.cpp index 10efce0..b533ac4 100644 --- a/src/cmd_scan.cpp +++ b/src/cmd_scan.cpp @@ -23,20 +23,21 @@ void cmd_scan(const std::vector &subArgs) { bool metrics = false; if (args["--metrics"]) metrics = true; - std::string filterStr = args[""].asString(); - auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr)); + + auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), 
MAX_U64); Subscription sub(1, "junkSub", filterGroup); - DBScanQuery query(sub); + Decompressor decomp; + auto txn = env.txn_ro(); while (1) { bool complete = query.process(txn, pause ? pause : MAX_U64, metrics, [&](const auto &sub, uint64_t levId){ - std::cout << getEventJson(txn, levId) << "\n"; + std::cout << getEventJson(txn, decomp, levId) << "\n"; }); if (complete) break; diff --git a/src/cmd_stream.cpp b/src/cmd_stream.cpp index ef54212..9917d10 100644 --- a/src/cmd_stream.cpp +++ b/src/cmd_stream.cpp @@ -34,6 +34,7 @@ void cmd_stream(const std::vector &subArgs) { std::unordered_set downloadedIds; WriterPipeline writer; WSConnection ws(url); + Decompressor decomp; ws.onConnect = [&]{ if (dir == "down" || dir == "both") { @@ -98,7 +99,7 @@ void cmd_stream(const std::vector &subArgs) { } std::string msg = std::string("[\"EVENT\","); - msg += getEventJson(txn, ev.primaryKeyId); + msg += getEventJson(txn, decomp, ev.primaryKeyId); msg += "]"; ws.send(msg); diff --git a/src/events.cpp b/src/events.cpp index 52dc3e5..80364e6 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -169,13 +169,44 @@ uint64_t getMostRecentLevId(lmdb::txn &txn) { return levId; } -std::string_view getEventJson(lmdb::txn &txn, uint64_t levId) { - std::string_view raw; - bool found = env.dbi_EventPayload.get(txn, lmdb::to_sv(levId), raw); - if (!found) throw herr("couldn't find leaf node in quadrable, corrupted DB?"); - return raw.substr(1); + +// Return result validity same as getEventJson(), see below + +std::string_view decodeEventPayload(lmdb::txn &txn, Decompressor &decomp, std::string_view raw, uint32_t *outDictId, size_t *outCompressedSize) { + if (raw.size() == 0) throw herr("empty event in EventPayload"); + + if (raw[0] == '\x00') { + if (outDictId) *outDictId = 0; + return raw.substr(1); + } else if (raw[0] == '\x01') { + raw = raw.substr(1); + if (raw.size() < 4) throw herr("EventPayload record too short to read dictId"); + uint32_t dictId = lmdb::from_sv(raw.substr(0, 4)); + raw 
= raw.substr(4); + + decomp.reserve(cfg().events__maxEventSize); + std::string_view buf = decomp.decompress(txn, dictId, raw); + + if (outDictId) *outDictId = dictId; + if (outCompressedSize) *outCompressedSize = raw.size(); + return buf; + } else { + throw("Unexpected first byte in EventPayload"); + } } +// Return result only valid until on of: next call to getEventJson/decodeEventPayload, write on or closing of txn, or any action on decomp object + +std::string_view getEventJson(lmdb::txn &txn, Decompressor &decomp, uint64_t levId) { + std::string_view raw; + + bool found = env.dbi_EventPayload.get(txn, lmdb::to_sv(levId), raw); + if (!found) throw herr("couldn't find event in EventPayload, corrupted DB?"); + + return decodeEventPayload(txn, decomp, raw, nullptr, nullptr); +} + + void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector &evs) { diff --git a/src/events.h b/src/events.h index d6529b8..b87b49b 100644 --- a/src/events.h +++ b/src/events.h @@ -4,6 +4,7 @@ #include "golpe.h" +#include "Decompressor.h" #include "constants.h" @@ -46,7 +47,8 @@ inline const NostrIndex::Event *flatStrToFlatEvent(std::string_view flatStr) { std::optional lookupEventById(lmdb::txn &txn, std::string_view id); uint64_t getMostRecentLevId(lmdb::txn &txn); -std::string_view getEventJson(lmdb::txn &txn, uint64_t levId); +std::string_view decodeEventPayload(lmdb::txn &txn, Decompressor &decomp, std::string_view raw, uint32_t *outDictId, size_t *outCompressedSize); +std::string_view getEventJson(lmdb::txn &txn, Decompressor &decomp, uint64_t levId); inline quadrable::Key flatEventToQuadrableKey(const NostrIndex::Event *flat) { return quadrable::Key::fromIntegerAndHash(flat->created_at(), sv(flat->id()).substr(0, 23)); diff --git a/src/filters.h b/src/filters.h index 02f121d..ab1d054 100644 --- a/src/filters.h +++ b/src/filters.h @@ -122,7 +122,7 @@ struct NostrFilter { bool neverMatch = false; bool indexOnlyScans = false; - explicit NostrFilter(const tao::json::value 
&filterObj) { + explicit NostrFilter(const tao::json::value &filterObj, uint64_t maxFilterLimit) { uint64_t numMajorFields = 0; for (const auto &[k, v] : filterObj.get_object()) { @@ -166,7 +166,7 @@ struct NostrFilter { if (tags.size() > 2) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it - if (limit > cfg().relay__maxFilterLimit) limit = cfg().relay__maxFilterLimit; + if (limit > maxFilterLimit) limit = maxFilterLimit; indexOnlyScans = numMajorFields <= 1; // FIXME: pubkeyKind scan could be serviced index-only too @@ -219,18 +219,18 @@ struct NostrFilterGroup { std::vector filters; // Note that this expects the full array, so the first two items are "REQ" and the subId - NostrFilterGroup(const tao::json::value &req) { + NostrFilterGroup(const tao::json::value &req, uint64_t maxFilterLimit = cfg().relay__maxFilterLimit) { const auto &arr = req.get_array(); if (arr.size() < 3) throw herr("too small"); for (size_t i = 2; i < arr.size(); i++) { - filters.emplace_back(arr[i]); + filters.emplace_back(arr[i], maxFilterLimit); if (filters.back().neverMatch) filters.pop_back(); } } // Hacky! 
Deserves a refactor - static NostrFilterGroup unwrapped(tao::json::value filter) { + static NostrFilterGroup unwrapped(tao::json::value filter, uint64_t maxFilterLimit = cfg().relay__maxFilterLimit) { if (!filter.is_array()) { filter = tao::json::value::array({ filter }); } @@ -241,7 +241,7 @@ struct NostrFilterGroup { pretendReqQuery.push_back(e); } - return NostrFilterGroup(pretendReqQuery); + return NostrFilterGroup(pretendReqQuery, maxFilterLimit); } bool doesMatch(const NostrIndex::Event *ev) const { diff --git a/src/render.h b/src/render.h new file mode 100644 index 0000000..2acfb6b --- /dev/null +++ b/src/render.h @@ -0,0 +1,44 @@ +#pragma once + +#include + + +inline std::string renderSize(uint64_t si) { + if (si < 1024) return std::to_string(si) + "b"; + + double s = si; + char buf[128]; + char unit; + + do { + s /= 1024; + if (s < 1024) { + unit = 'K'; + break; + } + + s /= 1024; + if (s < 1024) { + unit = 'M'; + break; + } + + s /= 1024; + if (s < 1024) { + unit = 'G'; + break; + } + + s /= 1024; + unit = 'T'; + } while(0); + + ::snprintf(buf, sizeof(buf), "%.2f%c", s, unit); + return std::string(buf); +} + +inline std::string renderPercent(double p) { + char buf[128]; + ::snprintf(buf, sizeof(buf), "%.1f%%", p * 100); + return std::string(buf); +} diff --git a/test/filterFuzzTest.pl b/test/filterFuzzTest.pl index eab026d..ec5d869 100644 --- a/test/filterFuzzTest.pl +++ b/test/filterFuzzTest.pl @@ -188,8 +188,8 @@ sub testScan { #print JSON::XS->new->pretty(1)->encode($fg); print "$fge\n"; - my $resA = `./strfry --config test/strfry.conf export 2>/dev/null | perl test/dumbFilter.pl '$fge' | jq -r .pubkey | sort | sha256sum`; - my $resB = `./strfry --config test/strfry.conf scan '$fge' | jq -r .pubkey | sort | sha256sum`; + my $resA = `./strfry export 2>/dev/null | perl test/dumbFilter.pl '$fge' | jq -r .pubkey | sort | sha256sum`; + my $resB = `./strfry scan '$fge' | jq -r .pubkey | sort | sha256sum`; print "$resA\n$resB\n"; @@ -220,7 +220,7 @@ if ($cmd 
eq 'scan') { print "filt: $fge\n\n"; print "DOING MONS\n"; - my $pid = open2(my $outfile, my $infile, './strfry --config test/strfry.conf monitor | jq -r .pubkey | sort | sha256sum'); + my $pid = open2(my $outfile, my $infile, './strfry monitor | jq -r .pubkey | sort | sha256sum'); for my $c (@$monCmds) { print $infile encode_json($c), "\n"; } close($infile); @@ -231,7 +231,7 @@ if ($cmd eq 'scan') { die "monitor cmd died" if $child_exit_status; print "DOING SCAN\n"; - my $resB = `./strfry --config test/strfry.conf scan '$fge' 2>/dev/null | jq -r .pubkey | sort | sha256sum`; + my $resB = `./strfry scan '$fge' 2>/dev/null | jq -r .pubkey | sort | sha256sum`; print "$resA\n$resB\n"; diff --git a/test/strfry.conf b/test/strfry.conf deleted file mode 100644 index 1b4b462..0000000 --- a/test/strfry.conf +++ /dev/null @@ -1,6 +0,0 @@ -db = "./strfry-db/" - -relay { - port = 7777 - maxFilterLimit = 1000000000000 -} From eac2095c833cbdb2af5a24095e7ef7cc08159961 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 25 Jan 2023 12:21:23 -0500 Subject: [PATCH 05/51] indicate which config fields require restart --- golpe | 2 +- golpe.yaml | 4 ++++ strfry.conf | 14 +++++++++----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/golpe b/golpe index 511e70d..9d0519c 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit 511e70d7b9a295d0861884ee0b368605bfe460c3 +Subproject commit 9d0519c1b019577dae72e1c0f58bce276ba769cd diff --git a/golpe.yaml b/golpe.yaml index 0babc3d..c66eadd 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -140,15 +140,19 @@ config: default: false - name: relay__numThreads__ingester + desc: Ingester threads: route incoming requests, validate events/sigs default: 3 noReload: true - name: relay__numThreads__reqWorker + desc: reqWorker threads: Handle initial DB scan for events default: 3 noReload: true - name: relay__numThreads__reqMonitor + desc: reqMonitor threads: Handle filtering of new events default: 3 noReload: true - name: 
relay__numThreads__yesstr + desc: yesstr threads: Experimental yesstr protocol default: 1 noReload: true diff --git a/strfry.conf b/strfry.conf index 49cf3bb..8b3620d 100644 --- a/strfry.conf +++ b/strfry.conf @@ -2,14 +2,14 @@ ## Default strfry config ## -# Directory that contains strfry database +# Directory that contains strfry database (restart required) db = "./strfry-db/" relay { - # Interface to listen on. Use 0.0.0.0 to listen on all interfaces + # Interface to listen on. Use 0.0.0.0 to listen on all interfaces (restart required) bind = "127.0.0.1" - # Port to open for the nostr websocket protocol + # Port to open for the nostr websocket protocol (restart required) port = 7777 info { @@ -26,10 +26,10 @@ relay { contact = "unset" } - # Maximum accepted incoming websocket frame size (should be larger than max event and yesstr msg) + # Maximum accepted incoming websocket frame size (should be larger than max event and yesstr msg) (restart required) maxWebsocketPayloadSize = 131072 - # Websocket-level PING message frequency (should be less than any reverse proxy idle timeouts) + # Websocket-level PING message frequency (should be less than any reverse proxy idle timeouts) (restart required) autoPingSeconds = 55 # If TCP keep-alive should be enabled (detect dropped connections to upstream reverse proxy) @@ -56,12 +56,16 @@ relay { } numThreads { + # Ingester threads: route incoming requests, validate events/sigs (restart required) ingester = 3 + # reqWorker threads: Handle initial DB scan for events (restart required) reqWorker = 3 + # reqMonitor threads: Handle filtering of new events (restart required) reqMonitor = 3 + # yesstr threads: Experimental yesstr protocol (restart required) yesstr = 1 } } From 485abee8edfbd440645ce382abcedf3f3645917c Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 25 Jan 2023 15:16:39 -0500 Subject: [PATCH 06/51] quadrable node gc, clean-up writes, consolidate deletion --- golpe | 2 +- src/RelayCron.cpp | 5 +---- 
src/cmd_compact.cpp | 55 +++++++++++++++++++++++++++++++++++++-------- src/events.cpp | 37 ++++++++++++++---------------- src/events.h | 1 + 5 files changed, 66 insertions(+), 34 deletions(-) diff --git a/golpe b/golpe index 9d0519c..a655f8f 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit 9d0519c1b019577dae72e1c0f58bce276ba769cd +Subproject commit a655f8f5b2dc90034ad62095f111ec2635a4f000 diff --git a/src/RelayCron.cpp b/src/RelayCron.cpp index 1f11cd0..ef1d80c 100644 --- a/src/RelayCron.cpp +++ b/src/RelayCron.cpp @@ -52,11 +52,8 @@ void RelayServer::cleanupOldEvents() { for (auto levId : expiredLevIds) { auto view = env.lookup_Event(txn, levId); if (!view) continue; // Deleted in between transactions - + deleteEvent(txn, changes, *view); numDeleted++; - changes.del(flatEventToQuadrableKey(view->flat_nested())); - env.delete_Event(txn, levId); - env.dbi_EventPayload.del(txn, lmdb::to_sv(levId)); } changes.apply(txn); diff --git a/src/cmd_compact.cpp b/src/cmd_compact.cpp index 11604f6..efffdb1 100644 --- a/src/cmd_compact.cpp +++ b/src/cmd_compact.cpp @@ -4,27 +4,64 @@ #include #include "golpe.h" +#include "render.h" + static const char USAGE[] = R"( Usage: - compact + compact export + compact quad-gc )"; void cmd_compact(const std::vector &subArgs) { std::map args = docopt::docopt(USAGE, subArgs, true, ""); - std::string outputFile = args[""].asString(); + if (args["export"].asBool()) { + std::string outputFile = args[""].asString(); - if (outputFile == "-") { - env.copy_fd(1); - } else { - if (access(outputFile.c_str(), F_OK) == 0) throw herr("output file '", outputFile, "' exists, not overwriting"); + if (outputFile == "-") { + env.copy_fd(1); + } else { + if (access(outputFile.c_str(), F_OK) == 0) throw herr("output file '", outputFile, "' exists, not overwriting"); - auto *f = ::fopen(outputFile.c_str(), "w"); - if (!f) throw herr("opening output file '", outputFile, "' failed: ", strerror(errno)); + auto *f = ::fopen(outputFile.c_str(), "w"); + 
if (!f) throw herr("opening output file '", outputFile, "' failed: ", strerror(errno)); - env.copy_fd(::fileno(f)); + env.copy_fd(::fileno(f)); + } + } else if (args["quad-gc"].asBool()) { + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + qdb.checkout("events"); + + quadrable::Quadrable::GarbageCollector gc(qdb); + + { + auto txn = env.txn_ro(); + gc.markAllHeads(txn); + } + + { + auto txn = env.txn_rw(); + + auto stats = gc.sweep(txn); + /* + auto stats = gc.sweep(txn, [&](uint64_t nodeId){ + quadrable::Quadrable::ParsedNode node(&qdb, txn, nodeId); + if (!node.isBranch()) throw herr("unexpected quadrable node type during gc: ", (int)node.nodeType); + return true; + }); + */ + + txn.commit(); + + LI << "Total nodes: " << stats.total; + LI << "Collected: " << stats.collected << " (" << renderPercent((double)stats.collected / stats.total) << ")"; + } } } diff --git a/src/events.cpp b/src/events.cpp index 80364e6..a7403fc 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -195,7 +195,7 @@ std::string_view decodeEventPayload(lmdb::txn &txn, Decompressor &decomp, std::s } } -// Return result only valid until on of: next call to getEventJson/decodeEventPayload, write on or closing of txn, or any action on decomp object +// Return result only valid until one of: next call to getEventJson/decodeEventPayload, write to/closing of txn, or any action on decomp object std::string_view getEventJson(lmdb::txn &txn, Decompressor &decomp, uint64_t levId) { std::string_view raw; @@ -209,12 +209,20 @@ std::string_view getEventJson(lmdb::txn &txn, Decompressor &decomp, uint64_t lev + +void deleteEvent(lmdb::txn &txn, quadrable::Quadrable::UpdateSet &changes, defaultDb::environment::View_Event &ev) { + changes.del(flatEventToQuadrableKey(ev.flat_nested())); + env.dbi_EventPayload.del(txn, lmdb::to_sv(ev.primaryKeyId)); + env.delete_Event(txn, ev.primaryKeyId); +} + + + void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector &evs) { 
std::sort(evs.begin(), evs.end(), [](auto &a, auto &b) { return a.quadKey < b.quadKey; }); auto changes = qdb.change(); - - std::vector levIdsToDelete; + std::string tmpBuf; for (size_t i = 0; i < evs.size(); i++) { auto &ev = evs[i]; @@ -250,8 +258,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorflat_nested())); - levIdsToDelete.push_back(otherLevId); + deleteEvent(txn, changes, *otherEv); } } @@ -262,26 +269,12 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorval())); if (otherEv && sv(otherEv->flat_nested()->pubkey()) == sv(flat->pubkey())) { LI << "Deleting event. id=" << to_hex(sv(tagPair->val())); - changes.del(flatEventToQuadrableKey(otherEv->flat_nested())); - levIdsToDelete.push_back(otherEv->primaryKeyId); + deleteEvent(txn, changes, *otherEv); } } } } - if (ev.status == EventWriteStatus::Pending) changes.put(ev.quadKey, ""); - } - - changes.apply(txn); - - for (auto levId : levIdsToDelete) { - env.delete_Event(txn, levId); - env.dbi_EventPayload.del(txn, lmdb::to_sv(levId)); - } - - std::string tmpBuf; - - for (auto &ev : evs) { if (ev.status == EventWriteStatus::Pending) { ev.levId = env.insert_Event(txn, ev.receivedAt, ev.flatStr); @@ -290,7 +283,11 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector(ev.levId), tmpBuf); + changes.put(ev.quadKey, ""); + ev.status = EventWriteStatus::Written; } } + + changes.apply(txn); } diff --git a/src/events.h b/src/events.h index b87b49b..a8f5eab 100644 --- a/src/events.h +++ b/src/events.h @@ -87,3 +87,4 @@ struct EventToWrite { void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector &evs); +void deleteEvent(lmdb::txn &txn, quadrable::Quadrable::UpdateSet &changes, defaultDb::environment::View_Event &ev); From 9e2bc45a466df940adcf7da9389575a6e89ac551 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 25 Jan 2023 16:31:31 -0500 Subject: [PATCH 07/51] use 5+27 length for quadrable keys instead of 9+23 - reduces quadrable 
branching - increases collision resistance - good until year 2514 --- golpe | 2 +- src/constants.h | 3 ++- src/events.cpp | 2 +- src/events.h | 4 +++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/golpe b/golpe index a655f8f..f84f71a 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit a655f8f5b2dc90034ad62095f111ec2635a4f000 +Subproject commit f84f71a2f0758a7dd0df19e6091656737b9f9797 diff --git a/src/constants.h b/src/constants.h index fb10191..89ea623 100644 --- a/src/constants.h +++ b/src/constants.h @@ -1,3 +1,4 @@ #pragma once -const size_t MAX_SUBID_SIZE = 63; +const size_t MAX_SUBID_SIZE = 63; // Statically allocated size in SubId +const uint64_t MAX_TIMESTAMP = 17179869184; // Safety limit to ensure it can fit in quadrable key. Good until year 2514. diff --git a/src/events.cpp b/src/events.cpp index a7403fc..e66d9b1 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -119,7 +119,7 @@ void verifyEventTimestamp(const NostrIndex::Event *flat) { uint64_t latest = now + cfg().events__rejectEventsNewerThanSeconds; if (ts < earliest) throw herr("created_at too early"); - if (ts > latest) throw herr("created_at too late"); + if (ts > latest || ts > MAX_TIMESTAMP) throw herr("created_at too late"); } void parseAndVerifyEvent(const tao::json::value &origJson, secp256k1_context *secpCtx, bool verifyMsg, bool verifyTime, std::string &flatStr, std::string &jsonStr) { diff --git a/src/events.h b/src/events.h index a8f5eab..ff46c28 100644 --- a/src/events.h +++ b/src/events.h @@ -51,7 +51,9 @@ std::string_view decodeEventPayload(lmdb::txn &txn, Decompressor &decomp, std::s std::string_view getEventJson(lmdb::txn &txn, Decompressor &decomp, uint64_t levId); inline quadrable::Key flatEventToQuadrableKey(const NostrIndex::Event *flat) { - return quadrable::Key::fromIntegerAndHash(flat->created_at(), sv(flat->id()).substr(0, 23)); + uint64_t timestamp = flat->created_at(); + if (timestamp > MAX_TIMESTAMP) throw herr("timestamp is too large to encode 
in quadrable key"); + return quadrable::Key::fromIntegerAndHash(timestamp, sv(flat->id()).substr(0, 27)); } From 9b83093be7b44f70d8e65b932bf1e9e25444b7ca Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sat, 28 Jan 2023 15:40:24 -0500 Subject: [PATCH 08/51] bump --- golpe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golpe b/golpe index f84f71a..3da8c91 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit f84f71a2f0758a7dd0df19e6091656737b9f9797 +Subproject commit 3da8c91ddb22b3bd7f62e4a94b4afbe56c2c65c9 From ee612416e08b20048a159932f98180ea1bb906d6 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 29 Jan 2023 04:22:30 -0500 Subject: [PATCH 09/51] index-only scans for pubkey+kind --- TODO | 1 - src/DBScan.h | 39 +++++++++++++++++++++++++++------------ src/filters.h | 3 +-- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/TODO b/TODO index 5a58bbf..e062d76 100644 --- a/TODO +++ b/TODO @@ -15,7 +15,6 @@ features less verbose default logging nice new config "units" feature, ie 1d instead of 86400 make it easier for a thread to setup a quadrable env - opt: PubkeyKind scans could be done index-only rate limits slow-reader detection and back-pressure diff --git a/src/DBScan.h b/src/DBScan.h index a6ebd86..dddc178 100644 --- a/src/DBScan.h +++ b/src/DBScan.h @@ -49,10 +49,16 @@ struct DBScan { std::string resumeKey; uint64_t resumeVal; + enum class KeyMatchResult { + Yes, + No, + NoButContinue, + }; + std::function isComplete; std::function nextFilterItem; std::function resetResume; - std::function keyMatch; + std::function keyMatch; DBScan(const NostrFilter &f_) : f(f_) { remainingLimit = f.limit; @@ -74,7 +80,7 @@ struct DBScan { resumeVal = MAX_U64; }; keyMatch = [&, state](std::string_view k, bool&){ - return k.starts_with(state->prefix); + return k.starts_with(state->prefix) ? 
KeyMatchResult::Yes : KeyMatchResult::No; }; } else if (f.authors && f.kinds) { scanState = PubkeyKindScan{}; @@ -98,16 +104,22 @@ struct DBScan { resumeVal = MAX_U64; }; keyMatch = [&, state](std::string_view k, bool &skipBack){ - if (!k.starts_with(state->prefix)) return false; - if (state->prefix.size() == 32 + 8) return true; + if (!k.starts_with(state->prefix)) return KeyMatchResult::No; + if (state->prefix.size() == 32 + 8) return KeyMatchResult::Yes; ParsedKey_StringUint64Uint64 parsedKey(k); - if (parsedKey.n1 <= f.kinds->at(state->indexKind)) return true; + if (parsedKey.n1 == f.kinds->at(state->indexKind)) { + return KeyMatchResult::Yes; + } else if (parsedKey.n1 < f.kinds->at(state->indexKind)) { + // With a prefix pubkey, continue scanning (pubkey,kind) backwards because with this index + // we don't know the next pubkey to jump back to + return KeyMatchResult::NoButContinue; + } resumeKey = makeKey_StringUint64Uint64(parsedKey.s, f.kinds->at(state->indexKind), MAX_U64); resumeVal = MAX_U64; skipBack = true; - return false; + return KeyMatchResult::No; }; } else if (f.authors) { scanState = PubkeyScan{}; @@ -126,7 +138,7 @@ struct DBScan { resumeVal = MAX_U64; }; keyMatch = [&, state](std::string_view k, bool&){ - return k.starts_with(state->prefix); + return k.starts_with(state->prefix) ? KeyMatchResult::Yes : KeyMatchResult::No; }; } else if (f.tags.size()) { scanState = TagScan{f.tags.begin()}; @@ -150,7 +162,7 @@ struct DBScan { resumeVal = MAX_U64; }; keyMatch = [&, state](std::string_view k, bool&){ - return k.substr(0, state->search.size()) == state->search; + return k.substr(0, state->search.size()) == state->search ? KeyMatchResult::Yes : KeyMatchResult::No; }; } else if (f.kinds) { scanState = KindScan{}; @@ -170,7 +182,7 @@ struct DBScan { }; keyMatch = [&, state](std::string_view k, bool&){ ParsedKey_Uint64Uint64 parsedKey(k); - return parsedKey.n1 == state->kind; + return parsedKey.n1 == state->kind ? 
KeyMatchResult::Yes : KeyMatchResult::No; }; } else { scanState = CreatedAtScan{}; @@ -188,7 +200,7 @@ struct DBScan { resumeVal = MAX_U64; }; keyMatch = [&, state](std::string_view k, bool&){ - return true; + return KeyMatchResult::Yes; }; } } @@ -208,7 +220,8 @@ struct DBScan { return false; } - if (!keyMatch(k, skipBack)) return false; + auto matched = keyMatch(k, skipBack); + if (matched == KeyMatchResult::No) return false; uint64_t created; @@ -234,7 +247,9 @@ struct DBScan { bool sent = false; uint64_t levId = lmdb::from_sv(v); - if (f.indexOnlyScans) { + if (matched == KeyMatchResult::NoButContinue) { + // Don't attempt to match filter + } else if (f.indexOnlyScans) { if (f.doesMatchTimes(created)) { handleEvent(levId); sent = true; diff --git a/src/filters.h b/src/filters.h index ab1d054..cdabe44 100644 --- a/src/filters.h +++ b/src/filters.h @@ -168,8 +168,7 @@ struct NostrFilter { if (limit > maxFilterLimit) limit = maxFilterLimit; - indexOnlyScans = numMajorFields <= 1; - // FIXME: pubkeyKind scan could be serviced index-only too + indexOnlyScans = (numMajorFields <= 1) || (numMajorFields == 2 && authors && kinds); } bool doesMatchTimes(uint64_t created) const { From 5117485ebf4552349aab5328f02eb4e9321cb176 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 29 Jan 2023 15:52:27 -0500 Subject: [PATCH 10/51] GC improvements --- golpe | 2 +- src/RelayCron.cpp | 13 +++++++++++++ src/RelayServer.h | 1 + src/cmd_compact.cpp | 27 ++------------------------- src/cmd_import.cpp | 4 ++++ src/cmd_relay.cpp | 4 ++++ src/gc.h | 37 +++++++++++++++++++++++++++++++++++++ 7 files changed, 62 insertions(+), 26 deletions(-) create mode 100644 src/gc.h diff --git a/golpe b/golpe index 3da8c91..ea1ea8f 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit 3da8c91ddb22b3bd7f62e4a94b4afbe56c2c65c9 +Subproject commit ea1ea8f5ce1208fef8fe895f68b623369276f8de diff --git a/src/RelayCron.cpp b/src/RelayCron.cpp index ef1d80c..5d41802 100644 --- a/src/RelayCron.cpp +++ 
b/src/RelayCron.cpp @@ -1,5 +1,7 @@ #include "RelayServer.h" +#include "gc.h" + void RelayServer::cleanupOldEvents() { std::vector expiredLevIds; @@ -63,3 +65,14 @@ void RelayServer::cleanupOldEvents() { if (numDeleted) LI << "Deleted " << numDeleted << " ephemeral events"; } } + +void RelayServer::garbageCollect() { + quadrable::Quadrable qdb; + { + auto txn = env.txn_ro(); + qdb.init(txn); + } + qdb.checkout("events"); + + quadrableGarbageCollect(qdb, 1); +} diff --git a/src/RelayServer.h b/src/RelayServer.h index 00b1ea1..d044e74 100644 --- a/src/RelayServer.h +++ b/src/RelayServer.h @@ -160,6 +160,7 @@ struct RelayServer { void runYesstr(ThreadPool::Thread &thr); void cleanupOldEvents(); + void garbageCollect(); // Utils (can be called by any thread) diff --git a/src/cmd_compact.cpp b/src/cmd_compact.cpp index efffdb1..2f994fd 100644 --- a/src/cmd_compact.cpp +++ b/src/cmd_compact.cpp @@ -4,7 +4,7 @@ #include #include "golpe.h" -#include "render.h" +#include "gc.h" static const char USAGE[] = @@ -39,29 +39,6 @@ void cmd_compact(const std::vector &subArgs) { } qdb.checkout("events"); - quadrable::Quadrable::GarbageCollector gc(qdb); - - { - auto txn = env.txn_ro(); - gc.markAllHeads(txn); - } - - { - auto txn = env.txn_rw(); - - auto stats = gc.sweep(txn); - /* - auto stats = gc.sweep(txn, [&](uint64_t nodeId){ - quadrable::Quadrable::ParsedNode node(&qdb, txn, nodeId); - if (!node.isBranch()) throw herr("unexpected quadrable node type during gc: ", (int)node.nodeType); - return true; - }); - */ - - txn.commit(); - - LI << "Total nodes: " << stats.total; - LI << "Collected: " << stats.collected << " (" << renderPercent((double)stats.collected / stats.total) << ")"; - } + quadrableGarbageCollect(qdb, 2); } } diff --git a/src/cmd_import.cpp b/src/cmd_import.cpp index f7e2de5..0100f01 100644 --- a/src/cmd_import.cpp +++ b/src/cmd_import.cpp @@ -5,6 +5,7 @@ #include "events.h" #include "filters.h" +#include "gc.h" static const char USAGE[] = @@ -59,6 +60,7 @@ void 
cmd_import(const std::vector &subArgs) { logStatus(); LI << "Committing " << numCommits << " records"; + txn.commit(); txn = env.txn_rw(); @@ -91,5 +93,7 @@ void cmd_import(const std::vector &subArgs) { flushChanges(); + quadrableGarbageCollect(qdb, 2); + txn.commit(); } diff --git a/src/cmd_relay.cpp b/src/cmd_relay.cpp index bc2f0d0..d1cc0b3 100644 --- a/src/cmd_relay.cpp +++ b/src/cmd_relay.cpp @@ -48,6 +48,10 @@ void RelayServer::run() { cleanupOldEvents(); }); + cron.repeat(60 * 60 * 1'000'000UL, [&]{ + garbageCollect(); + }); + cron.setupCb = []{ setThreadName("cron"); }; cron.run(); diff --git a/src/gc.h b/src/gc.h new file mode 100644 index 0000000..334e8bb --- /dev/null +++ b/src/gc.h @@ -0,0 +1,37 @@ +#pragma once + +#include + +#include "golpe.h" + +#include "render.h" + + +inline void quadrableGarbageCollect(quadrable::Quadrable &qdb, int logLevel = 0) { + quadrable::Quadrable::GarbageCollector> gc(qdb); + quadrable::Quadrable::GCStats stats; + + if (logLevel >= 2) LI << "Running garbage collection"; + + { + auto txn = env.txn_ro(); + + if (logLevel >= 2) LI << "GC: mark phase"; + gc.markAllHeads(txn); + if (logLevel >= 2) LI << "GC: sweep phase"; + stats = gc.sweep(txn); + } + + if (logLevel >= 2) { + LI << "GC: Total nodes: " << stats.total; + LI << "GC: Garbage nodes: " << stats.garbage << " (" << renderPercent((double)stats.garbage / stats.total) << ")"; + } + + if (stats.garbage) { + auto txn = env.txn_rw(); + if (logLevel >= 1) LI << "GC: deleting " << stats.garbage << " garbage nodes"; + gc.deleteNodes(txn); + txn.commit(); + } + +} From 8d0c9952ab7c025583ba93a9330a05cba57949a7 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 29 Jan 2023 17:31:28 -0500 Subject: [PATCH 11/51] flat maps/sets --- TODO | 4 ---- golpe | 2 +- golpe.yaml | 1 + src/ActiveMonitors.h | 18 +++++++++--------- src/DBScan.h | 4 ++-- src/Decompressor.h | 5 ++--- src/RelayReqWorker.cpp | 4 ++-- src/RelayWebsocket.cpp | 2 +- src/RelayYesstr.cpp | 2 +- src/Subscription.h | 15 
+++++++++++++-- src/cmd_dict.cpp | 2 +- src/cmd_stream.cpp | 2 +- src/filters.h | 2 +- src/gc.h | 4 +--- src/global.h | 6 ++++++ 15 files changed, 42 insertions(+), 31 deletions(-) create mode 100644 src/global.h diff --git a/TODO b/TODO index e062d76..844145a 100644 --- a/TODO +++ b/TODO @@ -9,11 +9,7 @@ features * `strfry sync` command always takes at least 1 second due to batching delay. figure out better way to flush bool values in config config for compression - config for TCP keepalive - db versioning - document config options, detailed default config file less verbose default logging - nice new config "units" feature, ie 1d instead of 86400 make it easier for a thread to setup a quadrable env rate limits diff --git a/golpe b/golpe index ea1ea8f..dd543e3 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit ea1ea8f5ce1208fef8fe895f68b623369276f8de +Subproject commit dd543e3fef89e976db92b6b4420bdac6fa2e2257 diff --git a/golpe.yaml b/golpe.yaml index c66eadd..c13e4b1 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -1,6 +1,7 @@ appName: strfry quadrable: true onAppStartup: true +useGlobalH: true flatBuffers: | include "../fbs/nostr-index.fbs"; diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h index e192c15..3aa6890 100644 --- a/src/ActiveMonitors.h +++ b/src/ActiveMonitors.h @@ -15,19 +15,19 @@ struct ActiveMonitors : NonCopyable { Monitor(Subscription &sub_) : sub(std::move(sub_)) {} }; - using ConnMonitor = std::map; - std::map conns; // connId -> subId -> Monitor + using ConnMonitor = flat_hash_map; + flat_hash_map conns; // connId -> subId -> Monitor struct MonitorItem { Monitor *mon; uint64_t latestEventId; }; - using MonitorSet = std::map; // FIXME: flat_map here - std::map allIds; - std::map allAuthors; - std::map allTags; - std::map allKinds; + using MonitorSet = flat_hash_map; + btree_map allIds; + btree_map allAuthors; + btree_map allTags; + btree_map allKinds; MonitorSet allOthers; std::string tagSpecBuf = std::string(256, '\0'); @@ -92,7 
+92,7 @@ struct ActiveMonitors : NonCopyable { } }; - auto processMonitorsPrefix = [&](std::map &m, const std::string &key, std::function matches){ + auto processMonitorsPrefix = [&](btree_map &m, const std::string &key, std::function matches){ auto it = m.lower_bound(key.substr(0, 1)); if (it == m.end()) return; @@ -103,7 +103,7 @@ struct ActiveMonitors : NonCopyable { } }; - auto processMonitorsExact = [&](std::map &m, const T &key, std::function matches){ + auto processMonitorsExact = [&](btree_map &m, const T &key, std::function matches){ auto it = m.upper_bound(key); if (it == m.begin()) return; diff --git a/src/DBScan.h b/src/DBScan.h index dddc178..b463bd5 100644 --- a/src/DBScan.h +++ b/src/DBScan.h @@ -30,7 +30,7 @@ struct DBScan { }; struct TagScan { - std::map::const_iterator indexTagName; + flat_hash_map::const_iterator indexTagName; size_t indexTagVal = 0; std::string search; }; @@ -295,7 +295,7 @@ struct DBScanQuery : NonCopyable { size_t filterGroupIndex = 0; bool dead = false; - std::unordered_set alreadySentEvents; // FIXME: flat_set here, or roaring bitmap/judy/whatever + flat_hash_set alreadySentEvents; uint64_t currScanTime = 0; uint64_t currScanSaveRestores = 0; diff --git a/src/Decompressor.h b/src/Decompressor.h index 0a7a6ef..a0f9905 100644 --- a/src/Decompressor.h +++ b/src/Decompressor.h @@ -3,7 +3,6 @@ #include #include -#include #include #include "golpe.h" @@ -11,7 +10,7 @@ struct DictionaryBroker { std::mutex mutex; - std::unordered_map dicts; + flat_hash_map dicts; ZSTD_DDict *getDict(lmdb::txn &txn, uint32_t dictId) { std::lock_guard guard(mutex); @@ -34,7 +33,7 @@ extern DictionaryBroker globalDictionaryBroker; struct Decompressor { ZSTD_DCtx *dctx; - std::unordered_map dicts; + flat_hash_map dicts; std::string buffer; Decompressor() { diff --git a/src/RelayReqWorker.cpp b/src/RelayReqWorker.cpp index 5b00d03..30ca2f0 100644 --- a/src/RelayReqWorker.cpp +++ b/src/RelayReqWorker.cpp @@ -5,8 +5,8 @@ struct ActiveQueries : NonCopyable { 
Decompressor decomp; - using ConnQueries = std::map; - std::map conns; // connId -> subId -> DBScanQuery* + using ConnQueries = flat_hash_map; + flat_hash_map conns; // connId -> subId -> DBScanQuery* std::deque running; void addSub(lmdb::txn &txn, Subscription &&sub) { diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp index 0e856a8..a47c9f4 100644 --- a/src/RelayWebsocket.cpp +++ b/src/RelayWebsocket.cpp @@ -40,7 +40,7 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { uWS::Hub hub; uWS::Group *hubGroup; - std::map connIdToConnection; + flat_hash_map connIdToConnection; uint64_t nextConnectionId = 1; std::string tempBuf; diff --git a/src/RelayYesstr.cpp b/src/RelayYesstr.cpp index a81cf7c..4d5f2c5 100644 --- a/src/RelayYesstr.cpp +++ b/src/RelayYesstr.cpp @@ -20,7 +20,7 @@ void RelayServer::runYesstr(ThreadPool::Thread &thr) { struct SyncStateCollection { RelayServer *server; quadrable::Quadrable *qdb; - std::map> conns; // connId -> reqId -> SyncState + flat_hash_map> conns; // connId -> reqId -> SyncState SyncStateCollection(RelayServer *server_, quadrable::Quadrable *qdb_) : server(server_), qdb(qdb_) {} diff --git a/src/Subscription.h b/src/Subscription.h index 037bf6e..181be44 100644 --- a/src/Subscription.h +++ b/src/Subscription.h @@ -1,5 +1,7 @@ #pragma once +#include + #include "filters.h" @@ -28,10 +30,19 @@ struct SubId { std::string str() const { return std::string(sv()); } + + bool operator==(const SubId &o) const { + return o.sv() == sv(); + } }; -inline bool operator <(const SubId &s1, const SubId &s2) { - return s1.sv() < s2.sv(); +namespace std { + // inject specialization of std::hash + template<> struct hash { + std::size_t operator()(SubId const &p) const { + return phmap::HashState().combine(0, p.sv()); + } + }; } diff --git a/src/cmd_dict.cpp b/src/cmd_dict.cpp index fdf841d..79973b7 100644 --- a/src/cmd_dict.cpp +++ b/src/cmd_dict.cpp @@ -71,7 +71,7 @@ void cmd_dict(const std::vector &subArgs) { auto txn = env.txn_ro(); 
- std::map dicts; + btree_map dicts; env.foreach_CompressionDictionary(txn, [&](auto &view){ auto dictId = view.primaryKeyId; diff --git a/src/cmd_stream.cpp b/src/cmd_stream.cpp index 9917d10..7435e0b 100644 --- a/src/cmd_stream.cpp +++ b/src/cmd_stream.cpp @@ -31,7 +31,7 @@ void cmd_stream(const std::vector &subArgs) { if (dir != "up" && dir != "down" && dir != "both") throw herr("invalid direction: ", dir, ". Should be one of up/down/both"); - std::unordered_set downloadedIds; + flat_hash_set downloadedIds; WriterPipeline writer; WSConnection ws(url); Decompressor decomp; diff --git a/src/filters.h b/src/filters.h index cdabe44..0ef7ded 100644 --- a/src/filters.h +++ b/src/filters.h @@ -114,7 +114,7 @@ struct NostrFilter { std::optional ids; std::optional authors; std::optional kinds; - std::map tags; + flat_hash_map tags; uint64_t since = 0; uint64_t until = MAX_U64; diff --git a/src/gc.h b/src/gc.h index 334e8bb..6bfab0c 100644 --- a/src/gc.h +++ b/src/gc.h @@ -1,14 +1,12 @@ #pragma once -#include - #include "golpe.h" #include "render.h" inline void quadrableGarbageCollect(quadrable::Quadrable &qdb, int logLevel = 0) { - quadrable::Quadrable::GarbageCollector> gc(qdb); + quadrable::Quadrable::GarbageCollector> gc(qdb); quadrable::Quadrable::GCStats stats; if (logLevel >= 2) LI << "Running garbage collection"; diff --git a/src/global.h b/src/global.h new file mode 100644 index 0000000..a7a4ce7 --- /dev/null +++ b/src/global.h @@ -0,0 +1,6 @@ +#pragma once + +#include +#include + +using namespace phmap; From 4f3a245407de4ea78eebec3ea731017f33d6eeaf Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 1 Feb 2023 09:23:28 -0500 Subject: [PATCH 12/51] config for compression --- TODO | 2 -- golpe.yaml | 9 +++++++++ src/RelayWebsocket.cpp | 9 ++++++++- strfry.conf | 8 ++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/TODO b/TODO index 844145a..e67cc2b 100644 --- a/TODO +++ b/TODO @@ -7,8 +7,6 @@ features * limit on number of concurrent sync 
requests * full-db scan limited by since/until * `strfry sync` command always takes at least 1 second due to batching delay. figure out better way to flush - bool values in config - config for compression less verbose default logging make it easier for a thread to setup a quadrable env diff --git a/golpe.yaml b/golpe.yaml index c13e4b1..d315211 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -127,6 +127,15 @@ config: desc: "Maximum records that can be returned per filter" default: 500 + - name: relay__compression__enabled + desc: "Use permessage-deflate compression if supported by client. Reduces bandwidth, but slight increase in CPU" + default: true + noReload: true + - name: relay__compression__slidingWindow + desc: "Maintain a sliding window buffer for each connection. Improves compression, but uses more memory" + default: true + noReload: true + - name: relay__logging__dumpInAll desc: "Dump all incoming messages" default: false diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp index a47c9f4..d286d91 100644 --- a/src/RelayWebsocket.cpp +++ b/src/RelayWebsocket.cpp @@ -69,7 +69,14 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { - hubGroup = hub.createGroup(uWS::PERMESSAGE_DEFLATE | uWS::SLIDING_DEFLATE_WINDOW, cfg().relay__maxWebsocketPayloadSize); + { + int extensionOptions = 0; + + if (cfg().relay__compression__enabled) extensionOptions |= uWS::PERMESSAGE_DEFLATE; + if (cfg().relay__compression__slidingWindow) extensionOptions |= uWS::SLIDING_DEFLATE_WINDOW; + + hubGroup = hub.createGroup(extensionOptions, cfg().relay__maxWebsocketPayloadSize); + } if (cfg().relay__autoPingSeconds) hubGroup->startAutoPing(cfg().relay__autoPingSeconds * 1'000); diff --git a/strfry.conf b/strfry.conf index 8b3620d..6b9f2a2 100644 --- a/strfry.conf +++ b/strfry.conf @@ -41,6 +41,14 @@ relay { # Maximum records that can be returned per filter maxFilterLimit = 500 + compression { + # Use permessage-deflate compression if supported by client. 
Reduces bandwidth, but slight increase in CPU + enabled = true + + # Maintain a sliding window buffer for each connection. Improves compression, but uses more memory + slidingWindow = true + } + logging { # Dump all incoming messages dumpInAll = false From baba729bc5f81ceaf150cf2cf61df43d920724a1 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 2 Feb 2023 15:18:18 -0500 Subject: [PATCH 13/51] custom dbParams settings, like maxreaders --- golpe | 2 +- golpe.yaml | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/golpe b/golpe index dd543e3..92cd3de 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit dd543e3fef89e976db92b6b4420bdac6fa2e2257 +Subproject commit 92cd3de18d2e7b464bc5965565a72d64654712b2 diff --git a/golpe.yaml b/golpe.yaml index d315211..9603fdb 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -2,6 +2,7 @@ appName: strfry quadrable: true onAppStartup: true useGlobalH: true +customLMDBSetup: true flatBuffers: | include "../fbs/nostr-index.fbs"; @@ -83,10 +84,19 @@ tablesRaw: config: - name: db - desc: "Directory that contains strfry database" + desc: "Directory that contains the strfry LMDB database" default: "./strfry-db/" noReload: true + - name: dbParams__maxreaders + desc: "Maximum number of threads/processes that can simultaneously have LMDB transactions open" + default: 256 + noReload: true + - name: dbParams__mapsize + desc: "Size of mmap() to use when loading LMDB (does *not* correspond to disk-space used, default is 10TB)" + default: 10995116277760 + noReload: true + - name: relay__bind desc: "Interface to listen on. 
Use 0.0.0.0 to listen on all interfaces" default: "127.0.0.1" From 271b1723ac3de9960aa50f403a63df8729687b98 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 2 Feb 2023 15:42:05 -0500 Subject: [PATCH 14/51] setrlimit nofiles config --- golpe.yaml | 4 ++++ src/onAppStartup.cpp | 25 ++++++++++++++++++++++++- strfry.conf | 17 ++++++++++++++--- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/golpe.yaml b/golpe.yaml index 9603fdb..a47e032 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -105,6 +105,10 @@ config: desc: "Port to open for the nostr websocket protocol" default: 7777 noReload: true + - name: relay__nofiles + desc: "Set OS-limit on maximum number of open files/sockets (if 0, don't attempt to set)" + default: 1000000 + noReload: true - name: relay__info__name desc: "NIP-11: Name of this server. Short/descriptive (< 30 characters)" diff --git a/src/onAppStartup.cpp b/src/onAppStartup.cpp index 60c0cd6..26e810d 100644 --- a/src/onAppStartup.cpp +++ b/src/onAppStartup.cpp @@ -1,8 +1,13 @@ +#include +#include +#include +#include + #include "golpe.h" const size_t CURR_DB_VERSION = 1; -void onAppStartup(lmdb::txn &txn, const std::string &cmd) { +static void dbCheck(lmdb::txn &txn, const std::string &cmd) { auto dbTooOld = [&](uint64_t ver) { LE << "Database version too old: " << ver << ". 
Expected version " << CURR_DB_VERSION; LE << "You should 'strfry export' your events, delete (or move) the DB files, and 'strfry import' them"; @@ -47,3 +52,21 @@ void onAppStartup(lmdb::txn &txn, const std::string &cmd) { dbTooNew(s->dbVersion()); } } + +static void setRLimits() { + if (!cfg().relay__nofiles) return; + struct rlimit curr; + + if (getrlimit(RLIMIT_NOFILE, &curr)) throw herr("couldn't call getrlimit: ", strerror(errno)); + + if (cfg().relay__nofiles > curr.rlim_max) throw herr("Unable to set NOFILES limit to ", cfg().relay__nofiles, ", exceeds max of ", curr.rlim_max); + + curr.rlim_cur = cfg().relay__nofiles; + + if (setrlimit(RLIMIT_NOFILE, &curr)) throw herr("Failed setting NOFILES limit to ", cfg().relay__nofiles, ": ", strerror(errno)); +} + +void onAppStartup(lmdb::txn &txn, const std::string &cmd) { + dbCheck(txn, cmd); + setRLimits(); +} diff --git a/strfry.conf b/strfry.conf index 6b9f2a2..7308c20 100644 --- a/strfry.conf +++ b/strfry.conf @@ -2,9 +2,17 @@ ## Default strfry config ## -# Directory that contains strfry database (restart required) +# Directory that contains the strfry LMDB database (restart required) db = "./strfry-db/" +dbParams { + # Maximum number of threads/processes that can simultaneously have LMDB transactions open (restart required) + maxreaders = 256 + + # Size of mmap() to use when loading LMDB (does *not* correspond to disk-space used, default is 10TB) (restart required) + mapsize = 10995116277760 +} + relay { # Interface to listen on. Use 0.0.0.0 to listen on all interfaces (restart required) bind = "127.0.0.1" @@ -12,6 +20,9 @@ relay { # Port to open for the nostr websocket protocol (restart required) port = 7777 + # Set OS-limit on maximum number of open files/sockets (if 0, don't attempt to set) (restart required) + nofiles = 1000000 + info { # NIP-11: Name of this server. 
Short/descriptive (< 30 characters) name = "strfry default" @@ -42,10 +53,10 @@ relay { maxFilterLimit = 500 compression { - # Use permessage-deflate compression if supported by client. Reduces bandwidth, but slight increase in CPU + # Use permessage-deflate compression if supported by client. Reduces bandwidth, but slight increase in CPU (restart required) enabled = true - # Maintain a sliding window buffer for each connection. Improves compression, but uses more memory + # Maintain a sliding window buffer for each connection. Improves compression, but uses more memory (restart required) slidingWindow = true } From 6bcda784a65750e5fd05fd0f19009e383c57ca40 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 2 Feb 2023 15:43:10 -0500 Subject: [PATCH 15/51] todo --- TODO | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/TODO b/TODO index e67cc2b..0a5af0a 100644 --- a/TODO +++ b/TODO @@ -11,13 +11,19 @@ features make it easier for a thread to setup a quadrable env rate limits + ! event writes per second per ip slow-reader detection and back-pressure max connections per ip (nginx?) max bandwidth up/down (nginx?) - event writes per second per ip max number of concurrent REQs per connection/ip ? limit on total number of events from a DBScan, not just per filter +event sources + +multiple sync connections in one process/config + +relay block-list events + misc periodic reaping of disconnected sockets ? websocket-level pings From ac896400b90fe3fc0b7f0306c9883f27c67f7982 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sat, 4 Feb 2023 01:32:31 -0500 Subject: [PATCH 16/51] todo --- TODO | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO b/TODO index 0a5af0a..c79606c 100644 --- a/TODO +++ b/TODO @@ -25,5 +25,6 @@ multiple sync connections in one process/config relay block-list events misc + ! when disk is full it should log warning but not crash periodic reaping of disconnected sockets ? 
websocket-level pings From 1987c5a669eeb545a70bc81eb00604c0a213364d Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 5 Feb 2023 01:50:26 -0500 Subject: [PATCH 17/51] increase tag limits, thanks to Jeremy for the tip --- golpe.yaml | 4 ++-- strfry.conf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/golpe.yaml b/golpe.yaml index a47e032..8670529 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -197,7 +197,7 @@ config: default: 300 - name: events__maxNumTags desc: "Maximum number of tags allowed" - default: 250 + default: 2000 - name: events__maxTagValSize desc: "Maximum size for tag values, in bytes" - default: 255 + default: 1024 diff --git a/strfry.conf b/strfry.conf index 7308c20..02cd1dd 100644 --- a/strfry.conf +++ b/strfry.conf @@ -106,8 +106,8 @@ events { ephemeralEventsLifetimeSeconds = 300 # Maximum number of tags allowed - maxNumTags = 250 + maxNumTags = 2000 # Maximum size for tag values, in bytes - maxTagValSize = 255 + maxTagValSize = 1024 } From 93ca4b904433d6b778500a424e5be4788864f71d Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 5 Feb 2023 15:02:36 -0500 Subject: [PATCH 18/51] allow filtering for indexed values > 255 bytes --- README.md | 2 +- src/filters.h | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 17b1b8b..2dc117f 100644 --- a/README.md +++ b/README.md @@ -195,7 +195,7 @@ A `FilterGroup` is a vector of `Filter` objects. When the Ingester receives a `R In order to determine if an event matches against a `Filter`, first the `since` and `until` fields are checked. Then, each field of the event for which a filter item was specified is looked up in the corresponding lookup table. Specifically, the upper-bound index is determined using a binary search (for example `std::upper_bound`). This is the first element greater than the event's item. Then the preceeding table item is checked for either a prefix (`ids`/`authors`) or exact (everything else) match. 
-Since testing `Filter`s against events is performed so frequently, it is a performance-critical operation and some optimisations have been applied. For example, each filter item in the lookup table is represented by a 4 byte data structure, one of which is the first byte of the field and the rest are offset/size lookups into a single memory allocation containing the remaining bytes. Under typical scenarios, this will greatly reduce the amount of memory that needs to be loaded to process a filter. Filters with 16 or fewer items can often be rejected with the load of a single cache line. Because filters aren't scanned linearly, the number of items in a filter (ie amount of pubkeys) doesn't have a significant impact on processing resources. +Since testing `Filter`s against events is performed so frequently, it is a performance-critical operation and some optimisations have been applied. For example, each filter item in the lookup table is represented by a 8 byte data structure, one of which is the first byte of the field and the rest are offset/size lookups into a single memory allocation containing the remaining bytes. Under typical scenarios, this will greatly reduce the amount of memory that needs to be loaded to process a filter. Filters with 8 or fewer items can often be rejected with the load of a single cache line. Because filters aren't scanned linearly, the number of items in a filter (ie amount of pubkeys) doesn't have a significant impact on processing resources. 
#### DBScan diff --git a/src/filters.h b/src/filters.h index 0ef7ded..f97dcf5 100644 --- a/src/filters.h +++ b/src/filters.h @@ -7,9 +7,10 @@ struct FilterSetBytes { struct Item { - uint16_t offset; - uint8_t size; + uint32_t offset; + uint16_t size; uint8_t firstByte; + uint8_t padding; }; std::vector items; @@ -18,6 +19,8 @@ struct FilterSetBytes { // Sizes are post-hex decode FilterSetBytes(const tao::json::value &arrHex, bool hexDecode, size_t minSize, size_t maxSize) { + if (maxSize > std::numeric_limits::max()) throw herr("filter maxSize too big"); + std::vector arr; uint64_t totalSize = 0; @@ -34,11 +37,11 @@ struct FilterSetBytes { for (const auto &item : arr) { if (items.size() > 0 && item.starts_with(at(items.size() - 1))) continue; // remove duplicates and redundant prefixes - items.emplace_back(Item{ (uint16_t)buf.size(), (uint8_t)item.size(), (uint8_t)item[0] }); + items.emplace_back(Item{ (uint32_t)buf.size(), (uint16_t)item.size(), (uint8_t)item[0] }); buf += item; } - if (buf.size() > 65535) throw herr("total filter items too large"); + if (buf.size() > 1'000'000) throw herr("total filter items too large"); } std::string at(size_t n) const { From fc6d5eea2e00d6e6f64fc4ef167cc6641ec1bc31 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 5 Feb 2023 15:06:14 -0500 Subject: [PATCH 19/51] bump golpe for better logging if inotify fails --- golpe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golpe b/golpe index 92cd3de..31b657a 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit 92cd3de18d2e7b464bc5965565a72d64654712b2 +Subproject commit 31b657a426277a14847e40512b621d6a89b85e51 From 43cdd649560a19ee93e9d827fb935c80855a0715 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 5 Feb 2023 15:41:59 -0500 Subject: [PATCH 20/51] increase max subscription ID length, needed for hamstr client --- src/Subscription.h | 6 +++--- src/constants.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Subscription.h 
b/src/Subscription.h index 181be44..4ffe786 100644 --- a/src/Subscription.h +++ b/src/Subscription.h @@ -6,11 +6,11 @@ struct SubId { - char buf[64]; + char buf[72]; SubId(std::string_view val) { - static_assert(MAX_SUBID_SIZE == 63, "MAX_SUBID_SIZE mismatch"); - if (val.size() > 63) throw herr("subscription id too long"); + static_assert(MAX_SUBID_SIZE == 71, "MAX_SUBID_SIZE mismatch"); + if (val.size() > 71) throw herr("subscription id too long"); if (val.size() == 0) throw herr("subscription id too short"); auto badChar = [](char c){ diff --git a/src/constants.h b/src/constants.h index 89ea623..831e58f 100644 --- a/src/constants.h +++ b/src/constants.h @@ -1,4 +1,4 @@ #pragma once -const size_t MAX_SUBID_SIZE = 63; // Statically allocated size in SubId +const size_t MAX_SUBID_SIZE = 71; // Statically allocated size in SubId const uint64_t MAX_TIMESTAMP = 17179869184; // Safety limit to ensure it can fit in quadrable key. Good until year 2514. From 2c86254fb9cb9591529fc53f5031510e27fa2eef Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 5 Feb 2023 15:43:11 -0500 Subject: [PATCH 21/51] limit on max number of concurrent REQs --- golpe.yaml | 3 +++ src/ActiveMonitors.h | 7 ++++++- src/RelayReqMonitor.cpp | 8 ++++++-- src/RelayReqWorker.cpp | 15 +++++++++++++-- strfry.conf | 3 +++ 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/golpe.yaml b/golpe.yaml index 8670529..2dfa186 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -140,6 +140,9 @@ config: - name: relay__maxFilterLimit desc: "Maximum records that can be returned per filter" default: 500 + - name: relay__maxSubsPerConnection + desc: "Maximum number of subscriptions (concurrent REQs) a connection can have open at any time" + default: 20 - name: relay__compression__enabled desc: "Use permessage-deflate compression if supported by client. 
Reduces bandwidth, but slight increase in CPU" diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h index 3aa6890..129b917 100644 --- a/src/ActiveMonitors.h +++ b/src/ActiveMonitors.h @@ -40,7 +40,7 @@ struct ActiveMonitors : NonCopyable { public: - void addSub(lmdb::txn &txn, Subscription &&sub, uint64_t currEventId) { + bool addSub(lmdb::txn &txn, Subscription &&sub, uint64_t currEventId) { if (sub.latestEventId != currEventId) throw herr("sub not up to date"); { @@ -51,10 +51,15 @@ struct ActiveMonitors : NonCopyable { auto res = conns.try_emplace(sub.connId); auto &connMonitors = res.first->second; + if (connMonitors.size() >= cfg().relay__maxSubsPerConnection) { + return false; + } + auto subId = sub.subId; auto *m = &connMonitors.try_emplace(subId, sub).first->second; installLookups(m, currEventId); + return true; } void removeSub(uint64_t connId, const SubId &subId) { diff --git a/src/RelayReqMonitor.cpp b/src/RelayReqMonitor.cpp index d9feb80..4fadd70 100644 --- a/src/RelayReqMonitor.cpp +++ b/src/RelayReqMonitor.cpp @@ -28,9 +28,11 @@ void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { for (auto &newMsg : newMsgs) { if (auto msg = std::get_if(&newMsg.msg)) { + auto connId = msg->sub.connId; + env.foreach_Event(txn, [&](auto &ev){ if (msg->sub.filterGroup.doesMatch(ev.flat_nested())) { - sendEvent(msg->sub.connId, msg->sub.subId, getEventJson(txn, decomp, ev.primaryKeyId)); + sendEvent(connId, msg->sub.subId, getEventJson(txn, decomp, ev.primaryKeyId)); } return true; @@ -38,7 +40,9 @@ void RelayServer::runReqMonitor(ThreadPool::Thread &thr) { msg->sub.latestEventId = latestEventId; - monitors.addSub(txn, std::move(msg->sub), latestEventId); + if (!monitors.addSub(txn, std::move(msg->sub), latestEventId)) { + sendNoticeError(connId, std::string("too many concurrent REQs")); + } } else if (auto msg = std::get_if(&newMsg.msg)) { monitors.removeSub(msg->connId, msg->subId); } else if (auto msg = std::get_if(&newMsg.msg)) { diff --git 
a/src/RelayReqWorker.cpp b/src/RelayReqWorker.cpp index 30ca2f0..dfd4b19 100644 --- a/src/RelayReqWorker.cpp +++ b/src/RelayReqWorker.cpp @@ -9,7 +9,7 @@ struct ActiveQueries : NonCopyable { flat_hash_map conns; // connId -> subId -> DBScanQuery* std::deque running; - void addSub(lmdb::txn &txn, Subscription &&sub) { + bool addSub(lmdb::txn &txn, Subscription &&sub) { sub.latestEventId = getMostRecentLevId(txn); { @@ -20,10 +20,16 @@ struct ActiveQueries : NonCopyable { auto res = conns.try_emplace(sub.connId); auto &connQueries = res.first->second; + if (connQueries.size() >= cfg().relay__maxSubsPerConnection) { + return false; + } + DBScanQuery *q = new DBScanQuery(sub); connQueries.try_emplace(q->sub.subId, q); running.push_front(q); + + return true; } DBScanQuery *findQuery(uint64_t connId, const SubId &subId) { @@ -98,7 +104,12 @@ void RelayServer::runReqWorker(ThreadPool::Thread &thr) { for (auto &newMsg : newMsgs) { if (auto msg = std::get_if(&newMsg.msg)) { - queries.addSub(txn, std::move(msg->sub)); + auto connId = msg->sub.connId; + + if (!queries.addSub(txn, std::move(msg->sub))) { + sendNoticeError(connId, std::string("too many concurrent REQs")); + } + queries.process(this, txn); } else if (auto msg = std::get_if(&newMsg.msg)) { queries.removeSub(msg->connId, msg->subId); diff --git a/strfry.conf b/strfry.conf index 02cd1dd..f41bebc 100644 --- a/strfry.conf +++ b/strfry.conf @@ -52,6 +52,9 @@ relay { # Maximum records that can be returned per filter maxFilterLimit = 500 + # Maximum number of subscriptions (concurrent REQs) a connection can have open at any time + maxSubsPerConnection = 2 + compression { # Use permessage-deflate compression if supported by client. 
Reduces bandwidth, but slight increase in CPU (restart required) enabled = true From b3109d3e57acb1545c49133d5aa6f43fdb9f08be Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 5 Feb 2023 15:55:31 -0500 Subject: [PATCH 22/51] unused var --- src/filters.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/filters.h b/src/filters.h index f97dcf5..25040f9 100644 --- a/src/filters.h +++ b/src/filters.h @@ -23,14 +23,11 @@ struct FilterSetBytes { std::vector arr; - uint64_t totalSize = 0; - for (const auto &i : arrHex.get_array()) { arr.emplace_back(hexDecode ? from_hex(i.get_string(), false) : i.get_string()); size_t itemSize = arr.back().size(); if (itemSize < minSize) throw herr("filter item too small"); if (itemSize > maxSize) throw herr("filter item too large"); - totalSize += itemSize; } std::sort(arr.begin(), arr.end()); From 89540bc4c708b62849e06176533c1808c3d68fd5 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sun, 5 Feb 2023 16:49:29 -0500 Subject: [PATCH 23/51] todo --- TODO | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/TODO b/TODO index c79606c..fa04ef1 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,12 @@ +0.1 release + event sources + rate limits + NIP-40 expiration + NIP-33 param replaceable events + fix sync + when disk is full it should log warning but not crash + ensure DB upgrade flow works + features finish syncing * logging of bytes up/down @@ -9,22 +18,17 @@ features * `strfry sync` command always takes at least 1 second due to batching delay. figure out better way to flush less verbose default logging make it easier for a thread to setup a quadrable env + multiple sync connections in one process/config + relay block-list events + NIP-42 AUTH + procmail-like API for event filtering rate limits ! event writes per second per ip slow-reader detection and back-pressure max connections per ip (nginx?) max bandwidth up/down (nginx?) - max number of concurrent REQs per connection/ip ? 
limit on total number of events from a DBScan, not just per filter -event sources - -multiple sync connections in one process/config - -relay block-list events - misc - ! when disk is full it should log warning but not crash - periodic reaping of disconnected sockets - ? websocket-level pings + ? periodic reaping of disconnected sockets (maybe autoping is doing this already) From 376d7cbf1fa739885bc07a5cee7ff93dc9588dbb Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Tue, 7 Feb 2023 08:08:29 -0500 Subject: [PATCH 24/51] track sources of events --- golpe | 2 +- golpe.yaml | 3 +++ src/RelayIngester.cpp | 6 +++--- src/RelayServer.h | 4 +++- src/RelayWebsocket.cpp | 2 +- src/RelayWriter.cpp | 3 ++- src/WriterPipeline.h | 19 +++++++++++++------ src/cmd_import.cpp | 2 +- src/cmd_stream.cpp | 2 +- src/cmd_sync.cpp | 2 +- src/events.cpp | 2 +- src/events.h | 13 ++++++++++++- 12 files changed, 42 insertions(+), 18 deletions(-) diff --git a/golpe b/golpe index 31b657a..c95388c 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit 31b657a426277a14847e40512b621d6a89b85e51 +Subproject commit c95388c4619fffad4ff6632ccafc2780a6d32663 diff --git a/golpe.yaml b/golpe.yaml index 2dfa186..013acf6 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -27,6 +27,9 @@ tables: - name: flat type: ubytes nestedFlat: NostrIndex.Event + - name: sourceType + - name: sourceInfo + type: ubytes indices: created_at: diff --git a/src/RelayIngester.cpp b/src/RelayIngester.cpp index f02ca80..dc8139a 100644 --- a/src/RelayIngester.cpp +++ b/src/RelayIngester.cpp @@ -29,7 +29,7 @@ void RelayServer::runIngester(ThreadPool::Thread &thr) { if (cfg().relay__logging__dumpInEvents) LI << "[" << msg->connId << "] dumpInEvent: " << msg->payload; try { - ingesterProcessEvent(txn, msg->connId, secpCtx, arr[1], writerMsgs); + ingesterProcessEvent(txn, msg->connId, msg->ipAddr, secpCtx, arr[1], writerMsgs); } catch (std::exception &e) { sendOKResponse(msg->connId, arr[1].at("id").get_string(), false, 
std::string("invalid: ") + e.what()); LI << "Rejected invalid event: " << e.what(); @@ -82,7 +82,7 @@ void RelayServer::runIngester(ThreadPool::Thread &thr) { } } -void RelayServer::ingesterProcessEvent(lmdb::txn &txn, uint64_t connId, secp256k1_context *secpCtx, const tao::json::value &origJson, std::vector &output) { +void RelayServer::ingesterProcessEvent(lmdb::txn &txn, uint64_t connId, std::string ipAddr, secp256k1_context *secpCtx, const tao::json::value &origJson, std::vector &output) { std::string flatStr, jsonStr; parseAndVerifyEvent(origJson, secpCtx, true, true, flatStr, jsonStr); @@ -98,7 +98,7 @@ void RelayServer::ingesterProcessEvent(lmdb::txn &txn, uint64_t connId, secp256k } } - output.emplace_back(MsgWriter{MsgWriter::AddEvent{connId, hoytech::curr_time_us(), std::move(flatStr), std::move(jsonStr)}}); + output.emplace_back(MsgWriter{MsgWriter::AddEvent{connId, std::move(ipAddr), hoytech::curr_time_us(), std::move(flatStr), std::move(jsonStr)}}); } void RelayServer::ingesterProcessReq(lmdb::txn &txn, uint64_t connId, const tao::json::value &arr) { diff --git a/src/RelayServer.h b/src/RelayServer.h index d044e74..4585dd0 100644 --- a/src/RelayServer.h +++ b/src/RelayServer.h @@ -47,6 +47,7 @@ struct MsgWebsocket : NonCopyable { struct MsgIngester : NonCopyable { struct ClientMessage { uint64_t connId; + std::string ipAddr; std::string payload; }; @@ -62,6 +63,7 @@ struct MsgIngester : NonCopyable { struct MsgWriter : NonCopyable { struct AddEvent { uint64_t connId; + std::string ipAddr; uint64_t receivedAt; std::string flatStr; std::string jsonStr; @@ -147,7 +149,7 @@ struct RelayServer { void runWebsocket(ThreadPool::Thread &thr); void runIngester(ThreadPool::Thread &thr); - void ingesterProcessEvent(lmdb::txn &txn, uint64_t connId, secp256k1_context *secpCtx, const tao::json::value &origJson, std::vector &output); + void ingesterProcessEvent(lmdb::txn &txn, uint64_t connId, std::string ipAddr, secp256k1_context *secpCtx, const tao::json::value 
&origJson, std::vector &output); void ingesterProcessReq(lmdb::txn &txn, uint64_t connId, const tao::json::value &origJson); void ingesterProcessClose(lmdb::txn &txn, uint64_t connId, const tao::json::value &origJson); diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp index d286d91..b6dc1d3 100644 --- a/src/RelayWebsocket.cpp +++ b/src/RelayWebsocket.cpp @@ -139,7 +139,7 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { c.stats.bytesDown += length; c.stats.bytesDownCompressed += compressedSize; - tpIngester.dispatch(c.connId, MsgIngester{MsgIngester::ClientMessage{c.connId, std::string(message, length)}}); + tpIngester.dispatch(c.connId, MsgIngester{MsgIngester::ClientMessage{c.connId, std::string(message, length), ws->getAddressBytes()}}); }); diff --git a/src/RelayWriter.cpp b/src/RelayWriter.cpp index 10c60f5..7810a69 100644 --- a/src/RelayWriter.cpp +++ b/src/RelayWriter.cpp @@ -18,7 +18,8 @@ void RelayServer::runWriter(ThreadPool::Thread &thr) { for (auto &newMsg : newMsgs) { if (auto msg = std::get_if(&newMsg.msg)) { - newEvents.emplace_back(std::move(msg->flatStr), std::move(msg->jsonStr), msg->receivedAt, msg); + EventSourceType sourceType = msg->ipAddr.size() == 4 ? 
EventSourceType::IP4 : EventSourceType::IP6; + newEvents.emplace_back(std::move(msg->flatStr), std::move(msg->jsonStr), msg->receivedAt, sourceType, std::move(msg->ipAddr), msg); } } diff --git a/src/WriterPipeline.h b/src/WriterPipeline.h index de82c7f..6eb4011 100644 --- a/src/WriterPipeline.h +++ b/src/WriterPipeline.h @@ -7,9 +7,16 @@ #include "events.h" +struct WriterPipelineInput { + tao::json::value eventJson; + EventSourceType sourceType; + std::string sourceInfo; +}; + + struct WriterPipeline { public: - hoytech::protected_queue inbox; + hoytech::protected_queue inbox; hoytech::protected_queue flushInbox; private: @@ -28,7 +35,7 @@ struct WriterPipeline { auto msgs = inbox.pop_all(); for (auto &m : msgs) { - if (m.is_null()) { + if (m.eventJson.is_null()) { writerInbox.push_move({}); break; } @@ -37,13 +44,13 @@ struct WriterPipeline { std::string jsonStr; try { - parseAndVerifyEvent(m, secpCtx, true, true, flatStr, jsonStr); + parseAndVerifyEvent(m.eventJson, secpCtx, true, true, flatStr, jsonStr); } catch (std::exception &e) { - LW << "Rejected event: " << m << " reason: " << e.what(); + LW << "Rejected event: " << m.eventJson << " reason: " << e.what(); continue; } - writerInbox.push_move({ std::move(flatStr), std::move(jsonStr), hoytech::curr_time_us() }); + writerInbox.push_move({ std::move(flatStr), std::move(jsonStr), hoytech::curr_time_us(), m.sourceType, std::move(m.sourceInfo) }); } } }); @@ -120,7 +127,7 @@ struct WriterPipeline { } void flush() { - inbox.push_move(tao::json::null); + inbox.push_move({ tao::json::null, EventSourceType::None, "" }); flushInbox.wait(); } }; diff --git a/src/cmd_import.cpp b/src/cmd_import.cpp index 0100f01..cb0f1df 100644 --- a/src/cmd_import.cpp +++ b/src/cmd_import.cpp @@ -86,7 +86,7 @@ void cmd_import(const std::vector &subArgs) { continue; } - newEvents.emplace_back(std::move(flatStr), std::move(jsonStr), hoytech::curr_time_us()); + newEvents.emplace_back(std::move(flatStr), std::move(jsonStr), 
hoytech::curr_time_us(), EventSourceType::Import, ""); if (newEvents.size() >= 10'000) flushChanges(); } diff --git a/src/cmd_stream.cpp b/src/cmd_stream.cpp index 7435e0b..96799d7 100644 --- a/src/cmd_stream.cpp +++ b/src/cmd_stream.cpp @@ -64,7 +64,7 @@ void cmd_stream(const std::vector &subArgs) { if (origJson.get_array().size() < 3) throw herr("array too short"); auto &evJson = origJson.at(2); downloadedIds.emplace(from_hex(evJson.at("id").get_string())); - writer.inbox.push_move(std::move(evJson)); + writer.inbox.push_move({ std::move(evJson), EventSourceType::Stream, url }); } else { LW << "Unexpected EVENT"; } diff --git a/src/cmd_sync.cpp b/src/cmd_sync.cpp index 0bc3e85..25b3d4e 100644 --- a/src/cmd_sync.cpp +++ b/src/cmd_sync.cpp @@ -228,7 +228,7 @@ void cmd_sync(const std::vector &subArgs) { controller->finish(txn, [&](std::string_view newLeaf){ // FIXME: relay could crash client here by sending invalid JSON - writer.inbox.push_move(tao::json::from_string(std::string(newLeaf))); + writer.inbox.push_move(WriterPipelineInput{ tao::json::from_string(std::string(newLeaf)), EventSourceType::Sync, url }); }, [&](std::string_view){ } diff --git a/src/events.cpp b/src/events.cpp index e66d9b1..758c61c 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -276,7 +276,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector(flatStr.data()); quadKey = flatEventToQuadrableKey(flat); } From 67d11ced309ec854cdb974e5c66648ab9d7a2e61 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Tue, 7 Feb 2023 08:19:13 -0500 Subject: [PATCH 25/51] output DB version in info command --- src/cmd_info.cpp | 13 +++++++++++++ src/constants.h | 1 + src/onAppStartup.cpp | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cmd_info.cpp b/src/cmd_info.cpp index 835e22b..df9b9cd 100644 --- a/src/cmd_info.cpp +++ b/src/cmd_info.cpp @@ -23,5 +23,18 @@ void cmd_info(const std::vector &subArgs) { auto txn = env.txn_ro(); + uint64_t dbVersion; + + { + auto s = 
env.lookup_Meta(txn, 1); + + if (s) { + dbVersion = s->dbVersion(); + } else { + dbVersion = 0; + } + } + + std::cout << "DB version: " << dbVersion << "\n"; std::cout << "merkle root: " << to_hex(qdb.root(txn)) << "\n"; } diff --git a/src/constants.h b/src/constants.h index 831e58f..6cd8bca 100644 --- a/src/constants.h +++ b/src/constants.h @@ -1,4 +1,5 @@ #pragma once +const uint64_t CURR_DB_VERSION = 1; const size_t MAX_SUBID_SIZE = 71; // Statically allocated size in SubId const uint64_t MAX_TIMESTAMP = 17179869184; // Safety limit to ensure it can fit in quadrable key. Good until year 2514. diff --git a/src/onAppStartup.cpp b/src/onAppStartup.cpp index 26e810d..a5cc4f6 100644 --- a/src/onAppStartup.cpp +++ b/src/onAppStartup.cpp @@ -5,7 +5,8 @@ #include "golpe.h" -const size_t CURR_DB_VERSION = 1; +#include "constants.h" + static void dbCheck(lmdb::txn &txn, const std::string &cmd) { auto dbTooOld = [&](uint64_t ver) { From 67331a6e6f8aa740fc1f46f6749518ac5b580c02 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Tue, 7 Feb 2023 11:22:52 -0500 Subject: [PATCH 26/51] simplify quadrable instance creation --- golpe | 2 +- golpe.yaml | 1 - src/RelayCron.cpp | 13 +++---------- src/RelayWriter.cpp | 7 +------ src/RelayYesstr.cpp | 7 +------ src/WriterPipeline.h | 7 +------ src/cmd_compact.cpp | 7 +------ src/cmd_import.cpp | 7 +------ src/cmd_info.cpp | 7 +------ src/cmd_sync.cpp | 10 +--------- src/global.h | 6 ++++++ src/onAppStartup.cpp | 17 +++++++++++++++++ 12 files changed, 34 insertions(+), 57 deletions(-) diff --git a/golpe b/golpe index c95388c..e938a71 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit c95388c4619fffad4ff6632ccafc2780a6d32663 +Subproject commit e938a71c0d5bda1bf89594d3f745056af70ff7ec diff --git a/golpe.yaml b/golpe.yaml index 013acf6..e1c1262 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -1,5 +1,4 @@ appName: strfry -quadrable: true onAppStartup: true useGlobalH: true customLMDBSetup: true diff --git a/src/RelayCron.cpp 
b/src/RelayCron.cpp index 5d41802..7e19d81 100644 --- a/src/RelayCron.cpp +++ b/src/RelayCron.cpp @@ -42,11 +42,9 @@ void RelayServer::cleanupOldEvents() { } if (expiredLevIds.size() > 0) { - auto txn = env.txn_rw(); + auto qdb = getQdbInstance(); - quadrable::Quadrable qdb; - qdb.init(txn); - qdb.checkout("events"); + auto txn = env.txn_rw(); uint64_t numDeleted = 0; auto changes = qdb.change(); @@ -67,12 +65,7 @@ void RelayServer::cleanupOldEvents() { } void RelayServer::garbageCollect() { - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - qdb.checkout("events"); + auto qdb = getQdbInstance(); quadrableGarbageCollect(qdb, 1); } diff --git a/src/RelayWriter.cpp b/src/RelayWriter.cpp index 7810a69..94a1413 100644 --- a/src/RelayWriter.cpp +++ b/src/RelayWriter.cpp @@ -2,12 +2,7 @@ void RelayServer::runWriter(ThreadPool::Thread &thr) { - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - qdb.checkout("events"); + auto qdb = getQdbInstance(); while(1) { auto newMsgs = thr.inbox.pop_all(); diff --git a/src/RelayYesstr.cpp b/src/RelayYesstr.cpp index 4d5f2c5..fef3eb8 100644 --- a/src/RelayYesstr.cpp +++ b/src/RelayYesstr.cpp @@ -6,12 +6,7 @@ void RelayServer::runYesstr(ThreadPool::Thread &thr) { - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - + auto qdb = getQdbInstance(); struct SyncState { quadrable::MemStore m; diff --git a/src/WriterPipeline.h b/src/WriterPipeline.h index 6eb4011..0fd041f 100644 --- a/src/WriterPipeline.h +++ b/src/WriterPipeline.h @@ -58,12 +58,7 @@ struct WriterPipeline { writerThread = std::thread([&]() { setThreadName("Writer"); - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - qdb.checkout("events"); + auto qdb = getQdbInstance(); while (1) { // Debounce diff --git a/src/cmd_compact.cpp b/src/cmd_compact.cpp index 2f994fd..7d009d9 100644 --- a/src/cmd_compact.cpp +++ b/src/cmd_compact.cpp @@ -32,12 +32,7 @@ void 
cmd_compact(const std::vector &subArgs) { env.copy_fd(::fileno(f)); } } else if (args["quad-gc"].asBool()) { - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - qdb.checkout("events"); + auto qdb = getQdbInstance(); quadrableGarbageCollect(qdb, 2); } diff --git a/src/cmd_import.cpp b/src/cmd_import.cpp index cb0f1df..f9ba6f9 100644 --- a/src/cmd_import.cpp +++ b/src/cmd_import.cpp @@ -23,12 +23,7 @@ void cmd_import(const std::vector &subArgs) { if (noVerify) LW << "not verifying event IDs or signatures!"; - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - qdb.checkout("events"); + auto qdb = getQdbInstance(); auto txn = env.txn_rw(); diff --git a/src/cmd_info.cpp b/src/cmd_info.cpp index df9b9cd..dfcdc71 100644 --- a/src/cmd_info.cpp +++ b/src/cmd_info.cpp @@ -14,12 +14,7 @@ R"( void cmd_info(const std::vector &subArgs) { std::map args = docopt::docopt(USAGE, subArgs, true, ""); - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - qdb.checkout("events"); + auto qdb = getQdbInstance(); auto txn = env.txn_ro(); diff --git a/src/cmd_sync.cpp b/src/cmd_sync.cpp index 25b3d4e..cf1dc71 100644 --- a/src/cmd_sync.cpp +++ b/src/cmd_sync.cpp @@ -133,20 +133,12 @@ void cmd_sync(const std::vector &subArgs) { std::unique_ptr controller; WriterPipeline writer; WSConnection ws(url); - - quadrable::Quadrable qdb; - { - auto txn = env.txn_ro(); - qdb.init(txn); - } - qdb.checkout("events"); - + auto qdb = getQdbInstance(); ws.reconnect = false; - if (filterStr.size()) { std::vector levIds; diff --git a/src/global.h b/src/global.h index a7a4ce7..222d363 100644 --- a/src/global.h +++ b/src/global.h @@ -4,3 +4,9 @@ #include using namespace phmap; + + +#include + +quadrable::Quadrable getQdbInstance(lmdb::txn &txn); +quadrable::Quadrable getQdbInstance(); diff --git a/src/onAppStartup.cpp b/src/onAppStartup.cpp index a5cc4f6..6127a1f 100644 --- a/src/onAppStartup.cpp +++ b/src/onAppStartup.cpp @@ 
-67,7 +67,24 @@ static void setRLimits() { if (setrlimit(RLIMIT_NOFILE, &curr)) throw herr("Failed setting NOFILES limit to ", cfg().relay__nofiles, ": ", strerror(errno)); } + + +quadrable::Quadrable getQdbInstance(lmdb::txn &txn) { + quadrable::Quadrable qdb; + qdb.init(txn); + qdb.checkout("events"); + return qdb; +} + +quadrable::Quadrable getQdbInstance() { + auto txn = env.txn_ro(); + return getQdbInstance(txn); +} + void onAppStartup(lmdb::txn &txn, const std::string &cmd) { dbCheck(txn, cmd); + setRLimits(); + + (void)getQdbInstance(txn); } From 79dcceaee00d4b140a8e51dd0d43810887dc2833 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Tue, 7 Feb 2023 11:58:17 -0500 Subject: [PATCH 27/51] nice rendering of IPs --- src/RelayWebsocket.cpp | 17 ++++++++--------- src/global.h | 3 +++ src/misc.cpp | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 src/misc.cpp diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp index b6dc1d3..6503b14 100644 --- a/src/RelayWebsocket.cpp +++ b/src/RelayWebsocket.cpp @@ -92,21 +92,20 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { }); hubGroup->onConnection([&](uWS::WebSocket *ws, uWS::HttpRequest req) { - std::string addr = ws->getAddress().address; uint64_t connId = nextConnectionId++; + Connection *c = new Connection(ws, connId); + c->ipAddr = ws->getAddressBytes(); + ws->setUserData((void*)c); + connIdToConnection.emplace(connId, c); + bool compEnabled, compSlidingWindow; ws->getCompressionState(compEnabled, compSlidingWindow); - LI << "[" << connId << "] Connect from " << addr + LI << "[" << connId << "] Connect from " << renderIP(c->ipAddr) << " compression=" << (compEnabled ? 'Y' : 'N') << " sliding=" << (compSlidingWindow ? 
'Y' : 'N') ; - Connection *c = new Connection(ws, connId); - c->ipAddr = addr; - ws->setUserData((void*)c); - connIdToConnection.emplace(connId, c); - if (cfg().relay__enableTcpKeepalive) { int optval = 1; if (setsockopt(ws->getFd(), SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval))) { @@ -122,7 +121,7 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { auto upComp = renderPercent(1.0 - (double)c->stats.bytesUpCompressed / c->stats.bytesUp); auto downComp = renderPercent(1.0 - (double)c->stats.bytesDownCompressed / c->stats.bytesDown); - LI << "[" << connId << "] Disconnect from " << c->ipAddr + LI << "[" << connId << "] Disconnect from " << renderIP(c->ipAddr) << " UP: " << renderSize(c->stats.bytesUp) << " (" << upComp << " compressed)" << " DN: " << renderSize(c->stats.bytesDown) << " (" << downComp << " compressed)" ; @@ -139,7 +138,7 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { c.stats.bytesDown += length; c.stats.bytesDownCompressed += compressedSize; - tpIngester.dispatch(c.connId, MsgIngester{MsgIngester::ClientMessage{c.connId, std::string(message, length), ws->getAddressBytes()}}); + tpIngester.dispatch(c.connId, MsgIngester{MsgIngester::ClientMessage{c.connId, c.ipAddr, std::string(message, length)}}); }); diff --git a/src/global.h b/src/global.h index 222d363..da202a5 100644 --- a/src/global.h +++ b/src/global.h @@ -10,3 +10,6 @@ using namespace phmap; quadrable::Quadrable getQdbInstance(lmdb::txn &txn); quadrable::Quadrable getQdbInstance(); + + +std::string renderIP(std::string_view ipBytes); diff --git a/src/misc.cpp b/src/misc.cpp new file mode 100644 index 0000000..90267a8 --- /dev/null +++ b/src/misc.cpp @@ -0,0 +1,17 @@ +#include + +#include "golpe.h" + +std::string renderIP(std::string_view ipBytes) { + char buf[128]; + + if (ipBytes.size() == 4) { + inet_ntop(AF_INET, ipBytes.data(), buf, sizeof(buf)); + } else if (ipBytes.size() == 16) { + inet_ntop(AF_INET6, ipBytes.data(), buf, sizeof(buf)); + } else { + throw 
herr("invalid size of ipBytes, unable to render IP"); + } + + return std::string(buf); +} From 1d2295bd7dc8ccff4111e9383d4e33118ad899fd Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Tue, 7 Feb 2023 14:00:21 -0500 Subject: [PATCH 28/51] write policy wip --- TODO | 1 + golpe | 2 +- golpe.yaml | 6 ++- src/PluginWritePolicy.h | 117 ++++++++++++++++++++++++++++++++++++++++ src/RelayWriter.cpp | 5 ++ strfry.conf | 8 ++- 6 files changed, 135 insertions(+), 4 deletions(-) create mode 100644 src/PluginWritePolicy.h diff --git a/TODO b/TODO index fa04ef1..7241902 100644 --- a/TODO +++ b/TODO @@ -6,6 +6,7 @@ fix sync when disk is full it should log warning but not crash ensure DB upgrade flow works + ? why isn't the LMDB mapping CLOEXEC features finish syncing diff --git a/golpe b/golpe index e938a71..620e823 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit e938a71c0d5bda1bf89594d3f745056af70ff7ec +Subproject commit 620e8233da82fb853d9a63797a7ca0ae95bddc8e diff --git a/golpe.yaml b/golpe.yaml index e1c1262..0637677 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -95,7 +95,7 @@ config: default: 256 noReload: true - name: dbParams__mapsize - desc: "Size of mmap() to use when loading LMDB (does *not* correspond to disk-space used, default is 10TB)" + desc: "Size of mmap() to use when loading LMDB (default is 10TB, does *not* correspond to disk-space used)" default: 10995116277760 noReload: true @@ -146,6 +146,10 @@ config: desc: "Maximum number of subscriptions (concurrent REQs) a connection can have open at any time" default: 20 + - name: relay__plugins__writePolicyPath + desc: "" + default: "" + - name: relay__compression__enabled desc: "Use permessage-deflate compression if supported by client. 
Reduces bandwidth, but slight increase in CPU" default: true diff --git a/src/PluginWritePolicy.h b/src/PluginWritePolicy.h new file mode 100644 index 0000000..11d3290 --- /dev/null +++ b/src/PluginWritePolicy.h @@ -0,0 +1,117 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "golpe.h" + + +struct PluginWritePolicy { + struct Pipe : NonCopyable { + int fds[2] = { -1, -1 }; + + Pipe() { + if (::pipe(fds)) throw herr("pipe failed: ", strerror(errno)); + } + + Pipe(int fd0, int fd1) { + fds[0] = fd0; + fds[1] = fd1; + } + + ~Pipe() { + if (fds[0] != -1) ::close(fds[0]); + if (fds[1] != -1) ::close(fds[1]); + } + + int saveFd(int offset) { + int fd = fds[offset]; + fds[offset] = -1; + return fd; + } + }; + + struct RunningPlugin { + pid_t pid; + std::string currPluginPath; + FILE *r; + FILE *w; + + RunningPlugin(pid_t pid, int rfd, int wfd, std::string currPluginPath) : pid(pid), currPluginPath(currPluginPath) { + r = fdopen(rfd, "r"); + w = fdopen(wfd, "w"); + setlinebuf(w); + } + + ~RunningPlugin() { + fclose(r); + fclose(w); + waitpid(pid, nullptr, 0); + } + }; + + std::unique_ptr running; + + bool acceptEvent(std::string_view jsonStr, uint64_t receivedAt, EventSourceType sourceType, std::string_view sourceInfo) { + if (cfg().relay__plugins__writePolicyPath.size() == 0) return true; + + if (!running) { + try { + setupPlugin(); + } catch (std::exception &e) { + LE << "Couldn't setup PluginWritePolicy: " << e.what(); + return false; + } + } + + std::string output; + output += jsonStr; + output += "\n"; + + ::fwrite(output.data(), output.size(), 1, running->w); + + { + char buf[4096]; + fgets(buf, sizeof(buf), running->r); + auto j = tao::json::from_string(buf); + LI << "QQQ " << j; + } + + return true; + } + + + void setupPlugin() { + auto path = cfg().relay__plugins__writePolicyPath; + + Pipe outPipe; + Pipe inPipe; + + pid_t pid; + char *argv[] = { nullptr, }; + + posix_spawn_file_actions_t file_actions; 
+ + if ( + posix_spawn_file_actions_init(&file_actions) || + posix_spawn_file_actions_adddup2(&file_actions, outPipe.fds[0], 0) || + posix_spawn_file_actions_adddup2(&file_actions, inPipe.fds[1], 1) || + posix_spawn_file_actions_addclose(&file_actions, outPipe.fds[0]) || + posix_spawn_file_actions_addclose(&file_actions, outPipe.fds[1]) || + posix_spawn_file_actions_addclose(&file_actions, inPipe.fds[0]) || + posix_spawn_file_actions_addclose(&file_actions, inPipe.fds[1]) + ) throw herr("posix_span_file_actions failed: ", strerror(errno)); + + auto ret = posix_spawn(&pid, path.c_str(), &file_actions, nullptr, argv, nullptr); + if (ret) throw herr("posix_spawn failed when to invoke '", path, "': ", strerror(errno)); + + running = make_unique(pid, inPipe.saveFd(0), outPipe.saveFd(1), path); + } +}; diff --git a/src/RelayWriter.cpp b/src/RelayWriter.cpp index 94a1413..4bbc77b 100644 --- a/src/RelayWriter.cpp +++ b/src/RelayWriter.cpp @@ -1,9 +1,13 @@ #include "RelayServer.h" +#include "PluginWritePolicy.h" + void RelayServer::runWriter(ThreadPool::Thread &thr) { auto qdb = getQdbInstance(); + PluginWritePolicy writePolicy; + while(1) { auto newMsgs = thr.inbox.pop_all(); @@ -14,6 +18,7 @@ void RelayServer::runWriter(ThreadPool::Thread &thr) { for (auto &newMsg : newMsgs) { if (auto msg = std::get_if(&newMsg.msg)) { EventSourceType sourceType = msg->ipAddr.size() == 4 ? 
EventSourceType::IP4 : EventSourceType::IP6; + if (!writePolicy.acceptEvent(msg->jsonStr, msg->receivedAt, sourceType, msg->ipAddr)) continue; newEvents.emplace_back(std::move(msg->flatStr), std::move(msg->jsonStr), msg->receivedAt, sourceType, std::move(msg->ipAddr), msg); } } diff --git a/strfry.conf b/strfry.conf index f41bebc..9a06608 100644 --- a/strfry.conf +++ b/strfry.conf @@ -9,7 +9,7 @@ dbParams { # Maximum number of threads/processes that can simultaneously have LMDB transactions open (restart required) maxreaders = 256 - # Size of mmap() to use when loading LMDB (does *not* correspond to disk-space used, default is 10TB) (restart required) + # Size of mmap() to use when loading LMDB (default is 10TB, does *not* correspond to disk-space used) (restart required) mapsize = 10995116277760 } @@ -53,7 +53,11 @@ relay { maxFilterLimit = 500 # Maximum number of subscriptions (concurrent REQs) a connection can have open at any time - maxSubsPerConnection = 2 + maxSubsPerConnection = 20 + + plugins { + writePolicyPath = "./test.pl" + } compression { # Use permessage-deflate compression if supported by client. Reduces bandwidth, but slight increase in CPU (restart required) From d6df5c65b1330891d61427303b2267ca7d83cc1b Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 05:46:40 -0500 Subject: [PATCH 29/51] Revert "allow filtering for indexed values > 255 bytes" This reverts commit 93ca4b904433d6b778500a424e5be4788864f71d. --- README.md | 2 +- src/filters.h | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2dc117f..17b1b8b 100644 --- a/README.md +++ b/README.md @@ -195,7 +195,7 @@ A `FilterGroup` is a vector of `Filter` objects. When the Ingester receives a `R In order to determine if an event matches against a `Filter`, first the `since` and `until` fields are checked. Then, each field of the event for which a filter item was specified is looked up in the corresponding lookup table. 
Specifically, the upper-bound index is determined using a binary search (for example `std::upper_bound`). This is the first element greater than the event's item. Then the preceeding table item is checked for either a prefix (`ids`/`authors`) or exact (everything else) match. -Since testing `Filter`s against events is performed so frequently, it is a performance-critical operation and some optimisations have been applied. For example, each filter item in the lookup table is represented by a 8 byte data structure, one of which is the first byte of the field and the rest are offset/size lookups into a single memory allocation containing the remaining bytes. Under typical scenarios, this will greatly reduce the amount of memory that needs to be loaded to process a filter. Filters with 8 or fewer items can often be rejected with the load of a single cache line. Because filters aren't scanned linearly, the number of items in a filter (ie amount of pubkeys) doesn't have a significant impact on processing resources. +Since testing `Filter`s against events is performed so frequently, it is a performance-critical operation and some optimisations have been applied. For example, each filter item in the lookup table is represented by a 4 byte data structure, one of which is the first byte of the field and the rest are offset/size lookups into a single memory allocation containing the remaining bytes. Under typical scenarios, this will greatly reduce the amount of memory that needs to be loaded to process a filter. Filters with 16 or fewer items can often be rejected with the load of a single cache line. Because filters aren't scanned linearly, the number of items in a filter (ie amount of pubkeys) doesn't have a significant impact on processing resources. 
#### DBScan diff --git a/src/filters.h b/src/filters.h index 25040f9..c81c954 100644 --- a/src/filters.h +++ b/src/filters.h @@ -7,10 +7,9 @@ struct FilterSetBytes { struct Item { - uint32_t offset; - uint16_t size; + uint16_t offset; + uint8_t size; uint8_t firstByte; - uint8_t padding; }; std::vector items; @@ -19,8 +18,6 @@ struct FilterSetBytes { // Sizes are post-hex decode FilterSetBytes(const tao::json::value &arrHex, bool hexDecode, size_t minSize, size_t maxSize) { - if (maxSize > std::numeric_limits::max()) throw herr("filter maxSize too big"); - std::vector arr; for (const auto &i : arrHex.get_array()) { @@ -34,11 +31,11 @@ struct FilterSetBytes { for (const auto &item : arr) { if (items.size() > 0 && item.starts_with(at(items.size() - 1))) continue; // remove duplicates and redundant prefixes - items.emplace_back(Item{ (uint32_t)buf.size(), (uint16_t)item.size(), (uint8_t)item[0] }); + items.emplace_back(Item{ (uint16_t)buf.size(), (uint8_t)item.size(), (uint8_t)item[0] }); buf += item; } - if (buf.size() > 1'000'000) throw herr("total filter items too large"); + if (buf.size() > 65535) throw herr("total filter items too large"); } std::string at(size_t n) const { From b32999cee8e6f184e15264d0b4b8f93e8213d334 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 06:03:54 -0500 Subject: [PATCH 30/51] max indexed tag size --- src/constants.h | 1 + src/events.cpp | 10 ++++++---- src/events.h | 1 - src/filters.h | 6 +++--- src/global.h | 3 +++ src/onAppStartup.cpp | 2 -- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/constants.h b/src/constants.h index 6cd8bca..b331055 100644 --- a/src/constants.h +++ b/src/constants.h @@ -3,3 +3,4 @@ const uint64_t CURR_DB_VERSION = 1; const size_t MAX_SUBID_SIZE = 71; // Statically allocated size in SubId const uint64_t MAX_TIMESTAMP = 17179869184; // Safety limit to ensure it can fit in quadrable key. Good until year 2514. 
+const size_t MAX_INDEXED_TAG_VAL_SIZE = 255; diff --git a/src/events.cpp b/src/events.cpp index 758c61c..78455f2 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -40,10 +40,12 @@ std::string nostrJsonToFlat(const tao::json::value &v) { } else { if (tagVal.size() < 1 || tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too small/large: ", tagVal.size()); - tagsGeneral.emplace_back(NostrIndex::CreateTagGeneral(builder, - (uint8_t)tagName[0], - builder.CreateVector((uint8_t*)tagVal.data(), tagVal.size()) - )); + if (tagVal.size() <= MAX_INDEXED_TAG_VAL_SIZE) { + tagsGeneral.emplace_back(NostrIndex::CreateTagGeneral(builder, + (uint8_t)tagName[0], + builder.CreateVector((uint8_t*)tagVal.data(), tagVal.size()) + )); + } } } diff --git a/src/events.h b/src/events.h index c05ce08..21f839b 100644 --- a/src/events.h +++ b/src/events.h @@ -5,7 +5,6 @@ #include "golpe.h" #include "Decompressor.h" -#include "constants.h" diff --git a/src/filters.h b/src/filters.h index c81c954..0363749 100644 --- a/src/filters.h +++ b/src/filters.h @@ -2,8 +2,6 @@ #include "golpe.h" -#include "constants.h" - struct FilterSetBytes { struct Item { @@ -18,6 +16,8 @@ struct FilterSetBytes { // Sizes are post-hex decode FilterSetBytes(const tao::json::value &arrHex, bool hexDecode, size_t minSize, size_t maxSize) { + if (maxSize > MAX_INDEXED_TAG_VAL_SIZE) throw herr("maxSize bigger than max indexed tag size"); + std::vector arr; for (const auto &i : arrHex.get_array()) { @@ -145,7 +145,7 @@ struct NostrFilter { if (tag == 'p' || tag == 'e') { tags.emplace(tag, FilterSetBytes(v, true, 32, 32)); } else { - tags.emplace(tag, FilterSetBytes(v, false, 1, cfg().events__maxTagValSize)); + tags.emplace(tag, FilterSetBytes(v, false, 1, MAX_INDEXED_TAG_VAL_SIZE)); } } else { throw herr("unindexed tag filter"); diff --git a/src/global.h b/src/global.h index da202a5..ce7a282 100644 --- a/src/global.h +++ b/src/global.h @@ -13,3 +13,6 @@ quadrable::Quadrable getQdbInstance(); std::string 
renderIP(std::string_view ipBytes); + + +#include "constants.h" diff --git a/src/onAppStartup.cpp b/src/onAppStartup.cpp index 6127a1f..c63db6d 100644 --- a/src/onAppStartup.cpp +++ b/src/onAppStartup.cpp @@ -5,8 +5,6 @@ #include "golpe.h" -#include "constants.h" - static void dbCheck(lmdb::txn &txn, const std::string &cmd) { auto dbTooOld = [&](uint64_t ver) { From 7661865bcc14bf3c9e74b7da327c50d8d6fcd03d Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 06:48:38 -0500 Subject: [PATCH 31/51] NIP-40 expiration timestamp --- fbs/nostr-index.fbs | 1 + golpe.yaml | 7 +++++ src/RelayWebsocket.cpp | 1 - src/cmd_dict.cpp | 1 - src/events.cpp | 18 ++++++++++--- src/gc.h | 2 -- src/global.h | 9 ++++--- src/misc.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++ src/render.h | 44 ------------------------------- 9 files changed, 88 insertions(+), 55 deletions(-) delete mode 100644 src/render.h diff --git a/fbs/nostr-index.fbs b/fbs/nostr-index.fbs index 73680a1..580e6d8 100644 --- a/fbs/nostr-index.fbs +++ b/fbs/nostr-index.fbs @@ -21,6 +21,7 @@ table Event { kind: uint64; tagsGeneral: [TagGeneral]; tagsFixed32: [TagFixed32]; + expiration: uint64; } table Empty {} diff --git a/golpe.yaml b/golpe.yaml index 0637677..2a57d0c 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -46,6 +46,9 @@ tables: multi: true deletion: # eventId, pubkey multi: true + expiration: + integer: true + multi: true indexPrelude: | auto *flat = v.flat_nested(); @@ -70,6 +73,10 @@ tables: if (flat->kind() == 5 && tagName == 'e') deletion.push_back(std::string(tagVal) + std::string(sv(flat->pubkey()))); } + if (flat->expiration() != 0) { + expiration.push_back(flat->expiration()); + } + CompressionDictionary: fields: - name: dict diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp index 6503b14..c0912b4 100644 --- a/src/RelayWebsocket.cpp +++ b/src/RelayWebsocket.cpp @@ -1,5 +1,4 @@ #include "RelayServer.h" -#include "render.h" #include "app_git_version.h" diff --git a/src/cmd_dict.cpp 
b/src/cmd_dict.cpp index 79973b7..d5120e9 100644 --- a/src/cmd_dict.cpp +++ b/src/cmd_dict.cpp @@ -9,7 +9,6 @@ #include "DBScan.h" #include "events.h" -#include "render.h" static const char USAGE[] = diff --git a/src/events.cpp b/src/events.cpp index 78455f2..49c058b 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -19,13 +19,14 @@ std::string nostrJsonToFlat(const tao::json::value &v) { std::vector> tagsGeneral; std::vector> tagsFixed32; + uint64_t expiration = 0; + if (v.at("tags").get_array().size() > cfg().events__maxNumTags) throw herr("too many tags: ", v.at("tags").get_array().size()); for (auto &tagArr : v.at("tags").get_array()) { auto &tag = tagArr.get_array(); if (tag.size() < 2) throw herr("too few fields in tag"); auto tagName = tag.at(0).get_string(); - if (tagName.size() != 1) continue; // only single-char tags need indexing auto tagVal = tag.at(1).get_string(); @@ -37,8 +38,14 @@ std::string nostrJsonToFlat(const tao::json::value &v) { (uint8_t)tagName[0], (NostrIndex::Fixed32Bytes*)tagVal.data() )); - } else { - if (tagVal.size() < 1 || tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too small/large: ", tagVal.size()); + } else if (tagName == "expiration") { + if (expiration == 0) { + expiration = parseUint64(tagVal); + if (expiration == 0) expiration = 1; // special value to indicate expiration of 0 was set + } + } else if (tagName.size() == 1) { + if (tagVal.size() == 0) throw herr("tag val empty"); + if (tagVal.size() > cfg().events__maxTagValSize) throw herr("tag val too large: ", tagVal.size()); if (tagVal.size() <= MAX_INDEXED_TAG_VAL_SIZE) { tagsGeneral.emplace_back(NostrIndex::CreateTagGeneral(builder, @@ -57,7 +64,8 @@ std::string nostrJsonToFlat(const tao::json::value &v) { created_at, kind, builder.CreateVector>(tagsGeneral), - builder.CreateVector>(tagsFixed32) + builder.CreateVector>(tagsFixed32), + expiration ); builder.Finish(eventPtr); @@ -122,6 +130,8 @@ void verifyEventTimestamp(const NostrIndex::Event *flat) { 
if (ts < earliest) throw herr("created_at too early"); if (ts > latest || ts > MAX_TIMESTAMP) throw herr("created_at too late"); + + if (flat->expiration() != 0 && flat->expiration() <= now) throw herr("event expired"); } void parseAndVerifyEvent(const tao::json::value &origJson, secp256k1_context *secpCtx, bool verifyMsg, bool verifyTime, std::string &flatStr, std::string &jsonStr) { diff --git a/src/gc.h b/src/gc.h index 6bfab0c..90da437 100644 --- a/src/gc.h +++ b/src/gc.h @@ -2,8 +2,6 @@ #include "golpe.h" -#include "render.h" - inline void quadrableGarbageCollect(quadrable::Quadrable &qdb, int logLevel = 0) { quadrable::Quadrable::GarbageCollector> gc(qdb); diff --git a/src/global.h b/src/global.h index ce7a282..aeb23e9 100644 --- a/src/global.h +++ b/src/global.h @@ -12,7 +12,10 @@ quadrable::Quadrable getQdbInstance(lmdb::txn &txn); quadrable::Quadrable getQdbInstance(); -std::string renderIP(std::string_view ipBytes); - - #include "constants.h" + + +std::string renderIP(std::string_view ipBytes); +std::string renderSize(uint64_t si); +std::string renderPercent(double p); +uint64_t parseUint64(const std::string &s); diff --git a/src/misc.cpp b/src/misc.cpp index 90267a8..4cfbfeb 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -1,7 +1,12 @@ #include +#include + +#include +#include #include "golpe.h" + std::string renderIP(std::string_view ipBytes) { char buf[128]; @@ -15,3 +20,58 @@ std::string renderIP(std::string_view ipBytes) { return std::string(buf); } + + +std::string renderSize(uint64_t si) { + if (si < 1024) return std::to_string(si) + "b"; + + double s = si; + char buf[128]; + char unit; + + do { + s /= 1024; + if (s < 1024) { + unit = 'K'; + break; + } + + s /= 1024; + if (s < 1024) { + unit = 'M'; + break; + } + + s /= 1024; + if (s < 1024) { + unit = 'G'; + break; + } + + s /= 1024; + unit = 'T'; + } while(0); + + ::snprintf(buf, sizeof(buf), "%.2f%c", s, unit); + return std::string(buf); +} + + + +std::string renderPercent(double p) { + char 
buf[128]; + ::snprintf(buf, sizeof(buf), "%.1f%%", p * 100); + return std::string(buf); +} + + + +uint64_t parseUint64(const std::string &s) { + auto digitChar = [](char c){ + return c >= '0' && c <= '9'; + }; + + if (!std::all_of(s.begin(), s.end(), digitChar)) throw herr("non-digit character"); + + return std::stoull(s); +} diff --git a/src/render.h b/src/render.h deleted file mode 100644 index 2acfb6b..0000000 --- a/src/render.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include - - -inline std::string renderSize(uint64_t si) { - if (si < 1024) return std::to_string(si) + "b"; - - double s = si; - char buf[128]; - char unit; - - do { - s /= 1024; - if (s < 1024) { - unit = 'K'; - break; - } - - s /= 1024; - if (s < 1024) { - unit = 'M'; - break; - } - - s /= 1024; - if (s < 1024) { - unit = 'G'; - break; - } - - s /= 1024; - unit = 'T'; - } while(0); - - ::snprintf(buf, sizeof(buf), "%.2f%c", s, unit); - return std::string(buf); -} - -inline std::string renderPercent(double p) { - char buf[128]; - ::snprintf(buf, sizeof(buf), "%.1f%%", p * 100); - return std::string(buf); -} From 5c4fddb4a8a50cfeb79a0134a8c412a75b67a4c0 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 07:30:14 -0500 Subject: [PATCH 32/51] NIP-33: parameterized replaceable events --- golpe.yaml | 7 +++++++ src/events.cpp | 42 +++++++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/golpe.yaml b/golpe.yaml index 2a57d0c..12f758c 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -49,6 +49,8 @@ tables: expiration: integer: true multi: true + replace: # pubkey, d-tag, kind + multi: true indexPrelude: | auto *flat = v.flat_nested(); @@ -63,7 +65,12 @@ tables: for (const auto &tagPair : *(flat->tagsGeneral())) { auto tagName = (char)tagPair->key(); auto tagVal = sv(tagPair->val()); + tag.push_back(makeKey_StringUint64(std::string(1, tagName) + std::string(tagVal), indexTime)); + + if (tagName == 'd' && replace.size() == 0) { + 
replace.push_back(makeKey_StringUint64(std::string(sv(flat->pubkey())) + std::string(tagVal), flat->kind())); + } } for (const auto &tagPair : *(flat->tagsFixed32())) { diff --git a/src/events.cpp b/src/events.cpp index 49c058b..af0003f 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -253,24 +253,52 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorkind())) { auto searchKey = makeKey_StringUint64Uint64(sv(flat->pubkey()), flat->kind(), MAX_U64); - uint64_t otherLevId = 0; env.generic_foreachFull(txn, env.dbi_Event__pubkeyKind, searchKey, lmdb::to_sv(MAX_U64), [&](auto k, auto v) { ParsedKey_StringUint64Uint64 parsedKey(k); if (parsedKey.s == sv(flat->pubkey()) && parsedKey.n1 == flat->kind()) { if (parsedKey.n2 < flat->created_at()) { - otherLevId = lmdb::from_sv(v); + auto otherEv = env.lookup_Event(txn, lmdb::from_sv(v)); + if (!otherEv) throw herr("missing event from index, corrupt DB?"); + LI << "Deleting event (replaceable). id=" << to_hex(sv(otherEv->flat_nested()->pubkey())); + deleteEvent(txn, changes, *otherEv); } else { ev.status = EventWriteStatus::Replaced; } } return false; }, true); + } else { + std::string replace; - if (otherLevId) { - auto otherEv = env.lookup_Event(txn, otherLevId); - if (!otherEv) throw herr("missing event from index, corrupt DB?"); - deleteEvent(txn, changes, *otherEv); + for (const auto &tagPair : *(flat->tagsGeneral())) { + auto tagName = (char)tagPair->key(); + if (tagName != 'd') continue; + replace = std::string(sv(tagPair->val())); + break; + } + + if (replace.size()) { + auto searchStr = std::string(sv(flat->pubkey())) + replace; + auto searchKey = makeKey_StringUint64(searchStr, flat->kind()); + LI << to_hex(searchKey); + + env.generic_foreachFull(txn, env.dbi_Event__replace, searchKey, lmdb::to_sv(MAX_U64), [&](auto k, auto v) { + ParsedKey_StringUint64 parsedKey(k); + if (parsedKey.s == searchStr && parsedKey.n == flat->kind()) { + auto otherEv = env.lookup_Event(txn, lmdb::from_sv(v)); + if 
(!otherEv) throw herr("missing event from index, corrupt DB?"); + + if (otherEv->flat_nested()->created_at() < flat->created_at()) { + LI << "Deleting event (d-tag). id=" << to_hex(sv(otherEv->flat_nested()->pubkey())); + deleteEvent(txn, changes, *otherEv); + } else { + ev.status = EventWriteStatus::Replaced; + } + } + + return false; + }, true); } } @@ -280,7 +308,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorkey() == 'e') { auto otherEv = lookupEventById(txn, sv(tagPair->val())); if (otherEv && sv(otherEv->flat_nested()->pubkey()) == sv(flat->pubkey())) { - LI << "Deleting event. id=" << to_hex(sv(tagPair->val())); + LI << "Deleting event (kind 5). id=" << to_hex(sv(tagPair->val())); deleteEvent(txn, changes, *otherEv); } } From 679d3834e4081157ac017ea03534e45bb6c03204 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 10:15:35 -0500 Subject: [PATCH 33/51] import --no-gc option --- src/cmd_import.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cmd_import.cpp b/src/cmd_import.cpp index f9ba6f9..b5de31d 100644 --- a/src/cmd_import.cpp +++ b/src/cmd_import.cpp @@ -11,7 +11,7 @@ static const char USAGE[] = R"( Usage: - import [--show-rejected] [--no-verify] + import [--show-rejected] [--no-verify] [--no-gc] )"; @@ -20,6 +20,7 @@ void cmd_import(const std::vector &subArgs) { bool showRejected = args["--show-rejected"].asBool(); bool noVerify = args["--no-verify"].asBool(); + bool noGc = args["--no-gc"].asBool(); if (noVerify) LW << "not verifying event IDs or signatures!"; @@ -88,7 +89,7 @@ void cmd_import(const std::vector &subArgs) { flushChanges(); - quadrableGarbageCollect(qdb, 2); - txn.commit(); + + if (!noGc) quadrableGarbageCollect(qdb, 2); } From fd0caaad66d2d9baa677af375b07599625e87f76 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 13:26:32 -0500 Subject: [PATCH 34/51] tests --- test/dumbFilter.pl | 4 +- test/filterFuzzTest.pl | 61 +++++++----- test/strfry.conf | 1 
+ test/writeTest.pl | 210 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 248 insertions(+), 28 deletions(-) create mode 100644 test/strfry.conf create mode 100644 test/writeTest.pl diff --git a/test/dumbFilter.pl b/test/dumbFilter.pl index 43243b3..138be4d 100644 --- a/test/dumbFilter.pl +++ b/test/dumbFilter.pl @@ -1,9 +1,9 @@ #!/usr/bin/env perl -use JSON::XS; - use strict; +use JSON::XS; + binmode(STDOUT, ":utf8"); my $filterJson = shift || die "need filter"; diff --git a/test/filterFuzzTest.pl b/test/filterFuzzTest.pl index ec5d869..e2921ae 100644 --- a/test/filterFuzzTest.pl +++ b/test/filterFuzzTest.pl @@ -1,3 +1,5 @@ +#!/usr/bin/env perl + use strict; use Data::Dumper; use JSON::XS; @@ -202,6 +204,38 @@ sub testScan { } +sub testMonitor { + my $monCmds = shift; + my $interestFg = shift; + + my $fge = encode_json($interestFg); + print "filt: $fge\n\n"; + + print "DOING MONS\n"; + my $pid = open2(my $outfile, my $infile, './strfry monitor | jq -r .pubkey | sort | sha256sum'); + for my $c (@$monCmds) { print $infile encode_json($c), "\n"; } + close($infile); + + my $resA = <$outfile>; + + waitpid($pid, 0); + my $child_exit_status = $? 
>> 8; + die "monitor cmd died" if $child_exit_status; + + print "DOING SCAN\n"; + my $resB = `./strfry scan '$fge' 2>/dev/null | jq -r .pubkey | sort | sha256sum`; + + print "$resA\n$resB\n"; + + if ($resA eq $resB) { + print "-----------MATCH OK-------------\n\n\n"; + } else { + print STDERR "$fge\n"; + die "MISMATCH"; + } +} + + srand($ENV{SEED} || 0); @@ -215,32 +249,7 @@ if ($cmd eq 'scan') { } elsif ($cmd eq 'monitor') { while (1) { my ($monCmds, $interestFg) = genRandomMonitorCmds(); - - my $fge = encode_json($interestFg); - print "filt: $fge\n\n"; - - print "DOING MONS\n"; - my $pid = open2(my $outfile, my $infile, './strfry monitor | jq -r .pubkey | sort | sha256sum'); - for my $c (@$monCmds) { print $infile encode_json($c), "\n"; } - close($infile); - - my $resA = <$outfile>; - - waitpid($pid, 0); - my $child_exit_status = $? >> 8; - die "monitor cmd died" if $child_exit_status; - - print "DOING SCAN\n"; - my $resB = `./strfry scan '$fge' 2>/dev/null | jq -r .pubkey | sort | sha256sum`; - - print "$resA\n$resB\n"; - - if ($resA eq $resB) { - print "-----------MATCH OK-------------\n\n\n"; - } else { - print STDERR "$fge\n"; - die "MISMATCH"; - } + testMonitor($monCmds, $interestFg); } } else { die "unknown cmd: $cmd"; diff --git a/test/strfry.conf b/test/strfry.conf new file mode 100644 index 0000000..f2512b5 --- /dev/null +++ b/test/strfry.conf @@ -0,0 +1 @@ +db = "./strfry-db-test/" diff --git a/test/writeTest.pl b/test/writeTest.pl new file mode 100644 index 0000000..3a23cac --- /dev/null +++ b/test/writeTest.pl @@ -0,0 +1,210 @@ +#!/usr/bin/env perl + +use strict; + +use Data::Dumper; +use JSON::XS; + + +my $ids = [ + { + sec => 'c1eee22f68dc218d98263cfecb350db6fc6b3e836b47423b66c62af7ae3e32bb', + pub => '003ba9b2c5bd8afeed41a4ce362a8b7fc3ab59c25b6a1359cae9093f296dac01', + }, + { + sec => 'a0b459d9ff90e30dc9d1749b34c4401dfe80ac2617c7732925ff994e8d5203ff', + pub => 'cc49e2a58373abc226eee84bee9ba954615aa2ef1563c4f955a74c4606a3b1fa', + }, +]; + + + +## 
Basic insert + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi" --kind 1 }, + qq{--sec $ids->[0]->{sec} --content "hi 2" --kind 1 }, + ], + verify => [ 0, 1, ], +}); + +## Replacement, newer timestamp + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi" --kind 10000 --created-at 5000 }, + qq{--sec $ids->[0]->{sec} --content "hi 2" --kind 10000 --created-at 5001 }, + qq{--sec $ids->[0]->{sec} --content "hi" --kind 10000 --created-at 5000 }, + ], + verify => [ 1, ], +}); + +## Replacement is dropped + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi" --kind 10000 --created-at 5001 }, + qq{--sec $ids->[0]->{sec} --content "hi 2" --kind 10000 --created-at 5000 }, + ], + verify => [ 0, ], +}); + +## Doesn't replace some else's event + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi" --kind 10000 --created-at 5000 }, + qq{--sec $ids->[1]->{sec} --content "hi 2" --kind 10000 --created-at 5001 }, + ], + verify => [ 0, 1, ], +}); + +## Doesn't replace different kind + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi" --kind 10001 --created-at 5000 }, + qq{--sec $ids->[1]->{sec} --content "hi 2" --kind 10000 --created-at 5001 }, + ], + verify => [ 0, 1, ], +}); + + +## Deletion + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi" --kind 1 --created-at 5000 }, + qq{--sec $ids->[0]->{sec} --content "hi" --kind 1 --created-at 5001 }, + qq{--sec $ids->[0]->{sec} --content "hi" --kind 1 --created-at 5002 }, + qq{--sec $ids->[0]->{sec} --content "blah" --kind 5 --created-at 6000 -e EV_2 -e EV_0 }, + ], + verify => [ 1, 3, ], +}); + +## Can't delete someone else's event + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi" --kind 1 --created-at 5000 }, + qq{--sec $ids->[1]->{sec} --content "blah" --kind 5 --created-at 6000 -e EV_0 }, + ], + verify => [ 0, 1, ], +}); + +## Deletion prevents re-adding same event + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} 
--content "hi" --kind 1 --created-at 5000 }, + qq{--sec $ids->[0]->{sec} --content "blah" --kind 5 --created-at 6000 -e EV_0 }, + qq{--sec $ids->[0]->{sec} --content "hi" --kind 1 --created-at 5000 }, + ], + verify => [ 1, ], +}); + + + +## Parameterized Replaceable Events + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi1" --kind 1 --created-at 5000 --tag d myrepl }, + qq{--sec $ids->[0]->{sec} --content "hi2" --kind 1 --created-at 5001 --tag d myrepl }, + ], + verify => [ 1, ], +}); + +## d tags have to match + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi1" --kind 1 --created-at 5000 --tag d myrepl }, + qq{--sec $ids->[0]->{sec} --content "hi2" --kind 1 --created-at 5001 --tag d myrepl2 }, + qq{--sec $ids->[0]->{sec} --content "hi3" --kind 1 --created-at 5002 --tag d myrepl }, + ], + verify => [ 1, 2, ], +}); + +## Kinds have to match + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi1" --kind 1 --created-at 5000 --tag d myrepl }, + qq{--sec $ids->[0]->{sec} --content "hi2" --kind 2 --created-at 5001 --tag d myrepl }, + ], + verify => [ 0, 1, ], +}); + +## Pubkeys have to match + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi1" --kind 1 --created-at 5000 --tag d myrepl }, + qq{--sec $ids->[1]->{sec} --content "hi2" --kind 1 --created-at 5001 --tag d myrepl }, + ], + verify => [ 0, 1, ], +}); + +## Timestamp + +doTest({ + events => [ + qq{--sec $ids->[0]->{sec} --content "hi1" --kind 1 --created-at 5001 --tag d myrepl }, + qq{--sec $ids->[0]->{sec} --content "hi2" --kind 1 --created-at 5000 --tag d myrepl }, + ], + verify => [ 0, ], +}); + + + +sub doTest { + my $spec = shift; + + cleanDb(); + + my $eventIds = []; + + for my $ev (@{ $spec->{events} }) { + $ev =~ s{EV_(\d+)}{$eventIds->[$1]}eg; + push @$eventIds, addEvent($ev); + } + + my $finalEventIds = []; + + { + open(my $fh, '-|', './strfry --config test/strfry.conf export 2>/dev/null') || die "$!"; + while(<$fh>) { + push 
@$finalEventIds, decode_json($_)->{id}; + } + } + + die "incorrect eventIds lengths" if @{$spec->{verify}} != @$finalEventIds; + + for (my $i = 0; $i < @$finalEventIds; $i++) { + die "id mismatch" if $eventIds->[$spec->{verify}->[$i]] ne $finalEventIds->[$i]; + } +} + + +sub cleanDb { + system("mkdir -p strfry-db-test"); + system("rm -f strfry-db-test/data.mdb"); +} + +sub addEvent { + my $ev = shift; + + system(qq{ nostril $ev >test-eventXYZ.json }); + + my $eventJson = `cat test-eventXYZ.json`; + + system(qq{ /dev/null }); + + system(qq{ rm test-eventXYZ.json }); + + my $event = decode_json($eventJson); + + return $event->{id}; +} From 50a3b5ed7158d927810fff7bffc856ba8a85ee9b Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 14:05:15 -0500 Subject: [PATCH 35/51] todo --- TODO | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/TODO b/TODO index 7241902..96d623c 100644 --- a/TODO +++ b/TODO @@ -1,15 +1,12 @@ 0.1 release - event sources - rate limits - NIP-40 expiration - NIP-33 param replaceable events - fix sync + write policy plugin when disk is full it should log warning but not crash ensure DB upgrade flow works + disable sync ? why isn't the LMDB mapping CLOEXEC -features - finish syncing +0.2 release + fix sync * logging of bytes up/down * up/both directions * error handling and reporting @@ -17,19 +14,20 @@ features * limit on number of concurrent sync requests * full-db scan limited by since/until * `strfry sync` command always takes at least 1 second due to batching delay. figure out better way to flush + +features less verbose default logging - make it easier for a thread to setup a quadrable env multiple sync connections in one process/config - relay block-list events NIP-42 AUTH - procmail-like API for event filtering + slow-reader detection and back-pressure + ? relay block-list events rate limits ! event writes per second per ip - slow-reader detection and back-pressure max connections per ip (nginx?) 
max bandwidth up/down (nginx?) ? limit on total number of events from a DBScan, not just per filter + ? time limit on DBScan misc ? periodic reaping of disconnected sockets (maybe autoping is doing this already) From 4eb7a4fe53a2aaa2e1b71dff0173244d47067aa0 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 15:04:59 -0500 Subject: [PATCH 36/51] more work on write policy plugins --- TODO | 1 + golpe.yaml | 5 +- src/PluginWritePolicy.h | 163 +++++++++++++++++++++++++++------------- src/RelayWriter.cpp | 15 +++- src/events.h | 9 +++ strfry.conf | 6 +- 6 files changed, 143 insertions(+), 56 deletions(-) diff --git a/TODO b/TODO index 96d623c..cdd0745 100644 --- a/TODO +++ b/TODO @@ -3,6 +3,7 @@ when disk is full it should log warning but not crash ensure DB upgrade flow works disable sync + get IP from HTTP header ? why isn't the LMDB mapping CLOEXEC 0.2 release diff --git a/golpe.yaml b/golpe.yaml index 12f758c..97f5c08 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -160,9 +160,12 @@ config: desc: "Maximum number of subscriptions (concurrent REQs) a connection can have open at any time" default: 20 - - name: relay__plugins__writePolicyPath + - name: relay__writePolicy__plugin desc: "" default: "" + - name: relay__writePolicy__lookbackSeconds + desc: "" + default: 21600 - name: relay__compression__enabled desc: "Use permessage-deflate compression if supported by client. 
Reduces bandwidth, but slight increase in CPU" diff --git a/src/PluginWritePolicy.h b/src/PluginWritePolicy.h index 11d3290..a274c4f 100644 --- a/src/PluginWritePolicy.h +++ b/src/PluginWritePolicy.h @@ -7,13 +7,123 @@ #include #include #include +#include +#include +#include #include #include "golpe.h" +enum class WritePolicyResult { + Accept, + Reject, + ShadowReject, +}; + + struct PluginWritePolicy { + struct RunningPlugin { + pid_t pid; + std::string currPluginPath; + struct timespec lastModTime; + FILE *r; + FILE *w; + + RunningPlugin(pid_t pid, int rfd, int wfd, std::string currPluginPath) : pid(pid), currPluginPath(currPluginPath) { + r = fdopen(rfd, "r"); + w = fdopen(wfd, "w"); + setlinebuf(w); + { + struct stat statbuf; + if (stat(currPluginPath.c_str(), &statbuf)) throw herr("couldn't stat plugin: ", currPluginPath); + lastModTime = statbuf.st_mtim; + } + } + + ~RunningPlugin() { + fclose(r); + fclose(w); + kill(pid, SIGTERM); + waitpid(pid, nullptr, 0); + } + }; + + std::unique_ptr running; + + WritePolicyResult acceptEvent(std::string_view jsonStr, uint64_t receivedAt, EventSourceType sourceType, std::string_view sourceInfo, std::string &okMsg) { + const auto &pluginPath = cfg().relay__writePolicy__plugin; + + if (pluginPath.size() == 0) { + running.reset(); + return WritePolicyResult::Accept; + } + + try { + if (running) { + if (pluginPath != running->currPluginPath) { + running.reset(); + } else { + struct stat statbuf; + if (stat(pluginPath.c_str(), &statbuf)) throw herr("couldn't stat plugin: ", pluginPath); + if (statbuf.st_mtim.tv_sec != running->lastModTime.tv_sec || statbuf.st_mtim.tv_nsec != running->lastModTime.tv_nsec) { + running.reset(); + } + } + } + + if (!running) setupPlugin(); + + auto json = tao::json::from_string(jsonStr); + + auto request = tao::json::value({ + { "type", "new" }, + { "event", json }, + { "receivedAt", receivedAt }, + { "sourceType", eventSourceTypeToStr(sourceType) }, + { "sourceInfo", sourceType == 
EventSourceType::IP4 || sourceType == EventSourceType::IP6 ? renderIP(sourceInfo) : sourceInfo }, + }); + + std::string output = tao::json::to_string(request); + output += "\n"; + + ::fwrite(output.data(), output.size(), 1, running->w); + + tao::json::value response; + + while (1) { + char buf[8192]; + if (!fgets(buf, sizeof(buf), running->r)) throw herr("pipe to plugin was closed (plugin crashed?)"); + + try { + response = tao::json::from_string(buf); + } catch (std::exception &e) { + LW << "Got unparseable line from write policy plugin: " << buf; + continue; + } + // FIXME: verify id + + break; + } + + okMsg = response.optional("msg").value_or(""); + + auto action = response.at("action").get_string(); + if (action == "accept") return WritePolicyResult::Accept; + else if (action == "reject") return WritePolicyResult::Reject; + else if (action == "shadowReject") return WritePolicyResult::ShadowReject; + else throw herr("unknown action: ", action); + } catch (std::exception &e) { + LE << "Couldn't setup PluginWritePolicy: " << e.what(); + running.reset(); + okMsg = "error: internal error"; + return WritePolicyResult::Reject; + } + } + + + struct Pipe : NonCopyable { int fds[2] = { -1, -1 }; @@ -38,58 +148,9 @@ struct PluginWritePolicy { } }; - struct RunningPlugin { - pid_t pid; - std::string currPluginPath; - FILE *r; - FILE *w; - - RunningPlugin(pid_t pid, int rfd, int wfd, std::string currPluginPath) : pid(pid), currPluginPath(currPluginPath) { - r = fdopen(rfd, "r"); - w = fdopen(wfd, "w"); - setlinebuf(w); - } - - ~RunningPlugin() { - fclose(r); - fclose(w); - waitpid(pid, nullptr, 0); - } - }; - - std::unique_ptr running; - - bool acceptEvent(std::string_view jsonStr, uint64_t receivedAt, EventSourceType sourceType, std::string_view sourceInfo) { - if (cfg().relay__plugins__writePolicyPath.size() == 0) return true; - - if (!running) { - try { - setupPlugin(); - } catch (std::exception &e) { - LE << "Couldn't setup PluginWritePolicy: " << e.what(); - return 
false; - } - } - - std::string output; - output += jsonStr; - output += "\n"; - - ::fwrite(output.data(), output.size(), 1, running->w); - - { - char buf[4096]; - fgets(buf, sizeof(buf), running->r); - auto j = tao::json::from_string(buf); - LI << "QQQ " << j; - } - - return true; - } - - void setupPlugin() { - auto path = cfg().relay__plugins__writePolicyPath; + auto path = cfg().relay__writePolicy__plugin; + LI << "Setting up write policy plugin: " << path; Pipe outPipe; Pipe inPipe; diff --git a/src/RelayWriter.cpp b/src/RelayWriter.cpp index 4bbc77b..217148d 100644 --- a/src/RelayWriter.cpp +++ b/src/RelayWriter.cpp @@ -18,8 +18,19 @@ void RelayServer::runWriter(ThreadPool::Thread &thr) { for (auto &newMsg : newMsgs) { if (auto msg = std::get_if(&newMsg.msg)) { EventSourceType sourceType = msg->ipAddr.size() == 4 ? EventSourceType::IP4 : EventSourceType::IP6; - if (!writePolicy.acceptEvent(msg->jsonStr, msg->receivedAt, sourceType, msg->ipAddr)) continue; - newEvents.emplace_back(std::move(msg->flatStr), std::move(msg->jsonStr), msg->receivedAt, sourceType, std::move(msg->ipAddr), msg); + std::string okMsg; + auto res = writePolicy.acceptEvent(msg->jsonStr, msg->receivedAt, sourceType, msg->ipAddr, okMsg); + + if (res == WritePolicyResult::Accept) { + newEvents.emplace_back(std::move(msg->flatStr), std::move(msg->jsonStr), msg->receivedAt, sourceType, std::move(msg->ipAddr), msg); + } else { + auto *flat = flatbuffers::GetRoot(msg->flatStr.data()); + auto eventIdHex = to_hex(sv(flat->id())); + + LI << "[" << msg->connId << "] write policy blocked event " << eventIdHex << ": " << okMsg; + + sendOKResponse(msg->connId, eventIdHex, res == WritePolicyResult::ShadowReject, okMsg); + } } } diff --git a/src/events.h b/src/events.h index 21f839b..a380abb 100644 --- a/src/events.h +++ b/src/events.h @@ -67,6 +67,15 @@ enum class EventSourceType { Sync = 5, }; +inline std::string eventSourceTypeToStr(EventSourceType t) { + if (t == EventSourceType::IP4) return "IP4"; + 
else if (t == EventSourceType::IP6) return "IP6"; + else if (t == EventSourceType::Import) return "Import"; + else if (t == EventSourceType::Stream) return "Stream"; + else if (t == EventSourceType::Sync) return "Sync"; + else return "?"; +} + enum class EventWriteStatus { diff --git a/strfry.conf b/strfry.conf index 9a06608..8c06e02 100644 --- a/strfry.conf +++ b/strfry.conf @@ -55,8 +55,10 @@ relay { # Maximum number of subscriptions (concurrent REQs) a connection can have open at any time maxSubsPerConnection = 20 - plugins { - writePolicyPath = "./test.pl" + writePolicy { + plugin = "" + + lookbackSeconds = 21600 } compression { From 51243ce62f8981a2d733c207bf73f050682dfecf Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 16:08:03 -0500 Subject: [PATCH 37/51] option to extract client's IP from HTTP header (ie X-Real-IP) --- TODO | 1 - golpe.yaml | 3 +++ src/RelayWebsocket.cpp | 10 +++++++++- src/global.h | 1 + src/misc.cpp | 10 ++++++++++ strfry.conf | 3 +++ 6 files changed, 26 insertions(+), 2 deletions(-) diff --git a/TODO b/TODO index cdd0745..96d623c 100644 --- a/TODO +++ b/TODO @@ -3,7 +3,6 @@ when disk is full it should log warning but not crash ensure DB upgrade flow works disable sync - get IP from HTTP header ? why isn't the LMDB mapping CLOEXEC 0.2 release diff --git a/golpe.yaml b/golpe.yaml index 97f5c08..f959654 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -125,6 +125,9 @@ config: desc: "Set OS-limit on maximum number of open files/sockets (if 0, don't attempt to set)" default: 1000000 noReload: true + - name: relay__realIpHeader + desc: "HTTP header that contains the client's real IP, before reverse proxying (ie x-real-ip) (MUST be all lower-case)" + default: "" - name: relay__info__name desc: "NIP-11: Name of this server. 
Short/descriptive (< 30 characters)" diff --git a/src/RelayWebsocket.cpp b/src/RelayWebsocket.cpp index c0912b4..6643695 100644 --- a/src/RelayWebsocket.cpp +++ b/src/RelayWebsocket.cpp @@ -94,7 +94,15 @@ void RelayServer::runWebsocket(ThreadPool::Thread &thr) { uint64_t connId = nextConnectionId++; Connection *c = new Connection(ws, connId); - c->ipAddr = ws->getAddressBytes(); + + if (cfg().relay__realIpHeader.size()) { + auto header = req.getHeader(cfg().relay__realIpHeader.c_str()).toString(); + c->ipAddr = parseIP(header); + if (c->ipAddr.size() == 0) LW << "Couldn't parse IP from header " << cfg().relay__realIpHeader << ": " << header; + } + + if (c->ipAddr.size() == 0) c->ipAddr = ws->getAddressBytes(); + ws->setUserData((void*)c); connIdToConnection.emplace(connId, c); diff --git a/src/global.h b/src/global.h index aeb23e9..c6b478f 100644 --- a/src/global.h +++ b/src/global.h @@ -19,3 +19,4 @@ std::string renderIP(std::string_view ipBytes); std::string renderSize(uint64_t si); std::string renderPercent(double p); uint64_t parseUint64(const std::string &s); +std::string parseIP(const std::string &ip); diff --git a/src/misc.cpp b/src/misc.cpp index 4cfbfeb..0f23f20 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -21,6 +21,16 @@ std::string renderIP(std::string_view ipBytes) { return std::string(buf); } +std::string parseIP(const std::string &ip) { + int af = ip.find(':') != std::string::npos ? AF_INET6 : AF_INET; + unsigned char buf[16]; + + int ret = inet_pton(af, ip.c_str(), &buf[0]); + if (ret == 0) return ""; + + return std::string((const char*)&buf[0], af == AF_INET6 ? 
16 : 4); +} + std::string renderSize(uint64_t si) { if (si < 1024) return std::to_string(si) + "b"; diff --git a/strfry.conf b/strfry.conf index 8c06e02..d6d440e 100644 --- a/strfry.conf +++ b/strfry.conf @@ -23,6 +23,9 @@ relay { # Set OS-limit on maximum number of open files/sockets (if 0, don't attempt to set) (restart required) nofiles = 1000000 + # HTTP header that contains the client's real IP, before reverse proxying (ie x-real-ip) (case-insensitive) + realIpHeader = "" + info { # NIP-11: Name of this server. Short/descriptive (< 30 characters) name = "strfry default" From 61f2638f88a3212bdfa2d3107186f5be13a6819d Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 8 Feb 2023 16:44:53 -0500 Subject: [PATCH 38/51] work on write policy plugin --- golpe.yaml | 15 +++++++----- src/DBScan.h | 2 +- src/PluginWritePolicy.h | 52 +++++++++++++++++++++++++++++++++++------ src/cmd_export.cpp | 2 +- strfry.conf | 6 +++-- 5 files changed, 60 insertions(+), 17 deletions(-) diff --git a/golpe.yaml b/golpe.yaml index f959654..97180df 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -31,7 +31,9 @@ tables: type: ubytes indices: - created_at: + createdAt: + integer: true + receivedAt: integer: true id: comparator: StringUint64 @@ -54,8 +56,9 @@ tables: indexPrelude: | auto *flat = v.flat_nested(); - created_at = flat->created_at(); - uint64_t indexTime = *created_at; + createdAt = flat->created_at(); + uint64_t indexTime = *createdAt; + receivedAt = v.receivedAt(); id = makeKey_StringUint64(sv(flat->id()), indexTime); pubkey = makeKey_StringUint64(sv(flat->pubkey()), indexTime); @@ -164,11 +167,11 @@ config: default: 20 - name: relay__writePolicy__plugin - desc: "" + desc: "If non-empty, path to an executable script that implements the writePolicy plugin logic" default: "" - name: relay__writePolicy__lookbackSeconds - desc: "" - default: 21600 + desc: "Number of seconds to search backwards for lookback events when starting the writePolicy plugin (0 for no lookback)" + default: 0 - 
name: relay__compression__enabled desc: "Use permessage-deflate compression if supported by client. Reduces bandwidth, but slight increase in CPU" diff --git a/src/DBScan.h b/src/DBScan.h index b463bd5..63a964d 100644 --- a/src/DBScan.h +++ b/src/DBScan.h @@ -187,7 +187,7 @@ struct DBScan { } else { scanState = CreatedAtScan{}; auto *state = std::get_if(&scanState); - indexDbi = env.dbi_Event__created_at; + indexDbi = env.dbi_Event__createdAt; isComplete = [&, state]{ return state->done; diff --git a/src/PluginWritePolicy.h b/src/PluginWritePolicy.h index a274c4f..76fb8a7 100644 --- a/src/PluginWritePolicy.h +++ b/src/PluginWritePolicy.h @@ -73,14 +73,15 @@ struct PluginWritePolicy { } } - if (!running) setupPlugin(); - - auto json = tao::json::from_string(jsonStr); + if (!running) { + setupPlugin(); + sendLookbackEvents(); + } auto request = tao::json::value({ { "type", "new" }, - { "event", json }, - { "receivedAt", receivedAt }, + { "event", tao::json::from_string(jsonStr) }, + { "receivedAt", receivedAt / 1000000 }, { "sourceType", eventSourceTypeToStr(sourceType) }, { "sourceInfo", sourceType == EventSourceType::IP4 || sourceType == EventSourceType::IP6 ? 
renderIP(sourceInfo) : sourceInfo }, }); @@ -88,7 +89,7 @@ struct PluginWritePolicy { std::string output = tao::json::to_string(request); output += "\n"; - ::fwrite(output.data(), output.size(), 1, running->w); + if (::fwrite(output.data(), 1, output.size(), running->w) != output.size()) throw herr("error writing to plugin"); tao::json::value response; @@ -102,7 +103,8 @@ struct PluginWritePolicy { LW << "Got unparseable line from write policy plugin: " << buf; continue; } - // FIXME: verify id + + if (response.at("id").get_string() != request.at("event").at("id").get_string()) throw herr("id mismatch"); break; } @@ -175,4 +177,40 @@ struct PluginWritePolicy { running = make_unique(pid, inPipe.saveFd(0), outPipe.saveFd(1), path); } + + void sendLookbackEvents() { + if (cfg().relay__writePolicy__lookbackSeconds == 0) return; + + Decompressor decomp; + auto now = hoytech::curr_time_us(); + + uint64_t start = now - (cfg().relay__writePolicy__lookbackSeconds * 1'000'000); + + auto txn = env.txn_ro(); + + env.generic_foreachFull(txn, env.dbi_Event__receivedAt, lmdb::to_sv(start), lmdb::to_sv(0), [&](auto k, auto v) { + if (lmdb::from_sv(k) > now) return false; + + auto ev = env.lookup_Event(txn, lmdb::from_sv(v)); + if (!ev) throw herr("unable to look up event, corrupt DB?"); + + auto sourceType = (EventSourceType)ev->sourceType(); + std::string_view sourceInfo = ev->sourceInfo(); + + auto request = tao::json::value({ + { "type", "lookback" }, + { "event", tao::json::from_string(getEventJson(txn, decomp, ev->primaryKeyId)) }, + { "receivedAt", ev->receivedAt() / 1000000 }, + { "sourceType", eventSourceTypeToStr(sourceType) }, + { "sourceInfo", sourceType == EventSourceType::IP4 || sourceType == EventSourceType::IP6 ? 
renderIP(sourceInfo) : sourceInfo }, + }); + + std::string output = tao::json::to_string(request); + output += "\n"; + + if (::fwrite(output.data(), 1, output.size(), running->w) != output.size()) throw herr("error writing to plugin"); + + return true; + }); + } }; diff --git a/src/cmd_export.cpp b/src/cmd_export.cpp index 90e94fd..8dc1b88 100644 --- a/src/cmd_export.cpp +++ b/src/cmd_export.cpp @@ -24,7 +24,7 @@ void cmd_export(const std::vector &subArgs) { auto txn = env.txn_ro(); - env.generic_foreachFull(txn, env.dbi_Event__created_at, lmdb::to_sv(since), lmdb::to_sv(0), [&](auto k, auto v) { + env.generic_foreachFull(txn, env.dbi_Event__createdAt, lmdb::to_sv(since), lmdb::to_sv(0), [&](auto k, auto v) { if (lmdb::from_sv(k) > until) return false; auto view = env.lookup_Event(txn, lmdb::from_sv(v)); diff --git a/strfry.conf b/strfry.conf index d6d440e..0b909e7 100644 --- a/strfry.conf +++ b/strfry.conf @@ -23,7 +23,7 @@ relay { # Set OS-limit on maximum number of open files/sockets (if 0, don't attempt to set) (restart required) nofiles = 1000000 - # HTTP header that contains the client's real IP, before reverse proxying (ie x-real-ip) (case-insensitive) + # HTTP header that contains the client's real IP, before reverse proxying (ie x-real-ip) (MUST be all lower-case) realIpHeader = "" info { @@ -59,9 +59,11 @@ relay { maxSubsPerConnection = 20 writePolicy { + # If non-empty, path to an executable script that implements the writePolicy plugin logic plugin = "" - lookbackSeconds = 21600 + # Number of seconds to search backwards for lookback events when starting the writePolicy plugin (0 for no lookback) + lookbackSeconds = 0 } compression { From 7c0477c2025d28971de9214eba26bca86705a4da Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 02:50:23 -0500 Subject: [PATCH 39/51] re-init plugin when lookback seconds config changes --- src/PluginWritePolicy.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git 
a/src/PluginWritePolicy.h b/src/PluginWritePolicy.h index 76fb8a7..197b4b8 100644 --- a/src/PluginWritePolicy.h +++ b/src/PluginWritePolicy.h @@ -27,11 +27,12 @@ struct PluginWritePolicy { struct RunningPlugin { pid_t pid; std::string currPluginPath; + uint64_t lookbackSeconds; struct timespec lastModTime; FILE *r; FILE *w; - RunningPlugin(pid_t pid, int rfd, int wfd, std::string currPluginPath) : pid(pid), currPluginPath(currPluginPath) { + RunningPlugin(pid_t pid, int rfd, int wfd, std::string currPluginPath, uint64_t lookbackSeconds) : pid(pid), currPluginPath(currPluginPath), lookbackSeconds(lookbackSeconds) { r = fdopen(rfd, "r"); w = fdopen(wfd, "w"); setlinebuf(w); @@ -62,7 +63,7 @@ struct PluginWritePolicy { try { if (running) { - if (pluginPath != running->currPluginPath) { + if (pluginPath != running->currPluginPath || cfg().relay__writePolicy__lookbackSeconds != running->lookbackSeconds) { running.reset(); } else { struct stat statbuf; @@ -175,16 +176,16 @@ struct PluginWritePolicy { auto ret = posix_spawn(&pid, path.c_str(), &file_actions, nullptr, argv, nullptr); if (ret) throw herr("posix_spawn failed when to invoke '", path, "': ", strerror(errno)); - running = make_unique(pid, inPipe.saveFd(0), outPipe.saveFd(1), path); + running = make_unique(pid, inPipe.saveFd(0), outPipe.saveFd(1), path, cfg().relay__writePolicy__lookbackSeconds); } void sendLookbackEvents() { - if (cfg().relay__writePolicy__lookbackSeconds == 0) return; + if (running->lookbackSeconds == 0) return; Decompressor decomp; auto now = hoytech::curr_time_us(); - uint64_t start = now - (cfg().relay__writePolicy__lookbackSeconds * 1'000'000); + uint64_t start = now - (running->lookbackSeconds * 1'000'000); auto txn = env.txn_ro(); From d89034bd9fe78d6ede6ca1f6f92fbe4bc125d1df Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 03:25:45 -0500 Subject: [PATCH 40/51] plugin docs --- docs/plugins.md | 86 +++++++++++++++++++++++++++++++++++++++++ src/PluginWritePolicy.h | 2 +- 2 
files changed, 87 insertions(+), 1 deletion(-) create mode 100644 docs/plugins.md diff --git a/docs/plugins.md b/docs/plugins.md new file mode 100644 index 0000000..74f4d33 --- /dev/null +++ b/docs/plugins.md @@ -0,0 +1,86 @@ +# Write policy plugins + +In order to reduce complexity, strfry's design attempts to keep policy logic out of its core relay functionality. Instead, this logic can be implemented by operators by installing a write policy plugin. Among other things, plugins can be used for the following: + +* White/black-lists (particular pubkeys can/can't post events) +* Rate-limits +* Spam filtering + +A plugin can be implemented in any programming language that supports reading lines from stdin, decoding JSON, and printing JSON to stdout. If a plugin is installed, strfry will send the event (along with some other information like IP address) to the plugin over stdin. The plugin should then decide what to do with it and print out a JSON object containing this decision. + +Whenever the script's modification-time changes, or the plugin settings in `strfry.conf` change, the plugin will be reloaded upon the next write attempt. + +If configured, when a plugin is loaded some number of recently stored events will be sent to it as a "lookback". This is useful for populating the initial rate-limiting state. Plugins should print nothing in response to a lookback message. + + +## Input messages + +Input messages contain the following keys: + +* `type`: Either `new` or `lookback` +* `event`: The event posted by the client, with all the required fields such as `id`, `pubkey`, etc +* `receivedAt`: Unix timestamp of when this event was received by the relay +* `sourceType`: Where this event came from. Typically will be `IP4` or `IP6`, but in lookback can also be `Import`, `Stream`, or `Sync`. +* `sourceInfo`: Specifics of the event's source.
Either an IP address or a relay URL (for stream/sync) + + +## Output messages + +In response to `new` events, the plugin should print a JSONL message (minified JSON followed by a newline). It should contain the following keys: + +* `id`: The event ID taken from the `event.id` field of the input message +* `action`: Either `accept`, `reject`, or `rejectShadow` +* `msg`: The NIP-20 response message to be sent to the client. Only used for `reject` + + +## Example: Whitelist + +Here is a simple example `whitelist.js` plugin that will reject all events except for those in a whitelist: + + #!/usr/bin/env node + + const whiteList = { + '003ba9b2c5bd8afeed41a4ce362a8b7fc3ab59c25b6a1359cae9093f296dac01': true, + }; + + const rl = require('readline').createInterface({ + input: process.stdin, + output: process.stdout, + terminal: false + }); + + rl.on('line', (line) => { + let req = JSON.parse(line); + + if (req.type === 'lookback') { + return; // do nothing + } + + if (req.type !== 'new') { + console.error("unexpected request type"); // will appear in strfry logs + return; + } + + let res = { id: req.event.id }; // must echo the event's id + + if (whiteList[req.event.pubkey]) { + res.action = 'accept'; + } else { + res.action = 'reject'; + res.msg = 'blocked: not on white-list'; + } + + console.log(JSON.stringify(res)); + }); + +To install: + +* Make the script executable: `chmod a+x whitelist.js` +* In `strfry.conf`, configure `relay.writePolicy.plugin` to `./whitelist.js` + + +## Notes + +* If applicable, you should ensure stdout is *line buffered* (for example, in perl use `$|++`). +* If events are being rejected with `error: internal error`, then check the strfry logs. The plugin is misconfigured or failing. +* When returning an action of `accept`, it doesn't necessarily guarantee that the event will be accepted. The regular strfry checks are still subsequently applied, such as expiration, deletion, etc. 
diff --git a/src/PluginWritePolicy.h b/src/PluginWritePolicy.h index 197b4b8..9b80f1e 100644 --- a/src/PluginWritePolicy.h +++ b/src/PluginWritePolicy.h @@ -174,7 +174,7 @@ struct PluginWritePolicy { ) throw herr("posix_span_file_actions failed: ", strerror(errno)); auto ret = posix_spawn(&pid, path.c_str(), &file_actions, nullptr, argv, nullptr); - if (ret) throw herr("posix_spawn failed when to invoke '", path, "': ", strerror(errno)); + if (ret) throw herr("posix_spawn failed to invoke '", path, "': ", strerror(errno)); running = make_unique(pid, inPipe.saveFd(0), outPipe.saveFd(1), path, cfg().relay__writePolicy__lookbackSeconds); } From cdb25aed4584937445ac73947e1ea6c1ee102ae2 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 03:27:49 -0500 Subject: [PATCH 41/51] todo --- TODO | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/TODO b/TODO index 96d623c..5789555 100644 --- a/TODO +++ b/TODO @@ -1,11 +1,10 @@ 0.1 release - write policy plugin - when disk is full it should log warning but not crash ensure DB upgrade flow works + when disk is full it should log warning but not crash disable sync - ? why isn't the LMDB mapping CLOEXEC 0.2 release + ? why isn't the LMDB mapping CLOEXEC fix sync * logging of bytes up/down * up/both directions @@ -21,6 +20,7 @@ features NIP-42 AUTH slow-reader detection and back-pressure ? relay block-list events + ? if a client disconnects, delete all its pending write messages rate limits ! 
event writes per second per ip From 3d4b8853f114b87d3587d7bf3cdfd0e743095dfc Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 04:31:31 -0500 Subject: [PATCH 42/51] ensure you can export a version 0 DB using a new binary --- TODO | 1 - golpe.yaml | 6 +++--- src/DBScan.h | 2 +- src/cmd_export.cpp | 13 ++++++++++++- src/cmd_import.cpp | 2 +- src/cmd_info.cpp | 14 +------------- src/events.cpp | 9 ++++----- src/events.h | 2 +- src/global.h | 1 + src/misc.cpp | 18 ++++++++++++++++++ src/onAppStartup.cpp | 4 ++-- 11 files changed, 44 insertions(+), 28 deletions(-) diff --git a/TODO b/TODO index 5789555..cb9c150 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,4 @@ 0.1 release - ensure DB upgrade flow works when disk is full it should log warning but not crash disable sync diff --git a/golpe.yaml b/golpe.yaml index 97180df..592c838 100644 --- a/golpe.yaml +++ b/golpe.yaml @@ -31,7 +31,7 @@ tables: type: ubytes indices: - createdAt: + created_at: integer: true receivedAt: integer: true @@ -56,8 +56,8 @@ tables: indexPrelude: | auto *flat = v.flat_nested(); - createdAt = flat->created_at(); - uint64_t indexTime = *createdAt; + created_at = flat->created_at(); + uint64_t indexTime = *created_at; receivedAt = v.receivedAt(); id = makeKey_StringUint64(sv(flat->id()), indexTime); diff --git a/src/DBScan.h b/src/DBScan.h index 63a964d..b463bd5 100644 --- a/src/DBScan.h +++ b/src/DBScan.h @@ -187,7 +187,7 @@ struct DBScan { } else { scanState = CreatedAtScan{}; auto *state = std::get_if(&scanState); - indexDbi = env.dbi_Event__createdAt; + indexDbi = env.dbi_Event__created_at; isComplete = [&, state]{ return state->done; diff --git a/src/cmd_export.cpp b/src/cmd_export.cpp index 8dc1b88..3fb5e14 100644 --- a/src/cmd_export.cpp +++ b/src/cmd_export.cpp @@ -24,12 +24,23 @@ void cmd_export(const std::vector &subArgs) { auto txn = env.txn_ro(); - env.generic_foreachFull(txn, env.dbi_Event__createdAt, lmdb::to_sv(since), lmdb::to_sv(0), [&](auto k, auto v) { + auto dbVersion = 
getDBVersion(txn); + auto qdb = getQdbInstance(txn); + + env.generic_foreachFull(txn, env.dbi_Event__created_at, lmdb::to_sv(since), lmdb::to_sv(0), [&](auto k, auto v) { if (lmdb::from_sv(k) > until) return false; auto view = env.lookup_Event(txn, lmdb::from_sv(v)); if (!view) throw herr("missing event from index, corrupt DB?"); + if (dbVersion == 0) { + std::string_view raw; + bool found = qdb.dbi_nodesLeaf.get(txn, lmdb::to_sv(view->primaryKeyId), raw); + if (!found) throw herr("couldn't find leaf node in quadrable, corrupted DB?"); + std::cout << raw.substr(8 + 32 + 32) << "\n"; + return true; + } + if (!args["--include-ephemeral"].asBool()) { if (isEphemeralEvent(view->flat_nested()->kind())) return true; } diff --git a/src/cmd_import.cpp b/src/cmd_import.cpp index b5de31d..eedd399 100644 --- a/src/cmd_import.cpp +++ b/src/cmd_import.cpp @@ -39,7 +39,7 @@ void cmd_import(const std::vector &subArgs) { }; auto flushChanges = [&]{ - writeEvents(txn, qdb, newEvents); + writeEvents(txn, qdb, newEvents, 0); uint64_t numCommits = 0; diff --git a/src/cmd_info.cpp b/src/cmd_info.cpp index dfcdc71..e5d1dda 100644 --- a/src/cmd_info.cpp +++ b/src/cmd_info.cpp @@ -18,18 +18,6 @@ void cmd_info(const std::vector &subArgs) { auto txn = env.txn_ro(); - uint64_t dbVersion; - - { - auto s = env.lookup_Meta(txn, 1); - - if (s) { - dbVersion = s->dbVersion(); - } else { - dbVersion = 0; - } - } - - std::cout << "DB version: " << dbVersion << "\n"; + std::cout << "DB version: " << getDBVersion(txn) << "\n"; std::cout << "merkle root: " << to_hex(qdb.root(txn)) << "\n"; } diff --git a/src/events.cpp b/src/events.cpp index af0003f..c5725be 100644 --- a/src/events.cpp +++ b/src/events.cpp @@ -230,7 +230,7 @@ void deleteEvent(lmdb::txn &txn, quadrable::Quadrable::UpdateSet &changes, defau -void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector &evs) { +void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector &evs, uint64_t logLevel) { 
std::sort(evs.begin(), evs.end(), [](auto &a, auto &b) { return a.quadKey < b.quadKey; }); auto changes = qdb.change(); @@ -260,7 +260,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorcreated_at()) { auto otherEv = env.lookup_Event(txn, lmdb::from_sv(v)); if (!otherEv) throw herr("missing event from index, corrupt DB?"); - LI << "Deleting event (replaceable). id=" << to_hex(sv(otherEv->flat_nested()->pubkey())); + if (logLevel >= 1) LI << "Deleting event (replaceable). id=" << to_hex(sv(otherEv->flat_nested()->id())); deleteEvent(txn, changes, *otherEv); } else { ev.status = EventWriteStatus::Replaced; @@ -281,7 +281,6 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorpubkey())) + replace; auto searchKey = makeKey_StringUint64(searchStr, flat->kind()); - LI << to_hex(searchKey); env.generic_foreachFull(txn, env.dbi_Event__replace, searchKey, lmdb::to_sv(MAX_U64), [&](auto k, auto v) { ParsedKey_StringUint64 parsedKey(k); @@ -290,7 +289,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorflat_nested()->created_at() < flat->created_at()) { - LI << "Deleting event (d-tag). id=" << to_hex(sv(otherEv->flat_nested()->pubkey())); + if (logLevel >= 1) LI << "Deleting event (d-tag). id=" << to_hex(sv(otherEv->flat_nested()->id())); deleteEvent(txn, changes, *otherEv); } else { ev.status = EventWriteStatus::Replaced; @@ -308,7 +307,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vectorkey() == 'e') { auto otherEv = lookupEventById(txn, sv(tagPair->val())); if (otherEv && sv(otherEv->flat_nested()->pubkey()) == sv(flat->pubkey())) { - LI << "Deleting event (kind 5). id=" << to_hex(sv(tagPair->val())); + if (logLevel >= 1) LI << "Deleting event (kind 5). 
id=" << to_hex(sv(tagPair->val())); deleteEvent(txn, changes, *otherEv); } } diff --git a/src/events.h b/src/events.h index a380abb..a59ac07 100644 --- a/src/events.h +++ b/src/events.h @@ -107,5 +107,5 @@ struct EventToWrite { }; -void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector &evs); +void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector &evs, uint64_t logLevel = 1); void deleteEvent(lmdb::txn &txn, quadrable::Quadrable::UpdateSet &changes, defaultDb::environment::View_Event &ev); diff --git a/src/global.h b/src/global.h index c6b478f..8291ca4 100644 --- a/src/global.h +++ b/src/global.h @@ -20,3 +20,4 @@ std::string renderSize(uint64_t si); std::string renderPercent(double p); uint64_t parseUint64(const std::string &s); std::string parseIP(const std::string &ip); +uint64_t getDBVersion(lmdb::txn &txn); diff --git a/src/misc.cpp b/src/misc.cpp index 0f23f20..d1f5589 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -85,3 +85,21 @@ uint64_t parseUint64(const std::string &s) { return std::stoull(s); } + + + +uint64_t getDBVersion(lmdb::txn &txn) { + uint64_t dbVersion; + + { + auto s = env.lookup_Meta(txn, 1); + + if (s) { + dbVersion = s->dbVersion(); + } else { + dbVersion = 0; + } + } + + return dbVersion; +} diff --git a/src/onAppStartup.cpp b/src/onAppStartup.cpp index c63db6d..4d43334 100644 --- a/src/onAppStartup.cpp +++ b/src/onAppStartup.cpp @@ -32,7 +32,7 @@ static void dbCheck(lmdb::txn &txn, const std::string &cmd) { return false; }); - if (cmd == "export") return; + if (cmd == "export" || cmd == "info") return; if (eventFound) dbTooOld(0); } @@ -43,7 +43,7 @@ static void dbCheck(lmdb::txn &txn, const std::string &cmd) { if (s->endianness() != 1) throw herr("DB was created on a machine with different endianness"); if (s->dbVersion() < CURR_DB_VERSION) { - if (cmd == "export") return; + if (cmd == "export" || cmd == "info") return; dbTooOld(s->dbVersion()); } From 4a7112a3be601067163a895ef9d7379eace0372b Mon Sep 17 
00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 04:35:56 -0500 Subject: [PATCH 43/51] bump --- golpe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golpe b/golpe index 620e823..df1f446 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit 620e8233da82fb853d9a63797a7ca0ae95bddc8e +Subproject commit df1f44652fedc41e3bac10d1f30a28c4e6e6b575 From c409c53bc52000675f95dbdd105c4a4cd987c3ad Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 04:39:45 -0500 Subject: [PATCH 44/51] logo --- README.md | 2 + docs/strfry.svg | 135 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 docs/strfry.svg diff --git a/README.md b/README.md index 17b1b8b..b3400a2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # strfry - a nostr relay +![strfry logo](docs/strfry.svg) + strfry is a relay for the [nostr protocol](https://github.com/nostr-protocol/nostr) * Supports most applicable NIPs: 1, 9, 11, 12, 15, 16, 20, 22 diff --git a/docs/strfry.svg b/docs/strfry.svg new file mode 100644 index 0000000..23897a7 --- /dev/null +++ b/docs/strfry.svg @@ -0,0 +1,135 @@ + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + From 213f7cb092e3d417bd10ed2d11dfec661112d17b Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 09:04:58 -0500 Subject: [PATCH 45/51] bump golpe --- golpe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/golpe b/golpe index df1f446..a660dfa 160000 --- a/golpe +++ b/golpe @@ -1 +1 @@ -Subproject commit df1f44652fedc41e3bac10d1f30a28c4e6e6b575 +Subproject commit a660dfaf285f0ef57b48c538c97fd7d291153d8c From a01bd4eadce4fca85377c7abb5deccad0e769f06 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 09:05:37 -0500 Subject: [PATCH 46/51] document libzstd dependency --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b3400a2..985ad98 100644 --- a/README.md +++ b/README.md @@ -30,7 
+30,7 @@ Either the full set of messages in the DB can be synced, or the results of one o A C++20 compiler is required, along with a few other common dependencies. On Debian/Ubuntu use these commands: - sudo apt install -y git build-essential libyaml-perl libtemplate-perl libssl-dev zlib1g-dev liblmdb-dev libflatbuffers-dev libsecp256k1-dev libb2-dev + sudo apt install -y git build-essential libyaml-perl libtemplate-perl libssl-dev zlib1g-dev liblmdb-dev libflatbuffers-dev libsecp256k1-dev libb2-dev libzstd-dev git submodule update --init make setup-golpe make -j4 From 204c808b41d2be5ce46e957f132c2df0e2b3683d Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Thu, 9 Feb 2023 12:40:40 -0500 Subject: [PATCH 47/51] Monitors can't be stored in flat maps because we manage pointers into them in MonitorSets --- src/ActiveMonitors.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h index 129b917..700fb11 100644 --- a/src/ActiveMonitors.h +++ b/src/ActiveMonitors.h @@ -1,5 +1,7 @@ #pragma once +#include + #include "golpe.h" #include "Subscription.h" @@ -13,9 +15,10 @@ struct ActiveMonitors : NonCopyable { Subscription sub; Monitor(Subscription &sub_) : sub(std::move(sub_)) {} + Monitor(const Monitor&) = delete; // pointers to filters inside sub must be stable because they are stored in MonitorSets }; - using ConnMonitor = flat_hash_map; + using ConnMonitor = std::unordered_map; flat_hash_map conns; // connId -> subId -> Monitor struct MonitorItem { From 73e2719b45f18ef8330652f4c33e2261cdd67fe5 Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Fri, 10 Feb 2023 04:28:13 -0500 Subject: [PATCH 48/51] typo --- docs/plugins.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/plugins.md b/docs/plugins.md index 74f4d33..cebd829 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -29,7 +29,7 @@ Input messages contain the following keys: In response to `new` events, the plugin should print a JSONL 
message (minified JSON followed by a newline). It should contain the following keys: * `id`: The event ID taken from the `event.id` field of the input message -* `action`: Either `accept`, `reject`, or `rejectShadow` +* `action`: Either `accept`, `reject`, or `shadowReject` * `msg`: The NIP-20 response message to be sent to the client. Only used for `reject` From 6fe1df8e378ae813c7e1c4091f9f16d1de102e3e Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Fri, 10 Feb 2023 20:44:46 -0500 Subject: [PATCH 49/51] libzstd needed --- Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 436e24d..0d181f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,8 @@ WORKDIR /build RUN apt update && apt install -y --no-install-recommends \ git g++ make pkg-config libtool ca-certificates \ libyaml-perl libtemplate-perl libssl-dev zlib1g-dev \ - liblmdb-dev libflatbuffers-dev libsecp256k1-dev libb2-dev + liblmdb-dev libflatbuffers-dev libsecp256k1-dev libb2-dev \ + libzstd-dev COPY . . 
RUN git submodule update --init @@ -15,9 +16,9 @@ FROM ubuntu:jammy as runner WORKDIR /app RUN apt update && apt install -y --no-install-recommends \ - liblmdb0 libflatbuffers1 libsecp256k1-0 libb2-1 \ + liblmdb0 libflatbuffers1 libsecp256k1-0 libb2-1 libzstd1 \ && rm -rf /var/lib/apt/lists/* COPY --from=build /build/strfry strfry ENTRYPOINT ["/app/strfry"] -CMD ["relay"] \ No newline at end of file +CMD ["relay"] From decc3aea26d26557e9a5a19d8c9d946d7450faff Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sat, 11 Feb 2023 07:36:09 -0500 Subject: [PATCH 50/51] delete command, split up compact and gc into separate commands --- src/cmd_compact.cpp | 25 +++++------- src/cmd_delete.cpp | 93 +++++++++++++++++++++++++++++++++++++++++++++ src/cmd_gc.cpp | 22 +++++++++++ src/cmd_scan.cpp | 3 +- 4 files changed, 125 insertions(+), 18 deletions(-) create mode 100644 src/cmd_delete.cpp create mode 100644 src/cmd_gc.cpp diff --git a/src/cmd_compact.cpp b/src/cmd_compact.cpp index 7d009d9..ebcb6bf 100644 --- a/src/cmd_compact.cpp +++ b/src/cmd_compact.cpp @@ -10,30 +10,23 @@ static const char USAGE[] = R"( Usage: - compact export - compact quad-gc + compact )"; void cmd_compact(const std::vector &subArgs) { std::map args = docopt::docopt(USAGE, subArgs, true, ""); - if (args["export"].asBool()) { - std::string outputFile = args[""].asString(); + std::string outputFile = args[""].asString(); - if (outputFile == "-") { - env.copy_fd(1); - } else { - if (access(outputFile.c_str(), F_OK) == 0) throw herr("output file '", outputFile, "' exists, not overwriting"); + if (outputFile == "-") { + env.copy_fd(1); + } else { + if (access(outputFile.c_str(), F_OK) == 0) throw herr("output file '", outputFile, "' exists, not overwriting"); - auto *f = ::fopen(outputFile.c_str(), "w"); - if (!f) throw herr("opening output file '", outputFile, "' failed: ", strerror(errno)); + auto *f = ::fopen(outputFile.c_str(), "w"); + if (!f) throw herr("opening output file '", outputFile, "' failed: ", 
strerror(errno)); - env.copy_fd(::fileno(f)); - } - } else if (args["quad-gc"].asBool()) { - auto qdb = getQdbInstance(); - - quadrableGarbageCollect(qdb, 2); + env.copy_fd(::fileno(f)); } } diff --git a/src/cmd_delete.cpp b/src/cmd_delete.cpp new file mode 100644 index 0000000..ffb3a29 --- /dev/null +++ b/src/cmd_delete.cpp @@ -0,0 +1,93 @@ +#include + +#include +#include "golpe.h" + +#include "DBScan.h" +#include "events.h" +#include "gc.h" + + +static const char USAGE[] = +R"( + Usage: + delete [--age=] [--filter=] [--dry-run] [--no-gc] +)"; + + +void cmd_delete(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + uint64_t age = MAX_U64; + if (args["--age"]) age = args["--age"].asLong(); + + std::string filterStr; + if (args["--filter"]) filterStr = args["--filter"].asString(); + + bool dryRun = args["--dry-run"].asBool(); + bool noGc = args["--no-gc"].asBool(); + + + + if (filterStr.size() == 0 && age == MAX_U64) throw herr("must specify --age and/or --filter"); + if (filterStr.size() == 0) filterStr = "{}"; + + + auto filter = tao::json::from_string(filterStr); + auto now = hoytech::curr_time_s(); + + if (age != MAX_U64) { + if (age > now) age = now; + if (filter.optional("until")) throw herr("--age is not compatible with filter containing 'until'"); + + filter["until"] = now - age; + } + + + auto filterGroup = NostrFilterGroup::unwrapped(filter, MAX_U64); + Subscription sub(1, "junkSub", filterGroup); + DBScanQuery query(sub); + + + btree_set levIds; + + { + auto txn = env.txn_ro(); + + while (1) { + bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t levId){ + levIds.insert(levId); + }); + + if (complete) break; + } + } + + if (dryRun) { + LI << "Would delete " << levIds.size() << " events"; + return; + } + + + auto qdb = getQdbInstance(); + + LI << "Deleting " << levIds.size() << " events"; + + { + auto txn = env.txn_rw(); + + auto changes = qdb.change(); + + for (auto levId : levIds) { + 
auto view = env.lookup_Event(txn, levId); + if (!view) continue; // Deleted in between transactions + deleteEvent(txn, changes, *view); + } + + changes.apply(txn); + + txn.commit(); + } + + if (!noGc) quadrableGarbageCollect(qdb, 2); +} diff --git a/src/cmd_gc.cpp b/src/cmd_gc.cpp new file mode 100644 index 0000000..ca40ed1 --- /dev/null +++ b/src/cmd_gc.cpp @@ -0,0 +1,22 @@ +#include +#include + +#include +#include "golpe.h" + +#include "gc.h" + + +static const char USAGE[] = +R"( + Usage: + gc +)"; + + +void cmd_gc(const std::vector &subArgs) { + std::map args = docopt::docopt(USAGE, subArgs, true, ""); + + auto qdb = getQdbInstance(); + quadrableGarbageCollect(qdb, 2); +} diff --git a/src/cmd_scan.cpp b/src/cmd_scan.cpp index b533ac4..73786f3 100644 --- a/src/cmd_scan.cpp +++ b/src/cmd_scan.cpp @@ -20,8 +20,7 @@ void cmd_scan(const std::vector &subArgs) { uint64_t pause = 0; if (args["--pause"]) pause = args["--pause"].asLong(); - bool metrics = false; - if (args["--metrics"]) metrics = true; + bool metrics = args["--metrics"].asBool(); std::string filterStr = args[""].asString(); From 30b8c387a2a1ad58e04f7dd8c00e6c9166c3c5ae Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 15 Feb 2023 17:00:11 -0500 Subject: [PATCH 51/51] don't throw exceptions if modifications are made during dict operations --- src/cmd_dict.cpp | 85 ++++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/src/cmd_dict.cpp b/src/cmd_dict.cpp index d5120e9..088d147 100644 --- a/src/cmd_dict.cpp +++ b/src/cmd_dict.cpp @@ -44,32 +44,29 @@ void cmd_dict(const std::vector &subArgs) { Decompressor decomp; std::vector levIds; - { - auto txn = env.txn_ro(); - auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), MAX_U64); - Subscription sub(1, "junkSub", filterGroup); - DBScanQuery query(sub); + auto txn = env.txn_ro(); - while (1) { - bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t 
levId){ - levIds.push_back(levId); - }); + auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), MAX_U64); + Subscription sub(1, "junkSub", filterGroup); + DBScanQuery query(sub); - if (complete) break; - } + while (1) { + bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t levId){ + levIds.push_back(levId); + }); - LI << "Filter matched " << levIds.size() << " records"; + if (complete) break; } + LI << "Filter matched " << levIds.size() << " records"; + if (args["stats"].asBool()) { uint64_t totalSize = 0; uint64_t totalCompressedSize = 0; uint64_t numCompressed = 0; - auto txn = env.txn_ro(); - btree_map dicts; env.foreach_CompressionDictionary(txn, [&](auto &view){ @@ -112,22 +109,18 @@ void cmd_dict(const std::vector &subArgs) { std::string trainingBuf; std::vector trainingSizes; - { - auto txn = env.txn_ro(); + if (levIds.size() > limit) { + LI << "Randomly selecting " << limit << " records"; + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(levIds.begin(), levIds.end(), g); + levIds.resize(limit); + } - if (levIds.size() > limit) { - LI << "Randomly selecting " << limit << " records"; - std::random_device rd; - std::mt19937 g(rd()); - std::shuffle(levIds.begin(), levIds.end(), g); - levIds.resize(limit); - } - - for (auto levId : levIds) { - std::string json = std::string(getEventJson(txn, decomp, levId)); - trainingBuf += json; - trainingSizes.emplace_back(json.size()); - } + for (auto levId : levIds) { + std::string json = std::string(getEventJson(txn, decomp, levId)); + trainingBuf += json; + trainingSizes.emplace_back(json.size()); } std::string dict(dictSize, '\0'); @@ -137,19 +130,19 @@ void cmd_dict(const std::vector &subArgs) { auto ret = ZDICT_trainFromBuffer(dict.data(), dict.size(), trainingBuf.data(), trainingSizes.data(), trainingSizes.size()); if (ZDICT_isError(ret)) throw herr("zstd training failed: ", ZSTD_getErrorName(ret)); - { - auto txn = env.txn_rw(); + txn.abort(); + 
txn = env.txn_rw(); - uint64_t newDictId = env.insert_CompressionDictionary(txn, dict); + uint64_t newDictId = env.insert_CompressionDictionary(txn, dict); - std::cout << "Saved new dictionary, dictId = " << newDictId << std::endl; + std::cout << "Saved new dictionary, dictId = " << newDictId << std::endl; - txn.commit(); - } + txn.commit(); } else if (args["compress"].asBool()) { if (dictId == 0) throw herr("specify --dictId or --decompress"); - auto txn = env.txn_rw(); + txn.abort(); + txn = env.txn_rw(); auto view = env.lookup_CompressionDictionary(txn, dictId); if (!view) throw herr("couldn't find dictId ", dictId); @@ -166,7 +159,14 @@ void cmd_dict(const std::vector &subArgs) { std::string compressedData(500'000, '\0'); for (auto levId : levIds) { - auto orig = getEventJson(txn, decomp, levId); + std::string_view orig; + + try { + orig = getEventJson(txn, decomp, levId); + } catch (std::exception &e) { + continue; + } + auto ret = ZSTD_compress_usingCDict(cctx, compressedData.data(), compressedData.size(), orig.data(), orig.size(), cdict); if (ZDICT_isError(ret)) throw herr("zstd compression failed: ", ZSTD_getErrorName(ret)); @@ -203,13 +203,20 @@ void cmd_dict(const std::vector &subArgs) { LI << "Original event sizes: " << origSizes; LI << "New event sizes: " << compressedSizes; } else if (args["decompress"].asBool()) { - auto txn = env.txn_rw(); + txn.abort(); + txn = env.txn_rw(); uint64_t pendingFlush = 0; uint64_t processed = 0; for (auto levId : levIds) { - auto orig = getEventJson(txn, decomp, levId); + std::string_view orig; + + try { + orig = getEventJson(txn, decomp, levId); + } catch (std::exception &e) { + continue; + } std::string newVal;