sync optimisations, DBQuery no longer loads eventPayload

- It is now up to the caller to do so
- QueryScheduler can now optionally skip checking that matched events still exist (when skipped, returned levIds may be stale because the events were deleted)
This commit is contained in:
Doug Hoyte
2023-07-29 01:14:38 -04:00
parent 94a60c3ad2
commit 206b14a473
8 changed files with 59 additions and 35 deletions

View File

@ -234,13 +234,11 @@ struct DBScan : NonCopyable {
refillScanDepth = 10 * initialScanDepth;
}
bool scan(lmdb::txn &txn, std::function<bool(uint64_t, std::string_view)> handleEvent, std::function<bool(uint64_t)> doPause) {
bool scan(lmdb::txn &txn, std::function<bool(uint64_t)> handleEvent, std::function<bool(uint64_t)> doPause) {
auto cmp = [](auto &a, auto &b){
return a.created() == b.created() ? a.levId() > b.levId() : a.created() > b.created();
};
auto eventPayloadCursor = lmdb::cursor::open(txn, env.dbi_EventPayload);
while (1) {
approxWork++;
if (doPause(approxWork)) return false;
@ -262,23 +260,16 @@ struct DBScan : NonCopyable {
eventQueue.pop_front();
bool doSend = false;
uint64_t levId = ev.levId();
std::string_view eventPayload;
auto loadEventPayload = [&]{
std::string_view key = lmdb::to_sv<uint64_t>(levId);
return eventPayloadCursor.get(key, eventPayload, MDB_SET_KEY); // If not found, was deleted while scan was paused
};
if (indexOnly) {
if (f.doesMatchTimes(ev.created())) doSend = true;
if (!loadEventPayload()) doSend = false;
} else if (loadEventPayload()) {
} else {
approxWork += 10;
if (f.doesMatch(lookupEventByLevId(txn, levId).flat_nested())) doSend = true;
}
if (doSend) {
if (handleEvent(levId, eventPayload)) return true;
if (handleEvent(levId)) return true;
}
cursors[ev.scanIndex()].outstanding--;
@ -315,7 +306,7 @@ struct DBQuery : NonCopyable {
DBQuery(const tao::json::value &filter, uint64_t maxLimit = MAX_U64) : sub(Subscription(1, ".", NostrFilterGroup::unwrapped(filter, maxLimit))) {}
// If scan is complete, returns true
bool process(lmdb::txn &txn, std::function<void(const Subscription &, uint64_t, std::string_view)> cb, uint64_t timeBudgetMicroseconds = MAX_U64, bool logMetrics = false) {
bool process(lmdb::txn &txn, std::function<void(const Subscription &, uint64_t)> cb, uint64_t timeBudgetMicroseconds = MAX_U64, bool logMetrics = false) {
while (filterGroupIndex < sub.filterGroup.size()) {
const auto &f = sub.filterGroup.filters[filterGroupIndex];
@ -323,7 +314,7 @@ struct DBQuery : NonCopyable {
uint64_t startTime = hoytech::curr_time_us();
bool complete = scanner->scan(txn, [&](uint64_t levId, std::string_view eventPayload){
bool complete = scanner->scan(txn, [&](uint64_t levId){
if (f.limit == 0) return true;
// If this event came in after our query began, don't send it. It will be sent after the EOSE.
@ -331,7 +322,7 @@ struct DBQuery : NonCopyable {
if (sentEventsFull.find(levId) == sentEventsFull.end()) {
sentEventsFull.insert(levId);
cb(sub, levId, eventPayload);
cb(sub, levId);
}
sentEventsCurr.insert(levId);
@ -386,10 +377,10 @@ struct DBQuery : NonCopyable {
};
inline void foreachByFilter(lmdb::txn &txn, const tao::json::value &filter, std::function<void(uint64_t, std::string_view)> cb) {
inline void foreachByFilter(lmdb::txn &txn, const tao::json::value &filter, std::function<void(uint64_t)> cb) {
DBQuery query(filter);
query.process(txn, [&](const auto &, uint64_t levId, std::string_view eventPayload){
cb(levId, eventPayload);
query.process(txn, [&](const auto &, uint64_t levId){
cb(levId);
});
}

View File

@ -6,7 +6,11 @@
struct QueryScheduler : NonCopyable {
std::function<void(lmdb::txn &txn, const Subscription &sub, uint64_t levId, std::string_view eventPayload)> onEvent;
std::function<void(lmdb::txn &txn, const Subscription &sub, const std::vector<uint64_t> &levIds)> onEventBatch;
std::function<void(Subscription &sub)> onComplete;
std::function<void(lmdb::txn &txn, Subscription &sub)> onComplete;
// If false, then levIds returned to above callbacks can be stale (because they were deleted)
// If false, then onEvent's eventPayload will always be ""
bool ensureExists = true;
using ConnQueries = flat_hash_map<SubId, DBQuery*>;
flat_hash_map<uint64_t, ConnQueries> conns; // connId -> subId -> DBQuery*
@ -74,7 +78,16 @@ struct QueryScheduler : NonCopyable {
return;
}
bool complete = q->process(txn, [&](const auto &sub, uint64_t levId, std::string_view eventPayload){
auto eventPayloadCursor = lmdb::cursor::open(txn, env.dbi_EventPayload);
bool complete = q->process(txn, [&](const auto &sub, uint64_t levId){
std::string_view eventPayload;
if (ensureExists) {
std::string_view key = lmdb::to_sv<uint64_t>(levId);
if (!eventPayloadCursor.get(key, eventPayload, MDB_SET_KEY)) return; // If not found, was deleted while scan was paused
}
if (onEvent) onEvent(txn, sub, levId, eventPayload);
if (onEventBatch) levIdBatch.push_back(levId);
}, cfg().relay__queryTimesliceBudgetMicroseconds, cfg().relay__logging__dbScanPerf);
@ -88,7 +101,7 @@ struct QueryScheduler : NonCopyable {
auto connId = q->sub.connId;
removeSub(connId, q->sub.subId);
if (onComplete) onComplete(q->sub);
if (onComplete) onComplete(txn, q->sub);
delete q;
} else {

View File

@ -50,7 +50,7 @@ void cmd_delete(const std::vector<std::string> &subArgs) {
auto txn = env.txn_ro();
while (1) {
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId, std::string_view){
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId){
levIds.insert(levId);
});

View File

@ -50,7 +50,7 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
DBQuery query(tao::json::from_string(filterStr));
while (1) {
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId, std::string_view){
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId){
levIds.push_back(levId);
});

View File

@ -37,9 +37,9 @@ void cmd_scan(const std::vector<std::string> &subArgs) {
exitOnSigPipe();
while (1) {
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId, std::string_view eventPayload){
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId){
if (count) numEvents++;
else std::cout << getEventJson(txn, decomp, levId, eventPayload) << "\n";
else std::cout << getEventJson(txn, decomp, levId) << "\n";
}, pause ? pause : MAX_U64, metrics);
if (complete) break;

View File

@ -57,18 +57,24 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
auto txn = env.txn_ro();
uint64_t numEvents = 0;
std::vector<uint64_t> levIds;
while (1) {
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId, std::string_view eventPayload){
auto ev = lookupEventByLevId(txn, levId);
ne.addItem(ev.flat_nested()->created_at(), sv(ev.flat_nested()->id()).substr(0, ne.idSize));
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId){
levIds.push_back(levId);
numEvents++;
});
if (complete) break;
}
std::sort(levIds.begin(), levIds.end());
for (auto levId : levIds) {
auto ev = lookupEventByLevId(txn, levId);
ne.addItem(ev.flat_nested()->created_at(), sv(ev.flat_nested()->id()).substr(0, ne.idSize));
}
LI << "Filter matches " << numEvents << " events";
}

View File

@ -1,7 +1,6 @@
#include <Negentropy.h>
#include "RelayServer.h"
#include "DBQuery.h"
#include "QueryScheduler.h"
@ -9,6 +8,7 @@ struct NegentropyViews {
struct UserView {
Negentropy ne;
std::string initialMsg;
std::vector<uint64_t> levIds;
uint64_t startTime = hoytech::curr_time_us();
};
@ -63,17 +63,18 @@ void RelayServer::runNegentropy(ThreadPool<MsgNegentropy>::Thread &thr) {
QueryScheduler queries;
NegentropyViews views;
queries.ensureExists = false;
queries.onEventBatch = [&](lmdb::txn &txn, const auto &sub, const std::vector<uint64_t> &levIds){
auto *view = views.findView(sub.connId, sub.subId);
if (!view) return;
for (auto levId : levIds) {
auto ev = lookupEventByLevId(txn, levId);
view->ne.addItem(ev.flat_nested()->created_at(), sv(ev.flat_nested()->id()).substr(0, view->ne.idSize));
view->levIds.push_back(levId);
}
};
queries.onComplete = [&](Subscription &sub){
queries.onComplete = [&](lmdb::txn &txn, Subscription &sub){
auto *view = views.findView(sub.connId, sub.subId);
if (!view) return;
@ -94,6 +95,20 @@ void RelayServer::runNegentropy(ThreadPool<MsgNegentropy>::Thread &thr) {
return;
}
std::sort(view->levIds.begin(), view->levIds.end());
for (auto levId : view->levIds) {
try {
auto ev = lookupEventByLevId(txn, levId);
view->ne.addItem(ev.flat_nested()->created_at(), sv(ev.flat_nested()->id()).substr(0, view->ne.idSize));
} catch (std::exception &) {
// levId was deleted when query was paused
}
}
view->levIds.clear();
view->levIds.shrink_to_fit();
view->ne.seal();
auto resp = view->ne.reconcile(view->initialMsg);

View File

@ -1,5 +1,4 @@
#include "RelayServer.h"
#include "DBQuery.h"
#include "QueryScheduler.h"
@ -11,7 +10,7 @@ void RelayServer::runReqWorker(ThreadPool<MsgReqWorker>::Thread &thr) {
sendEvent(sub.connId, sub.subId, decodeEventPayload(txn, decomp, eventPayload, nullptr, nullptr));
};
queries.onComplete = [&](Subscription &sub){
queries.onComplete = [&](lmdb::txn &, Subscription &sub){
sendToConn(sub.connId, tao::json::to_string(tao::json::value::array({ "EOSE", sub.subId.str() })));
tpReqMonitor.dispatch(sub.connId, MsgReqMonitor{MsgReqMonitor::NewSub{std::move(sub)}});
};