mirror of
https://github.com/hoytech/strfry.git
synced 2025-06-18 09:17:12 +00:00
DB schema refactor
This commit is contained in:
2
golpe
2
golpe
Submodule golpe updated: 7705174669...511e70d7b9
24
golpe.yaml
24
golpe.yaml
@ -13,20 +13,14 @@ includes: |
|
||||
tables:
|
||||
## DB meta-data. Single entry, with id = 1
|
||||
Meta:
|
||||
tableId: 2
|
||||
|
||||
fields:
|
||||
- name: dbVersion
|
||||
- name: endianness
|
||||
|
||||
## Stored nostr events
|
||||
## Meta-info of nostr events, suitable for indexing
|
||||
## Primary key is auto-incremented, called "levId" for Local EVent ID
|
||||
Event:
|
||||
tableId: 1
|
||||
|
||||
primaryKey: quadId
|
||||
|
||||
fields:
|
||||
- name: quadId
|
||||
- name: receivedAt # microseconds
|
||||
- name: flat
|
||||
type: ubytes
|
||||
@ -72,6 +66,20 @@ tables:
|
||||
if (flat->kind() == 5 && tagName == 'e') deletion.push_back(std::string(tagVal) + std::string(sv(flat->pubkey())));
|
||||
}
|
||||
|
||||
CompressionDictionary:
|
||||
fields:
|
||||
- name: dict
|
||||
type: ubytes
|
||||
|
||||
tablesRaw:
|
||||
## Raw nostr event JSON, possibly compressed
|
||||
## keys are levIds
|
||||
## vals are prefixed with a type byte:
|
||||
## 0: no compression, payload follows
|
||||
## 1: zstd compression. Followed by Dictionary ID (native endian uint32) then compressed payload
|
||||
EventPayload:
|
||||
flags: 'MDB_INTEGERKEY'
|
||||
|
||||
config:
|
||||
- name: db
|
||||
desc: "Directory that contains strfry database"
|
||||
|
18
src/DBScan.h
18
src/DBScan.h
@ -232,18 +232,18 @@ struct DBScan {
|
||||
}
|
||||
|
||||
bool sent = false;
|
||||
uint64_t quadId = lmdb::from_sv<uint64_t>(v);
|
||||
uint64_t levId = lmdb::from_sv<uint64_t>(v);
|
||||
|
||||
if (f.indexOnlyScans) {
|
||||
if (f.doesMatchTimes(created)) {
|
||||
handleEvent(quadId);
|
||||
handleEvent(levId);
|
||||
sent = true;
|
||||
}
|
||||
} else {
|
||||
auto view = env.lookup_Event(txn, quadId);
|
||||
auto view = env.lookup_Event(txn, levId);
|
||||
if (!view) throw herr("missing event from index, corrupt DB?");
|
||||
if (f.doesMatch(view->flat_nested())) {
|
||||
handleEvent(quadId);
|
||||
handleEvent(levId);
|
||||
sent = true;
|
||||
}
|
||||
}
|
||||
@ -298,16 +298,16 @@ struct DBScanQuery : NonCopyable {
|
||||
while (filterGroupIndex < sub.filterGroup.size()) {
|
||||
if (!scanner) scanner = std::make_unique<DBScan>(sub.filterGroup.filters[filterGroupIndex]);
|
||||
|
||||
bool complete = scanner->scan(txn, [&](uint64_t quadId){
|
||||
bool complete = scanner->scan(txn, [&](uint64_t levId){
|
||||
// If this event came in after our query began, don't send it. It will be sent after the EOSE.
|
||||
if (quadId > sub.latestEventId) return;
|
||||
if (levId > sub.latestEventId) return;
|
||||
|
||||
// We already sent this event
|
||||
if (alreadySentEvents.find(quadId) != alreadySentEvents.end()) return;
|
||||
alreadySentEvents.insert(quadId);
|
||||
if (alreadySentEvents.find(levId) != alreadySentEvents.end()) return;
|
||||
alreadySentEvents.insert(levId);
|
||||
|
||||
currScanRecordsFound++;
|
||||
cb(sub, quadId);
|
||||
cb(sub, levId);
|
||||
}, [&]{
|
||||
currScanRecordsTraversed++;
|
||||
return hoytech::curr_time_us() - startTime > timeBudgetMicroseconds;
|
||||
|
@ -2,17 +2,12 @@
|
||||
|
||||
|
||||
void RelayServer::cleanupOldEvents() {
|
||||
struct EventDel {
|
||||
uint64_t nodeId;
|
||||
uint64_t deletedNodeId;
|
||||
};
|
||||
|
||||
std::vector<EventDel> expiredEvents;
|
||||
std::vector<uint64_t> expiredLevIds;
|
||||
|
||||
{
|
||||
auto txn = env.txn_ro();
|
||||
|
||||
auto mostRecent = getMostRecentEventId(txn);
|
||||
auto mostRecent = getMostRecentLevId(txn);
|
||||
uint64_t cutoff = hoytech::curr_time_s() - cfg().events__ephemeralEventsLifetimeSeconds;
|
||||
uint64_t currKind = 20'000;
|
||||
|
||||
@ -31,10 +26,10 @@ void RelayServer::cleanupOldEvents() {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t nodeId = lmdb::from_sv<uint64_t>(v);
|
||||
uint64_t levId = lmdb::from_sv<uint64_t>(v);
|
||||
|
||||
if (nodeId != mostRecent) { // prevent nodeId re-use
|
||||
expiredEvents.emplace_back(nodeId, 0);
|
||||
if (levId != mostRecent) { // prevent levId re-use
|
||||
expiredLevIds.emplace_back(levId);
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -44,29 +39,30 @@ void RelayServer::cleanupOldEvents() {
|
||||
}
|
||||
}
|
||||
|
||||
if (expiredEvents.size() > 0) {
|
||||
LI << "Deleting " << expiredEvents.size() << " ephemeral events";
|
||||
|
||||
if (expiredLevIds.size() > 0) {
|
||||
auto txn = env.txn_rw();
|
||||
|
||||
quadrable::Quadrable qdb;
|
||||
qdb.init(txn);
|
||||
qdb.checkout("events");
|
||||
|
||||
uint64_t numDeleted = 0;
|
||||
auto changes = qdb.change();
|
||||
|
||||
for (auto &e : expiredEvents) {
|
||||
auto view = env.lookup_Event(txn, e.nodeId);
|
||||
if (!view) throw herr("missing event from index, corrupt DB?");
|
||||
changes.del(flatEventToQuadrableKey(view->flat_nested()), &e.deletedNodeId);
|
||||
for (auto levId : expiredLevIds) {
|
||||
auto view = env.lookup_Event(txn, levId);
|
||||
if (!view) continue; // Deleted in between transactions
|
||||
|
||||
numDeleted++;
|
||||
changes.del(flatEventToQuadrableKey(view->flat_nested()));
|
||||
env.delete_Event(txn, levId);
|
||||
env.dbi_EventPayload.del(txn, lmdb::to_sv<uint64_t>(levId));
|
||||
}
|
||||
|
||||
changes.apply(txn);
|
||||
|
||||
for (auto &e : expiredEvents) {
|
||||
if (e.deletedNodeId) env.delete_Event(txn, e.nodeId);
|
||||
}
|
||||
|
||||
txn.commit();
|
||||
|
||||
if (numDeleted) LI << "Deleted " << numDeleted << " ephemeral events";
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ void RelayServer::runReqMonitor(ThreadPool<MsgReqMonitor>::Thread &thr) {
|
||||
|
||||
auto txn = env.txn_ro();
|
||||
|
||||
uint64_t latestEventId = getMostRecentEventId(txn);
|
||||
uint64_t latestEventId = getMostRecentLevId(txn);
|
||||
if (currEventId > latestEventId) currEventId = latestEventId;
|
||||
|
||||
for (auto &newMsg : newMsgs) {
|
||||
@ -44,8 +44,8 @@ void RelayServer::runReqMonitor(ThreadPool<MsgReqMonitor>::Thread &thr) {
|
||||
monitors.closeConn(msg->connId);
|
||||
} else if (std::get_if<MsgReqMonitor::DBChange>(&newMsg.msg)) {
|
||||
env.foreach_Event(txn, [&](auto &ev){
|
||||
monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t quadId){
|
||||
sendEventToBatch(std::move(recipients), std::string(getEventJson(txn, quadId)));
|
||||
monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t levId){
|
||||
sendEventToBatch(std::move(recipients), std::string(getEventJson(txn, levId)));
|
||||
});
|
||||
return true;
|
||||
}, false, currEventId + 1);
|
||||
|
@ -9,7 +9,7 @@ struct ActiveQueries : NonCopyable {
|
||||
std::deque<DBScanQuery*> running;
|
||||
|
||||
void addSub(lmdb::txn &txn, Subscription &&sub) {
|
||||
sub.latestEventId = getMostRecentEventId(txn);
|
||||
sub.latestEventId = getMostRecentLevId(txn);
|
||||
|
||||
{
|
||||
auto *existing = findQuery(sub.connId, sub.subId);
|
||||
@ -63,8 +63,8 @@ struct ActiveQueries : NonCopyable {
|
||||
return;
|
||||
}
|
||||
|
||||
bool complete = q->process(txn, cfg().relay__queryTimesliceBudgetMicroseconds, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t quadId){
|
||||
server->sendEvent(sub.connId, sub.subId, getEventJson(txn, quadId));
|
||||
bool complete = q->process(txn, cfg().relay__queryTimesliceBudgetMicroseconds, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t levId){
|
||||
server->sendEvent(sub.connId, sub.subId, getEventJson(txn, levId));
|
||||
});
|
||||
|
||||
if (complete) {
|
||||
|
@ -37,7 +37,7 @@ void RelayServer::runWriter(ThreadPool<MsgWriter>::Thread &thr) {
|
||||
bool written = false;
|
||||
|
||||
if (newEvent.status == EventWriteStatus::Written) {
|
||||
LI << "Inserted event. id=" << eventIdHex << " qdbNodeId=" << newEvent.nodeId;
|
||||
LI << "Inserted event. id=" << eventIdHex << " levId=" << newEvent.levId;
|
||||
written = true;
|
||||
} else if (newEvent.status == EventWriteStatus::Duplicate) {
|
||||
message = "duplicate: have this event";
|
||||
|
@ -53,20 +53,20 @@ void RelayServer::runYesstr(ThreadPool<MsgYesstr>::Thread &thr) {
|
||||
// FIXME: The following blocks the whole thread for the query duration. Should interleave it
|
||||
// with other requests like RelayReqWorker does.
|
||||
|
||||
std::vector<uint64_t> quadEventIds;
|
||||
std::vector<uint64_t> levIds;
|
||||
auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr));
|
||||
Subscription sub(1, "junkSub", filterGroup);
|
||||
DBScanQuery query(sub);
|
||||
|
||||
while (1) {
|
||||
bool complete = query.process(txn, MAX_U64, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t quadId){
|
||||
quadEventIds.push_back(quadId);
|
||||
bool complete = query.process(txn, MAX_U64, cfg().relay__logging__dbScanPerf, [&](const auto &sub, uint64_t levId){
|
||||
levIds.push_back(levId);
|
||||
});
|
||||
|
||||
if (complete) break;
|
||||
}
|
||||
|
||||
LI << "Filter matched " << quadEventIds.size() << " local events";
|
||||
LI << "Filter matched " << levIds.size() << " local events";
|
||||
|
||||
qdb->withMemStore(s.m, [&]{
|
||||
qdb->writeToMemStore = true;
|
||||
@ -74,8 +74,8 @@ void RelayServer::runYesstr(ThreadPool<MsgYesstr>::Thread &thr) {
|
||||
|
||||
auto changes = qdb->change();
|
||||
|
||||
for (auto id : quadEventIds) {
|
||||
changes.putReuse(txn, id);
|
||||
for (auto levId : levIds) {
|
||||
changes.putReuse(txn, levId);
|
||||
}
|
||||
|
||||
changes.apply(txn);
|
||||
|
@ -54,10 +54,10 @@ void cmd_monitor(const std::vector<std::string> &subArgs) {
|
||||
}
|
||||
|
||||
env.foreach_Event(txn, [&](auto &ev){
|
||||
monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t quadId){
|
||||
monitors.process(txn, ev, [&](RecipientList &&recipients, uint64_t levId){
|
||||
for (auto &r : recipients) {
|
||||
if (r.connId == interestConnId && r.subId.str() == interestSubId) {
|
||||
std::cout << getEventJson(txn, quadId) << "\n";
|
||||
std::cout << getEventJson(txn, levId) << "\n";
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -35,8 +35,8 @@ void cmd_scan(const std::vector<std::string> &subArgs) {
|
||||
auto txn = env.txn_ro();
|
||||
|
||||
while (1) {
|
||||
bool complete = query.process(txn, pause ? pause : MAX_U64, metrics, [&](const auto &sub, uint64_t quadId){
|
||||
std::cout << getEventJson(txn, quadId) << "\n";
|
||||
bool complete = query.process(txn, pause ? pause : MAX_U64, metrics, [&](const auto &sub, uint64_t levId){
|
||||
std::cout << getEventJson(txn, levId) << "\n";
|
||||
});
|
||||
|
||||
if (complete) break;
|
||||
|
@ -80,7 +80,7 @@ void cmd_stream(const std::vector<std::string> &subArgs) {
|
||||
|
||||
{
|
||||
auto txn = env.txn_ro();
|
||||
currEventId = getMostRecentEventId(txn);
|
||||
currEventId = getMostRecentLevId(txn);
|
||||
}
|
||||
|
||||
ws.onTrigger = [&]{
|
||||
|
@ -148,7 +148,7 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
|
||||
|
||||
|
||||
if (filterStr.size()) {
|
||||
std::vector<uint64_t> quadEventIds;
|
||||
std::vector<uint64_t> levIds;
|
||||
|
||||
tao::json::value filterJson;
|
||||
|
||||
@ -167,14 +167,14 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
|
||||
auto txn = env.txn_ro();
|
||||
|
||||
while (1) {
|
||||
bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t quadId){
|
||||
quadEventIds.push_back(quadId);
|
||||
bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t levId){
|
||||
levIds.push_back(levId);
|
||||
});
|
||||
|
||||
if (complete) break;
|
||||
}
|
||||
|
||||
LI << "Filter matched " << quadEventIds.size() << " local events";
|
||||
LI << "Filter matched " << levIds.size() << " local events";
|
||||
|
||||
controller = std::make_unique<SyncController>(&qdb, &ws);
|
||||
|
||||
@ -184,8 +184,8 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
|
||||
|
||||
auto changes = qdb.change();
|
||||
|
||||
for (auto id : quadEventIds) {
|
||||
changes.putReuse(txn, id);
|
||||
for (auto levId : levIds) {
|
||||
changes.putReuse(txn, levId);
|
||||
}
|
||||
|
||||
changes.apply(txn);
|
||||
|
@ -158,22 +158,22 @@ std::optional<defaultDb::environment::View_Event> lookupEventById(lmdb::txn &txn
|
||||
return output;
|
||||
}
|
||||
|
||||
uint64_t getMostRecentEventId(lmdb::txn &txn) {
|
||||
uint64_t output = 0;
|
||||
uint64_t getMostRecentLevId(lmdb::txn &txn) {
|
||||
uint64_t levId = 0;
|
||||
|
||||
env.foreach_Event(txn, [&](auto &ev){
|
||||
output = ev.primaryKeyId;
|
||||
levId = ev.primaryKeyId;
|
||||
return false;
|
||||
}, true);
|
||||
|
||||
return output;
|
||||
return levId;
|
||||
}
|
||||
|
||||
std::string_view getEventJson(lmdb::txn &txn, uint64_t quadId) {
|
||||
std::string_view getEventJson(lmdb::txn &txn, uint64_t levId) {
|
||||
std::string_view raw;
|
||||
bool found = env.dbiQuadrable_nodesLeaf.get(txn, lmdb::to_sv<uint64_t>(quadId), raw);
|
||||
bool found = env.dbi_EventPayload.get(txn, lmdb::to_sv<uint64_t>(levId), raw);
|
||||
if (!found) throw herr("couldn't find leaf node in quadrable, corrupted DB?");
|
||||
return raw.substr(8 + 32 + 32);
|
||||
return raw.substr(1);
|
||||
}
|
||||
|
||||
|
||||
@ -183,7 +183,7 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector<EventToW
|
||||
|
||||
auto changes = qdb.change();
|
||||
|
||||
std::vector<uint64_t> eventIdsToDelete;
|
||||
std::vector<uint64_t> levIdsToDelete;
|
||||
|
||||
for (size_t i = 0; i < evs.size(); i++) {
|
||||
auto &ev = evs[i];
|
||||
@ -202,13 +202,13 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector<EventToW
|
||||
|
||||
if (isReplaceableEvent(flat->kind())) {
|
||||
auto searchKey = makeKey_StringUint64Uint64(sv(flat->pubkey()), flat->kind(), MAX_U64);
|
||||
uint64_t otherEventId = 0;
|
||||
uint64_t otherLevId = 0;
|
||||
|
||||
env.generic_foreachFull(txn, env.dbi_Event__pubkeyKind, searchKey, lmdb::to_sv<uint64_t>(MAX_U64), [&](auto k, auto v) {
|
||||
ParsedKey_StringUint64Uint64 parsedKey(k);
|
||||
if (parsedKey.s == sv(flat->pubkey()) && parsedKey.n1 == flat->kind()) {
|
||||
if (parsedKey.n2 < flat->created_at()) {
|
||||
otherEventId = lmdb::from_sv<uint64_t>(v);
|
||||
otherLevId = lmdb::from_sv<uint64_t>(v);
|
||||
} else {
|
||||
ev.status = EventWriteStatus::Replaced;
|
||||
}
|
||||
@ -216,11 +216,11 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector<EventToW
|
||||
return false;
|
||||
}, true);
|
||||
|
||||
if (otherEventId) {
|
||||
auto otherEv = env.lookup_Event(txn, otherEventId);
|
||||
if (otherLevId) {
|
||||
auto otherEv = env.lookup_Event(txn, otherLevId);
|
||||
if (!otherEv) throw herr("missing event from index, corrupt DB?");
|
||||
changes.del(flatEventToQuadrableKey(otherEv->flat_nested()));
|
||||
eventIdsToDelete.push_back(otherEventId);
|
||||
levIdsToDelete.push_back(otherLevId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -232,26 +232,33 @@ void writeEvents(lmdb::txn &txn, quadrable::Quadrable &qdb, std::vector<EventToW
|
||||
if (otherEv && sv(otherEv->flat_nested()->pubkey()) == sv(flat->pubkey())) {
|
||||
LI << "Deleting event. id=" << to_hex(sv(tagPair->val()));
|
||||
changes.del(flatEventToQuadrableKey(otherEv->flat_nested()));
|
||||
eventIdsToDelete.push_back(otherEv->primaryKeyId);
|
||||
levIdsToDelete.push_back(otherEv->primaryKeyId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ev.status == EventWriteStatus::Pending) {
|
||||
changes.put(ev.quadKey, ev.jsonStr, &ev.nodeId);
|
||||
}
|
||||
if (ev.status == EventWriteStatus::Pending) changes.put(ev.quadKey, "");
|
||||
}
|
||||
|
||||
changes.apply(txn);
|
||||
|
||||
for (auto eventId : eventIdsToDelete) {
|
||||
env.delete_Event(txn, eventId);
|
||||
for (auto levId : levIdsToDelete) {
|
||||
env.delete_Event(txn, levId);
|
||||
env.dbi_EventPayload.del(txn, lmdb::to_sv<uint64_t>(levId));
|
||||
}
|
||||
|
||||
std::string tmpBuf;
|
||||
|
||||
for (auto &ev : evs) {
|
||||
if (ev.status == EventWriteStatus::Pending) {
|
||||
env.insert_Event(txn, ev.nodeId, ev.receivedAt, ev.flatStr);
|
||||
ev.levId = env.insert_Event(txn, ev.receivedAt, ev.flatStr);
|
||||
|
||||
tmpBuf.clear();
|
||||
tmpBuf += '\x00';
|
||||
tmpBuf += ev.jsonStr;
|
||||
env.dbi_EventPayload.put(txn, lmdb::to_sv<uint64_t>(ev.levId), tmpBuf);
|
||||
|
||||
ev.status = EventWriteStatus::Written;
|
||||
}
|
||||
}
|
||||
|
@ -45,8 +45,8 @@ inline const NostrIndex::Event *flatStrToFlatEvent(std::string_view flatStr) {
|
||||
|
||||
|
||||
std::optional<defaultDb::environment::View_Event> lookupEventById(lmdb::txn &txn, std::string_view id);
|
||||
uint64_t getMostRecentEventId(lmdb::txn &txn);
|
||||
std::string_view getEventJson(lmdb::txn &txn, uint64_t quadId);
|
||||
uint64_t getMostRecentLevId(lmdb::txn &txn);
|
||||
std::string_view getEventJson(lmdb::txn &txn, uint64_t levId);
|
||||
|
||||
inline quadrable::Key flatEventToQuadrableKey(const NostrIndex::Event *flat) {
|
||||
return quadrable::Key::fromIntegerAndHash(flat->created_at(), sv(flat->id()).substr(0, 23));
|
||||
@ -72,8 +72,8 @@ struct EventToWrite {
|
||||
uint64_t receivedAt;
|
||||
void *userData = nullptr;
|
||||
quadrable::Key quadKey;
|
||||
uint64_t nodeId = 0;
|
||||
EventWriteStatus status = EventWriteStatus::Pending;
|
||||
uint64_t levId = 0;
|
||||
|
||||
EventToWrite() {}
|
||||
|
||||
|
Reference in New Issue
Block a user