Use Bytes32 instead of std::string where possible, to reduce memory usage

This commit is contained in:
Doug Hoyte
2024-09-05 15:12:40 -04:00
parent 55fa4dc032
commit 169e633a08
7 changed files with 82 additions and 32 deletions

View File

@ -4,6 +4,7 @@
#include "golpe.h" #include "golpe.h"
#include "Bytes32.h"
#include "Subscription.h" #include "Subscription.h"
#include "filters.h" #include "filters.h"
@ -27,8 +28,8 @@ struct ActiveMonitors : NonCopyable {
}; };
using MonitorSet = flat_hash_map<NostrFilter*, MonitorItem>; using MonitorSet = flat_hash_map<NostrFilter*, MonitorItem>;
btree_map<std::string, MonitorSet> allIds; btree_map<Bytes32, MonitorSet> allIds;
btree_map<std::string, MonitorSet> allAuthors; btree_map<Bytes32, MonitorSet> allAuthors;
btree_map<std::string, MonitorSet> allTags; btree_map<std::string, MonitorSet> allTags;
btree_map<uint64_t, MonitorSet> allKinds; btree_map<uint64_t, MonitorSet> allKinds;
MonitorSet allOthers; MonitorSet allOthers;
@ -116,15 +117,15 @@ struct ActiveMonitors : NonCopyable {
auto packed = PackedEventView(ev.buf); auto packed = PackedEventView(ev.buf);
{ {
auto id = std::string(packed.id()); Bytes32 id(packed.id());
processMonitorsExact(allIds, id, static_cast<std::function<bool(const std::string&)>>([&](const std::string &val){ processMonitorsExact(allIds, id, static_cast<std::function<bool(const Bytes32&)>>([&](const Bytes32 &val){
return id == val; return id == val;
})); }));
} }
{ {
auto pubkey = std::string(packed.pubkey()); Bytes32 pubkey(packed.pubkey());
processMonitorsExact(allAuthors, pubkey, static_cast<std::function<bool(const std::string&)>>([&](const std::string &val){ processMonitorsExact(allAuthors, pubkey, static_cast<std::function<bool(const Bytes32&)>>([&](const Bytes32 &val){
return pubkey == val; return pubkey == val;
})); }));
} }
@ -167,12 +168,12 @@ struct ActiveMonitors : NonCopyable {
for (auto &f : m->sub.filterGroup.filters) { for (auto &f : m->sub.filterGroup.filters) {
if (f.ids) { if (f.ids) {
for (size_t i = 0; i < f.ids->size(); i++) { for (size_t i = 0; i < f.ids->size(); i++) {
auto res = allIds.try_emplace(f.ids->at(i)); auto res = allIds.try_emplace(Bytes32(f.ids->at(i)));
res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
} }
} else if (f.authors) { } else if (f.authors) {
for (size_t i = 0; i < f.authors->size(); i++) { for (size_t i = 0; i < f.authors->size(); i++) {
auto res = allAuthors.try_emplace(f.authors->at(i)); auto res = allAuthors.try_emplace(Bytes32(f.authors->at(i)));
res.first->second.try_emplace(&f, MonitorItem{m, currEventId}); res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
} }
} else if (f.tags.size()) { } else if (f.tags.size()) {
@ -198,15 +199,17 @@ struct ActiveMonitors : NonCopyable {
for (auto &f : m->sub.filterGroup.filters) { for (auto &f : m->sub.filterGroup.filters) {
if (f.ids) { if (f.ids) {
for (size_t i = 0; i < f.ids->size(); i++) { for (size_t i = 0; i < f.ids->size(); i++) {
auto &monSet = allIds.at(f.ids->at(i)); Bytes32 id(f.ids->at(i));
auto &monSet = allIds.at(id);
monSet.erase(&f); monSet.erase(&f);
if (monSet.empty()) allIds.erase(f.ids->at(i)); if (monSet.empty()) allIds.erase(id);
} }
} else if (f.authors) { } else if (f.authors) {
for (size_t i = 0; i < f.authors->size(); i++) { for (size_t i = 0; i < f.authors->size(); i++) {
auto &monSet = allAuthors.at(f.authors->at(i)); Bytes32 author(f.authors->at(i));
auto &monSet = allAuthors.at(author);
monSet.erase(&f); monSet.erase(&f);
if (monSet.empty()) allAuthors.erase(f.authors->at(i)); if (monSet.empty()) allAuthors.erase(author);
} }
} else if (f.tags.size()) { } else if (f.tags.size()) {
for (const auto &[tagName, filterSet] : f.tags) { for (const auto &[tagName, filterSet] : f.tags) {
@ -219,9 +222,10 @@ struct ActiveMonitors : NonCopyable {
} }
} else if (f.kinds) { } else if (f.kinds) {
for (size_t i = 0; i < f.kinds->size(); i++) { for (size_t i = 0; i < f.kinds->size(); i++) {
auto &monSet = allKinds.at(f.kinds->at(i)); uint64_t kind = f.kinds->at(i);
auto &monSet = allKinds.at(kind);
monSet.erase(&f); monSet.erase(&f);
if (monSet.empty()) allKinds.erase(f.kinds->at(i)); if (monSet.empty()) allKinds.erase(kind);
} }
} else { } else {
allOthers.erase(&f); allOthers.erase(&f);

40
src/Bytes32.h Normal file
View File

@ -0,0 +1,40 @@
#pragma once
#include <cstring>
#include <string_view>
#include "golpe.h"
// Fixed-size 32-byte binary value, stored inline in the object.
struct Bytes32 {
    uint8_t buf[32];

    // Copies the bytes of `s` into buf. Throws herr when `s` is not exactly 32 bytes long.
    // Intentionally left implicit so a std::string_view can be passed where a Bytes32 is expected.
    Bytes32(std::string_view s) {
        if (s.size() != sizeof(buf)) throw herr("invalid length for Bytes32");
        std::memcpy(buf, s.data(), sizeof(buf));
    }

    // Non-owning view over the 32 stored bytes; it points into this object.
    std::string_view sv() const {
        const char *bytes = reinterpret_cast<const char*>(buf);
        return std::string_view(bytes, sizeof(buf));
    }

    // Byte-wise ordering. Returns the raw memcmp result (negative, zero or positive)
    // as an int rather than a standard comparison category.
    int operator <=>(const Bytes32& other) const {
        return std::memcmp(buf, other.buf, sizeof(buf));
    }

    // True when all 32 bytes are identical.
    bool operator==(const Bytes32 &other) const {
        return sv() == other.sv();
    }
};
// Inject specialization of std::hash, so Bytes32 can be used in a flat_hash_set
namespace std {
    template<> struct hash<Bytes32> {
        // XOR-folds the 32 bytes, read as four 64-bit words, into a single size_t.
        std::size_t operator()(Bytes32 const &b) const {
            static_assert(sizeof(b.buf) == 32);

            // Copy into properly typed storage instead of casting the byte array to an
            // integer pointer: buf has no alignment guarantee for 64-bit loads, reading it
            // through a uint64_t* breaks strict aliasing, and size_t* is not convertible
            // to uint64_t* on every platform.
            uint64_t words[4];
            static_assert(sizeof(words) == sizeof(b.buf));
            std::memcpy(words, b.buf, sizeof(words));

            return size_t(words[0] ^ words[1] ^ words[2] ^ words[3]);
        }
    };
}

View File

@ -3,6 +3,7 @@
#include <docopt.h> #include <docopt.h>
#include "golpe.h" #include "golpe.h"
#include "Bytes32.h"
#include "NegentropyFilterCache.h" #include "NegentropyFilterCache.h"
#include "events.h" #include "events.h"
#include "DBQuery.h" #include "DBQuery.h"
@ -74,7 +75,7 @@ void cmd_negentropy(const std::vector<std::string> &subArgs) {
struct Record { struct Record {
uint64_t created_at; uint64_t created_at;
uint8_t id[32]; Bytes32 id;
}; };
std::vector<Record> recs; std::vector<Record> recs;
@ -100,8 +101,8 @@ void cmd_negentropy(const std::vector<std::string> &subArgs) {
bool complete = query.process(txn, [&](const auto &sub, uint64_t levId){ bool complete = query.process(txn, [&](const auto &sub, uint64_t levId){
auto ev = lookupEventByLevId(txn, levId); auto ev = lookupEventByLevId(txn, levId);
auto packed = PackedEventView(ev.buf); auto packed = PackedEventView(ev.buf);
recs.push_back({ packed.created_at(), }); recs.emplace_back(packed.created_at(), packed.id());
memcpy(recs.back().id, packed.id().data(), 32); //memcpy(recs.back().id, packed.id().data(), 32);
}); });
if (complete) break; if (complete) break;
@ -112,7 +113,7 @@ void cmd_negentropy(const std::vector<std::string> &subArgs) {
negentropy::storage::BTreeLMDB storage(txn, negentropyDbi, treeId); negentropy::storage::BTreeLMDB storage(txn, negentropyDbi, treeId);
for (const auto &r : recs) { for (const auto &r : recs) {
storage.insert(r.created_at, std::string_view((char*)r.id, 32)); storage.insert(r.created_at, r.id.sv());
} }
storage.flush(); storage.flush();

View File

@ -5,6 +5,7 @@
#include "golpe.h" #include "golpe.h"
#include "Bytes32.h"
#include "WriterPipeline.h" #include "WriterPipeline.h"
#include "Subscription.h" #include "Subscription.h"
#include "WSConnection.h" #include "WSConnection.h"
@ -31,7 +32,7 @@ void cmd_stream(const std::vector<std::string> &subArgs) {
if (dir != "up" && dir != "down" && dir != "both") throw herr("invalid direction: ", dir, ". Should be one of up/down/both"); if (dir != "up" && dir != "down" && dir != "both") throw herr("invalid direction: ", dir, ". Should be one of up/down/both");
flat_hash_set<std::string> downloadedIds; flat_hash_set<Bytes32> downloadedIds;
WriterPipeline writer; WriterPipeline writer;
WSConnection ws(url); WSConnection ws(url);
Decompressor decomp; Decompressor decomp;
@ -101,7 +102,7 @@ void cmd_stream(const std::vector<std::string> &subArgs) {
env.foreach_Event(txn, [&](auto &ev){ env.foreach_Event(txn, [&](auto &ev){
currEventId = ev.primaryKeyId; currEventId = ev.primaryKeyId;
auto id = std::string(PackedEventView(ev.buf).id()); Bytes32 id(PackedEventView(ev.buf).id());
if (downloadedIds.find(id) != downloadedIds.end()) { if (downloadedIds.find(id) != downloadedIds.end()) {
downloadedIds.erase(id); downloadedIds.erase(id);
return true; return true;

View File

@ -7,6 +7,7 @@
#include "golpe.h" #include "golpe.h"
#include "Bytes32.h"
#include "WriterPipeline.h" #include "WriterPipeline.h"
#include "Subscription.h" #include "Subscription.h"
#include "WSConnection.h" #include "WSConnection.h"
@ -150,8 +151,8 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
const uint64_t batchSizeDown = 50; const uint64_t batchSizeDown = 50;
uint64_t inFlightUp = 0; uint64_t inFlightUp = 0;
bool inFlightDown = false; // bool because we can't count on getting every EVENT we request (might've been deleted mid-query) bool inFlightDown = false; // bool because we can't count on getting every EVENT we request (might've been deleted mid-query)
std::vector<std::string> have, need; std::vector<Bytes32> have, need;
flat_hash_set<std::string> seenHave, seenNeed; flat_hash_set<Bytes32> seenHave, seenNeed;
bool syncDone = false; bool syncDone = false;
uint64_t totalHaves = 0, totalNeeds = 0; uint64_t totalHaves = 0, totalNeeds = 0;
Decompressor decomp; Decompressor decomp;
@ -186,16 +187,18 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
neMsg = ne.reconcile(inputMsg, currHave, currNeed); neMsg = ne.reconcile(inputMsg, currHave, currNeed);
} }
for (auto &id : currHave) { for (auto &idStr : currHave) {
Bytes32 id(idStr);
if (seenHave.contains(id)) continue; if (seenHave.contains(id)) continue;
seenHave.insert(id); seenHave.insert(id);
have.push_back(std::move(id)); have.push_back(id);
} }
for (auto &id : currNeed) { for (auto &idStr : currNeed) {
Bytes32 id(idStr);
if (seenNeed.contains(id)) continue; if (seenNeed.contains(id)) continue;
seenNeed.insert(id); seenNeed.insert(id);
need.push_back(std::move(id)); need.push_back(id);
} }
} catch (std::exception &e) { } catch (std::exception &e) {
LE << "Unable to parse negentropy message from relay: " << e.what(); LE << "Unable to parse negentropy message from relay: " << e.what();
@ -263,7 +266,7 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
auto id = std::move(have.back()); auto id = std::move(have.back());
have.pop_back(); have.pop_back();
auto ev = lookupEventById(txn, id); auto ev = lookupEventById(txn, id.sv());
if (!ev) { if (!ev) {
LW << "Couldn't upload event because not found (deleted?)"; LW << "Couldn't upload event because not found (deleted?)";
continue; continue;
@ -285,7 +288,7 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
tao::json::value ids = tao::json::empty_array; tao::json::value ids = tao::json::empty_array;
while (need.size() > 0 && ids.get_array().size() < batchSizeDown) { while (need.size() > 0 && ids.get_array().size() < batchSizeDown) {
ids.emplace_back(to_hex(need.back())); ids.emplace_back(to_hex(need.back().sv()));
need.pop_back(); need.pop_back();
} }

View File

@ -68,7 +68,7 @@ std::string nostrJsonToPackedEvent(const tao::json::value &v) {
return std::move(builder.buf); return std::move(builder.buf);
} }
std::string nostrHash(const tao::json::value &origJson) { Bytes32 nostrHash(const tao::json::value &origJson) {
tao::json::value arr = tao::json::empty_array; tao::json::value arr = tao::json::empty_array;
arr.emplace_back(0); arr.emplace_back(0);
@ -84,7 +84,7 @@ std::string nostrHash(const tao::json::value &origJson) {
unsigned char hash[SHA256_DIGEST_LENGTH]; unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256(reinterpret_cast<unsigned char*>(encoded.data()), encoded.size(), hash); SHA256(reinterpret_cast<unsigned char*>(encoded.data()), encoded.size(), hash);
return std::string(reinterpret_cast<char*>(hash), SHA256_DIGEST_LENGTH); return Bytes32(std::string_view(reinterpret_cast<char*>(hash), SHA256_DIGEST_LENGTH));
} }
bool verifySig(secp256k1_context* ctx, std::string_view sig, std::string_view hash, std::string_view pubkey) { bool verifySig(secp256k1_context* ctx, std::string_view sig, std::string_view hash, std::string_view pubkey) {
@ -106,7 +106,7 @@ bool verifySig(secp256k1_context* ctx, std::string_view sig, std::string_view ha
void verifyNostrEvent(secp256k1_context *secpCtx, PackedEventView packed, const tao::json::value &origJson) { void verifyNostrEvent(secp256k1_context *secpCtx, PackedEventView packed, const tao::json::value &origJson) {
auto hash = nostrHash(origJson); auto hash = nostrHash(origJson);
if (hash != packed.id()) throw herr("bad event id"); if (hash != Bytes32(packed.id())) throw herr("bad event id");
bool valid = verifySig(secpCtx, from_hex(jsonGetString(origJson.at("sig"), "event sig was not a string"), false), packed.id(), packed.pubkey()); bool valid = verifySig(secpCtx, from_hex(jsonGetString(origJson.at("sig"), "event sig was not a string"), false), packed.id(), packed.pubkey());
if (!valid) throw herr("bad signature"); if (!valid) throw herr("bad signature");

View File

@ -4,6 +4,7 @@
#include "golpe.h" #include "golpe.h"
#include "Bytes32.h"
#include "PackedEvent.h" #include "PackedEvent.h"
#include "NegentropyFilterCache.h" #include "NegentropyFilterCache.h"
#include "Decompressor.h" #include "Decompressor.h"
@ -36,7 +37,7 @@ inline bool isEphemeralKind(uint64_t kind) {
std::string nostrJsonToPackedEvent(const tao::json::value &v); std::string nostrJsonToPackedEvent(const tao::json::value &v);
std::string nostrHash(const tao::json::value &origJson); Bytes32 nostrHash(const tao::json::value &origJson);
bool verifySig(secp256k1_context* ctx, std::string_view sig, std::string_view hash, std::string_view pubkey); bool verifySig(secp256k1_context* ctx, std::string_view sig, std::string_view hash, std::string_view pubkey);
void verifyNostrEvent(secp256k1_context *secpCtx, PackedEventView packed, const tao::json::value &origJson); void verifyNostrEvent(secp256k1_context *secpCtx, PackedEventView packed, const tao::json::value &origJson);