remove prefix matching for ids and authors filter fields

- this also fixes https://github.com/hoytech/strfry/issues/109
This commit is contained in:
Doug Hoyte
2024-08-30 15:56:41 -04:00
parent 80249f2078
commit 21d4e07b87
9 changed files with 42 additions and 70 deletions

View File

@ -100,17 +100,6 @@ struct ActiveMonitors : NonCopyable {
}
};
// Visits every entry of `m` whose key shares its first byte with `key`, starting at the
// first entry not less than that one-byte prefix, and hands the entry's monitor set to
// processMonitorSet whenever `matches` accepts the stored key.
auto processMonitorsPrefix = [&](btree_map<std::string, MonitorSet> &m, const std::string &key, std::function<bool(const std::string&)> matches){
    // The loop condition already covers an empty range, so no separate end() guard is needed.
    for (auto cur = m.lower_bound(key.substr(0, 1)); cur != m.end() && cur->first[0] == key[0]; ++cur) {
        if (!matches(cur->first)) continue;
        processMonitorSet(cur->second);
    }
};
auto processMonitorsExact = [&]<typename T>(btree_map<T, MonitorSet> &m, const T &key, std::function<bool(const T &)> matches){
auto it = m.upper_bound(key);
@ -128,15 +117,15 @@ struct ActiveMonitors : NonCopyable {
{
auto id = std::string(packed.id());
processMonitorsPrefix(allIds, id, static_cast<std::function<bool(const std::string&)>>([&](const std::string &val){
return id.starts_with(val);
processMonitorsExact(allIds, id, static_cast<std::function<bool(const std::string&)>>([&](const std::string &val){
return id == val;
}));
}
{
auto pubkey = std::string(packed.pubkey());
processMonitorsPrefix(allAuthors, pubkey, static_cast<std::function<bool(const std::string&)>>([&](const std::string &val){
return pubkey.starts_with(val);
processMonitorsExact(allAuthors, pubkey, static_cast<std::function<bool(const std::string&)>>([&](const std::string &val){
return pubkey == val;
}));
}

View File

@ -24,7 +24,6 @@ struct DBScan : NonCopyable {
// Verdict returned by a scan cursor's key-matching callback for an index key it is shown.
enum class KeyMatchResult {
Yes, // the key starts with the cursor's search bytes (and, where the callback checks it, carries the wanted kind)
No, // the key does not start with the cursor's search bytes
NoButContinue, // the key matched a partial pubkey prefix but not the kind; NOTE(review): presumably tells the scanner to keep walking this cursor -- confirm against the scan loop
};
struct ScanCursor {
@ -111,13 +110,13 @@ struct DBScan : NonCopyable {
cursors.reserve(f.ids->size());
for (uint64_t i = 0; i < f.ids->size(); i++) {
std::string prefix = f.ids->at(i);
std::string search = f.ids->at(i);
cursors.emplace_back(
padBytes(prefix, 32 + 8, '\xFF'),
search + std::string(8, '\xFF'),
MAX_U64,
[prefix](std::string_view k){
return k.starts_with(prefix) ? KeyMatchResult::Yes : KeyMatchResult::No;
[search](std::string_view k){
return k.starts_with(search) ? KeyMatchResult::Yes : KeyMatchResult::No;
}
);
}
@ -161,22 +160,15 @@ struct DBScan : NonCopyable {
for (uint64_t j = 0; j < f.kinds->size(); j++) {
uint64_t kind = f.kinds->at(j);
std::string prefix = f.authors->at(i);
if (prefix.size() == 32) prefix += lmdb::to_sv<uint64_t>(kind);
std::string search = f.authors->at(i);
search += lmdb::to_sv<uint64_t>(kind);
cursors.emplace_back(
padBytes(prefix, 32 + 8 + 8, '\xFF'),
search + std::string(8, '\xFF'),
MAX_U64,
[prefix, kind](std::string_view k){
if (!k.starts_with(prefix)) return KeyMatchResult::No;
if (prefix.size() == 32 + 8) return KeyMatchResult::Yes;
ParsedKey_StringUint64Uint64 parsedKey(k);
if (parsedKey.n1 == kind) return KeyMatchResult::Yes;
// With a prefix pubkey, continue scanning (pubkey,kind) backwards because with this index
// we don't know the next pubkey to jump back to
return KeyMatchResult::NoButContinue;
[search, kind](std::string_view k){
if (!k.starts_with(search)) return KeyMatchResult::No;
return KeyMatchResult::Yes;
}
);
}
@ -189,13 +181,13 @@ struct DBScan : NonCopyable {
cursors.reserve(f.authors->size());
for (uint64_t i = 0; i < f.authors->size(); i++) {
std::string prefix = f.authors->at(i);
std::string search = f.authors->at(i);
cursors.emplace_back(
padBytes(prefix, 32 + 8, '\xFF'),
search + std::string(8, '\xFF'),
MAX_U64,
[prefix](std::string_view k){
return k.starts_with(prefix) ? KeyMatchResult::Yes : KeyMatchResult::No;
[search](std::string_view k){
return k.starts_with(search) ? KeyMatchResult::Yes : KeyMatchResult::No;
}
);
}

View File

@ -29,8 +29,9 @@ struct FilterSetBytes {
std::sort(arr.begin(), arr.end());
for (const auto &item : arr) {
if (items.size() > 0 && item.starts_with(at(items.size() - 1))) continue; // remove duplicates and redundant prefixes
for (size_t i = 0; i < arr.size(); i++) {
const auto &item = arr[i];
if (i > 0 && item == arr[i - 1]) continue; // remove duplicates
items.emplace_back(Item{ (uint16_t)buf.size(), (uint8_t)item.size(), (uint8_t)item[0] });
buf += item;
}
@ -72,7 +73,7 @@ struct FilterSetBytes {
}
if (first == 0) return false;
if (candidate.starts_with(std::string_view(buf.data() + items[first - 1].offset, items[first - 1].size))) return true;
if (candidate == std::string_view(buf.data() + items[first - 1].offset, items[first - 1].size)) return true;
return false;
}
@ -123,14 +124,14 @@ struct NostrFilter {
for (const auto &[k, v] : filterObj.get_object()) {
if (v.is_array() && v.get_array().size() == 0) {
neverMatch = true;
break;
continue;
}
if (k == "ids") {
ids.emplace(v, true, 1, 32);
ids.emplace(v, true, 32, 32);
numMajorFields++;
} else if (k == "authors") {
authors.emplace(v, true, 1, 32);
authors.emplace(v, true, 32, 32);
numMajorFields++;
} else if (k == "kinds") {
kinds.emplace(v);
@ -159,7 +160,7 @@ struct NostrFilter {
}
}
if (tags.size() > 2) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it
if (tags.size() > 3) throw herr("too many tags in filter"); // O(N^2) in matching, just prohibit it
if (limit > maxFilterLimit) limit = maxFilterLimit;

View File

@ -15,5 +15,4 @@ std::string renderPercent(double p);
uint64_t parseUint64(const std::string &s);
std::string parseIP(const std::string &ip);
uint64_t getDBVersion(lmdb::txn &txn);
std::string padBytes(std::string_view str, size_t n, char padChar);
void exitOnSigPipe();

View File

@ -110,11 +110,6 @@ uint64_t getDBVersion(lmdb::txn &txn) {
}
// Returns a copy of `str` right-padded with `padChar` so the result is exactly `n` bytes long.
// Throws herr when `str` is already longer than `n`.
std::string padBytes(std::string_view str, size_t n, char padChar) {
    if (n < str.size()) throw herr("unable to pad, string longer than expected");

    std::string padded(str);
    padded.append(n - str.size(), padChar); // appends nothing when str is already n bytes
    return padded;
}
void exitOnSigPipe() {
struct sigaction act;
memset(&act, 0, sizeof act);