From 18a7cc556ab87c9b59610cc7028deaa6251d53b9 Mon Sep 17 00:00:00 2001
From: Doug Hoyte
Date: Mon, 16 Jan 2023 14:04:44 -0500
Subject: [PATCH] support empty arrays in REQ filters

- These never match, so clients shouldn't really ever send them, but
  there is no reason to fail if they do
---
 golpe.yaml             |  2 +-
 src/ActiveMonitors.h   | 42 ++++++++++++++++++++--------------------
 src/DBScan.h           | 32 +++++++++++++++---------------
 src/filters.h          | 44 +++++++++++++++++++++---------------------
 test/filterFuzzTest.pl | 15 +++++++++-----
 5 files changed, 70 insertions(+), 65 deletions(-)

diff --git a/golpe.yaml b/golpe.yaml
index d1a27bf..edf909a 100644
--- a/golpe.yaml
+++ b/golpe.yaml
@@ -131,4 +131,4 @@ config:
     default: 250
   - name: events__maxTagValSize
     desc: "Maximum size for tag values, in bytes"
-    default: 128
+    default: 255
diff --git a/src/ActiveMonitors.h b/src/ActiveMonitors.h
index 68823b0..6de6996 100644
--- a/src/ActiveMonitors.h
+++ b/src/ActiveMonitors.h
@@ -161,14 +161,14 @@ struct ActiveMonitors : NonCopyable {

     void installLookups(Monitor *m, uint64_t currEventId) {
         for (auto &f : m->sub.filterGroup.filters) {
-            if (f.ids.size()) {
-                for (size_t i = 0; i < f.ids.size(); i++) {
-                    auto res = allIds.try_emplace(f.ids.at(i));
+            if (f.ids) {
+                for (size_t i = 0; i < f.ids->size(); i++) {
+                    auto res = allIds.try_emplace(f.ids->at(i));
                     res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
                 }
-            } else if (f.authors.size()) {
-                for (size_t i = 0; i < f.authors.size(); i++) {
-                    auto res = allAuthors.try_emplace(f.authors.at(i));
+            } else if (f.authors) {
+                for (size_t i = 0; i < f.authors->size(); i++) {
+                    auto res = allAuthors.try_emplace(f.authors->at(i));
                     res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
                 }
             } else if (f.tags.size()) {
@@ -179,9 +179,9 @@ struct ActiveMonitors : NonCopyable {
                         res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
                     }
                 }
-            } else if (f.kinds.size()) {
-                for (size_t i = 0; i < f.kinds.size(); i++) {
-                    auto res = allKinds.try_emplace(f.kinds.at(i));
+            } else if (f.kinds) {
+                for (size_t i = 0; i < f.kinds->size(); i++) {
+                    auto res = allKinds.try_emplace(f.kinds->at(i));
                     res.first->second.try_emplace(&f, MonitorItem{m, currEventId});
                 }
             } else {
@@ -192,17 +192,17 @@ struct ActiveMonitors : NonCopyable {

     void uninstallLookups(Monitor *m) {
         for (auto &f : m->sub.filterGroup.filters) {
-            if (f.ids.size()) {
-                for (size_t i = 0; i < f.ids.size(); i++) {
-                    auto &monSet = allIds.at(f.ids.at(i));
+            if (f.ids) {
+                for (size_t i = 0; i < f.ids->size(); i++) {
+                    auto &monSet = allIds.at(f.ids->at(i));
                     monSet.erase(&f);
-                    if (monSet.empty()) allIds.erase(f.ids.at(i));
+                    if (monSet.empty()) allIds.erase(f.ids->at(i));
                 }
-            } else if (f.authors.size()) {
-                for (size_t i = 0; i < f.authors.size(); i++) {
-                    auto &monSet = allAuthors.at(f.authors.at(i));
+            } else if (f.authors) {
+                for (size_t i = 0; i < f.authors->size(); i++) {
+                    auto &monSet = allAuthors.at(f.authors->at(i));
                     monSet.erase(&f);
-                    if (monSet.empty()) allAuthors.erase(f.authors.at(i));
+                    if (monSet.empty()) allAuthors.erase(f.authors->at(i));
                 }
             } else if (f.tags.size()) {
                 for (const auto &[tagName, filterSet] : f.tags) {
@@ -213,11 +213,11 @@ struct ActiveMonitors : NonCopyable {
                         if (monSet.empty()) allTags.erase(tagSpec);
                     }
                 }
-            } else if (f.kinds.size()) {
-                for (size_t i = 0; i < f.kinds.size(); i++) {
-                    auto &monSet = allKinds.at(f.kinds.at(i));
+            } else if (f.kinds) {
+                for (size_t i = 0; i < f.kinds->size(); i++) {
+                    auto &monSet = allKinds.at(f.kinds->at(i));
                     monSet.erase(&f);
-                    if (monSet.empty()) allKinds.erase(f.kinds.at(i));
+                    if (monSet.empty()) allKinds.erase(f.kinds->at(i));
                 }
             } else {
                 allOthers.erase(&f);
diff --git a/src/DBScan.h b/src/DBScan.h
index ae2f33d..b69abe2 100644
--- a/src/DBScan.h
+++ b/src/DBScan.h
@@ -57,7 +57,7 @@ struct DBScan {
     DBScan(const NostrFilter &f_) : f(f_) {
         remainingLimit = f.limit;

-        if (f.ids.size()) {
+        if (f.ids) {
             LI << "ID Scan";

             scanState = IdScan{};
@@ -65,20 +65,20 @@ struct DBScan {
             indexDbi = env.dbi_Event__id;

             isComplete = [&, state]{
-                return state->index >= f.ids.size();
+                return state->index >= f.ids->size();
             };
             nextFilterItem = [&, state]{
                 state->index++;
             };
             resetResume = [&, state]{
-                state->prefix = f.ids.at(state->index);
+                state->prefix = f.ids->at(state->index);
                 resumeKey = padBytes(state->prefix, 32 + 8, '\xFF');
                 resumeVal = MAX_U64;
             };
             keyMatch = [&, state](std::string_view k, bool&){
                 return k.starts_with(state->prefix);
             };
-        } else if (f.authors.size() && f.kinds.size()) {
+        } else if (f.authors && f.kinds) {
             LI << "PubkeyKind Scan";

             scanState = PubkeyKindScan{};
@@ -86,18 +86,18 @@ struct DBScan {
             indexDbi = env.dbi_Event__pubkeyKind;

             isComplete = [&, state]{
-                return state->indexAuthor >= f.authors.size();
+                return state->indexAuthor >= f.authors->size();
             };
             nextFilterItem = [&, state]{
                 state->indexKind++;
-                if (state->indexKind >= f.kinds.size()) {
+                if (state->indexKind >= f.kinds->size()) {
                     state->indexAuthor++;
                     state->indexKind = 0;
                 }
             };
             resetResume = [&, state]{
-                state->prefix = f.authors.at(state->indexAuthor);
-                if (state->prefix.size() == 32) state->prefix += lmdb::to_sv(f.kinds.at(state->indexKind));
+                state->prefix = f.authors->at(state->indexAuthor);
+                if (state->prefix.size() == 32) state->prefix += lmdb::to_sv(f.kinds->at(state->indexKind));
                 resumeKey = padBytes(state->prefix, 32 + 8 + 8, '\xFF');
                 resumeVal = MAX_U64;
             };
@@ -106,14 +106,14 @@ struct DBScan {
                 if (state->prefix.size() == 32 + 8) return true;

                 ParsedKey_StringUint64Uint64 parsedKey(k);
-                if (parsedKey.n1 <= f.kinds.at(state->indexKind)) return true;
+                if (parsedKey.n1 <= f.kinds->at(state->indexKind)) return true;

-                resumeKey = makeKey_StringUint64Uint64(parsedKey.s, f.kinds.at(state->indexKind), MAX_U64);
+                resumeKey = makeKey_StringUint64Uint64(parsedKey.s, f.kinds->at(state->indexKind), MAX_U64);
                 resumeVal = MAX_U64;
                 skipBack = true;
                 return false;
             };
-        } else if (f.authors.size()) {
+        } else if (f.authors) {
             LI << "Pubkey Scan";

             scanState = PubkeyScan{};
@@ -121,13 +121,13 @@ struct DBScan {
             indexDbi = env.dbi_Event__pubkey;

             isComplete = [&, state]{
-                return state->index >= f.authors.size();
+                return state->index >= f.authors->size();
             };
             nextFilterItem = [&, state]{
                 state->index++;
             };
             resetResume = [&, state]{
-                state->prefix = f.authors.at(state->index);
+                state->prefix = f.authors->at(state->index);
                 resumeKey = padBytes(state->prefix, 32 + 8, '\xFF');
                 resumeVal = MAX_U64;
             };
@@ -160,7 +160,7 @@ struct DBScan {
             keyMatch = [&, state](std::string_view k, bool&){
                 return k.substr(0, state->search.size()) == state->search;
             };
-        } else if (f.kinds.size()) {
+        } else if (f.kinds) {
             LI << "Kind Scan";

             scanState = KindScan{};
@@ -168,13 +168,13 @@ struct DBScan {
             indexDbi = env.dbi_Event__kind;

             isComplete = [&, state]{
-                return state->index >= f.kinds.size();
+                return state->index >= f.kinds->size();
             };
             nextFilterItem = [&, state]{
                 state->index++;
             };
             resetResume = [&, state]{
-                state->kind = f.kinds.at(state->index);
+                state->kind = f.kinds->at(state->index);
                 resumeKey = std::string(lmdb::to_sv(state->kind)) + std::string(8, '\xFF');
                 resumeVal = MAX_U64;
             };
diff --git a/src/filters.h b/src/filters.h
index d994a01..bf7ea7b 100644
--- a/src/filters.h
+++ b/src/filters.h
@@ -15,11 +15,9 @@ struct FilterSetBytes {
     std::vector<Item> items;
     std::string buf;

-    FilterSetBytes() {}
-
     // Sizes are post-hex decode

-    void init(const tao::json::value &arrHex, bool hexDecode, size_t minSize, size_t maxSize) {
+    FilterSetBytes(const tao::json::value &arrHex, bool hexDecode, size_t minSize, size_t maxSize) {
         std::vector<std::string> arr;

         uint64_t totalSize = 0;
@@ -32,8 +30,6 @@ struct FilterSetBytes {
             totalSize += itemSize;
         }

-        if (arr.size() == 0) throw herr("empty filter item");
-
         std::sort(arr.begin(), arr.end());

         for (const auto &item : arr) {
@@ -90,15 +86,11 @@ struct FilterSetBytes {
 struct FilterSetUint {
     std::vector<uint64_t> items;

-    FilterSetUint() {}
-
-    void init(const tao::json::value &arr) {
+    FilterSetUint(const tao::json::value &arr) {
         for (const auto &i : arr.get_array()) {
             items.push_back(i.get_unsigned());
         }

-        if (items.size() == 0) throw herr("empty filter item");
-
         std::sort(items.begin(), items.end());
         items.erase(std::unique(items.begin(), items.end()), items.end()); // remove duplicates

@@ -119,39 +111,44 @@ struct FilterSetUint {
 };

 struct NostrFilter {
-    FilterSetBytes ids;
-    FilterSetBytes authors;
-    FilterSetUint kinds;
+    std::optional<FilterSetBytes> ids;
+    std::optional<FilterSetBytes> authors;
+    std::optional<FilterSetUint> kinds;
     std::map<char, FilterSetBytes> tags;
     uint64_t since = 0;
     uint64_t until = MAX_U64;
     uint64_t limit = MAX_U64;
+    bool neverMatch = false;
     bool indexOnlyScans = false;

     explicit NostrFilter(const tao::json::value &filterObj) {
         uint64_t numMajorFields = 0;

         for (const auto &[k, v] : filterObj.get_object()) {
+            if (v.is_array() && v.get_array().size() == 0) {
+                neverMatch = true;
+                break;
+            }
+
             if (k == "ids") {
-                ids.init(v, true, 1, 32);
+                ids.emplace(v, true, 1, 32);
                 numMajorFields++;
             } else if (k == "authors") {
-                authors.init(v, true, 1, 32);
+                authors.emplace(v, true, 1, 32);
                 numMajorFields++;
             } else if (k == "kinds") {
-                kinds.init(v);
+                kinds.emplace(v);
                 numMajorFields++;
             } else if (k.starts_with('#')) {
                 numMajorFields++;
                 if (k.size() == 2) {
                     char tag = k[1];
-                    auto [it, _] = tags.emplace(tag, FilterSetBytes{});

                     if (tag == 'p' || tag == 'e') {
-                        it->second.init(v, true, 32, 32);
+                        tags.emplace(tag, FilterSetBytes(v, true, 32, 32));
                     } else {
-                        it->second.init(v, false, 1, cfg().events__maxTagValSize);
+                        tags.emplace(tag, FilterSetBytes(v, false, 1, cfg().events__maxTagValSize));
                     }
                 } else {
                     throw herr("unindexed tag filter");
                 }
@@ -182,11 +179,13 @@ struct NostrFilter {
     }

     bool doesMatch(const NostrIndex::Event *ev) const {
+        if (neverMatch) return false;
+
         if (!doesMatchTimes(ev->created_at())) return false;

-        if (ids.size() && !ids.doesMatch(sv(ev->id()))) return false;
-        if (authors.size() && !authors.doesMatch(sv(ev->pubkey()))) return false;
-        if (kinds.size() && !kinds.doesMatch(ev->kind())) return false;
+        if (ids && !ids->doesMatch(sv(ev->id()))) return false;
+        if (authors && !authors->doesMatch(sv(ev->pubkey()))) return false;
+        if (kinds && !kinds->doesMatch(ev->kind())) return false;

         for (const auto &[tag, filt] : tags) {
             bool foundMatch = false;
@@ -216,6 +215,7 @@ struct NostrFilterGroup {

         for (size_t i = 2; i < arr.size(); i++) {
             filters.emplace_back(arr[i]);
+            if (filters.back().neverMatch) filters.pop_back();
         }
     }

diff --git a/test/filterFuzzTest.pl b/test/filterFuzzTest.pl
index f6dff43..eab026d 100644
--- a/test/filterFuzzTest.pl
+++ b/test/filterFuzzTest.pl
@@ -96,31 +96,36 @@ sub genRandomFilterGroup {
     while (!keys %$f) {
         if (rand() < .15) {
-            for (1..(rand()*10) + 1) {
+            $f->{ids} = [];
+            for (1..(rand()*10)) {
                 push @{$f->{ids}}, randPrefix($ids->[int(rand() * @$ids)]);
             }
         }

         if (rand() < .3) {
-            for (1..(rand()*5) + 1) {
+            $f->{authors} = [];
+            for (1..(rand()*5)) {
                 push @{$f->{authors}}, randPrefix($pubkeys->[int(rand() * @$pubkeys)]);
             }
         }

         if (rand() < .2) {
-            for (1..(rand()*5) + 1) {
+            $f->{kinds} = [];
+            for (1..(rand()*5)) {
                 push @{$f->{kinds}}, 0+$kinds->[int(rand() * @$kinds)];
             }
         }

         if (rand() < .2) {
-            for (1..(rand()*10) + 1) {
+            $f->{'#e'} = [];
+            for (1..(rand()*10)) {
                 push @{$f->{'#e'}}, $ids->[int(rand() * @$ids)];
             }
         }

         if (rand() < .2) {
-            for (1..(rand()*5) + 1) {
+            $f->{'#p'} = [];
+            for (1..(rand()*5)) {
                 push @{$f->{'#p'}}, $pubkeys->[int(rand() * @$pubkeys)];
             }
         }
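
A minimal standalone sketch of the pattern this patch introduces in src/filters.h: filter sets become std::optional, and a present-but-empty array flags the filter as neverMatch instead of being rejected with "empty filter item". The MiniFilter type, its fields, and main() below are hypothetical stand-ins for illustration only, not strfry's real NostrFilter or its tao::json plumbing.

// Minimal sketch (hypothetical types): std::nullopt models an absent field,
// an engaged-but-empty vector models a client-supplied "[]", and an empty
// vector marks the filter as never matching instead of throwing an error.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct MiniFilter {
    std::optional<std::vector<std::string>> ids;
    std::optional<std::vector<uint64_t>> kinds;
    bool neverMatch = false;

    MiniFilter(std::optional<std::vector<std::string>> ids_,
               std::optional<std::vector<uint64_t>> kinds_)
        : ids(std::move(ids_)), kinds(std::move(kinds_)) {
        // Core idea of the patch: an empty set can never be satisfied, so flag
        // the whole filter rather than failing the REQ.
        if ((ids && ids->empty()) || (kinds && kinds->empty())) neverMatch = true;
    }

    bool doesMatch(const std::string &id, uint64_t kind) const {
        if (neverMatch) return false;
        if (ids && std::find(ids->begin(), ids->end(), id) == ids->end()) return false;
        if (kinds && std::find(kinds->begin(), kinds->end(), kind) == kinds->end()) return false;
        return true;
    }
};

int main() {
    MiniFilter unconstrained{std::nullopt, std::nullopt};         // {}
    MiniFilter kindOne{std::nullopt, std::vector<uint64_t>{1}};   // {"kinds":[1]}
    MiniFilter emptyKinds{std::nullopt, std::vector<uint64_t>{}}; // {"kinds":[]}

    std::cout << unconstrained.doesMatch("ab", 1) << "\n"; // 1
    std::cout << kindOne.doesMatch("ab", 1) << "\n";       // 1
    std::cout << emptyKinds.doesMatch("ab", 1) << "\n";    // 0 (previously a parse error)
}

In the relay itself the same flag lets NostrFilterGroup drop such filters up front (the filters.pop_back() hunk above), so monitors and DB scans never see a filter that cannot match.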