flat_merge to rust

This commit is contained in:
Kieran 2023-09-11 14:27:39 +01:00
parent d67beb8dbf
commit 4dfc6525a1
Signed by: Kieran
GPG Key ID: DE71CEB3925BE941
11 changed files with 423 additions and 234 deletions

View File

@ -9,11 +9,11 @@ crate-type = ["cdylib"]
[dependencies]
itertools = "0.11.0"
rand = "0.8.5"
serde = { version = "1.0.188", features = ["derive"] }
serde-wasm-bindgen = "0.5.0"
wasm-bindgen = "0.2.87"
serde_json = "1.0.105"
[dev-dependencies]
rand = "0.8.5"
wasm-bindgen-test = "0.3.37"
serde_json = "1.0.105"

View File

@ -17,6 +17,11 @@ export function expand_filter(val: any): any;
* @returns {any}
*/
export function get_diff(prev: any, next: any): any;
/**
* @param {any} val
* @returns {any}
*/
export function flat_merge(val: any): any;
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
@ -25,6 +30,7 @@ export interface InitOutput {
readonly diff_filters: (a: number, b: number, c: number) => void;
readonly expand_filter: (a: number, b: number) => void;
readonly get_diff: (a: number, b: number, c: number) => void;
readonly flat_merge: (a: number, b: number) => void;
readonly __wbindgen_malloc: (a: number, b: number) => number;
readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
readonly __wbindgen_add_to_stack_pointer: (a: number) => number;

View File

@ -250,6 +250,26 @@ export function get_diff(prev, next) {
}
}
/**
 * Merge an array of flat request filters back into a (smaller) array of
 * regular filters. wasm-bindgen generated glue for the Rust `flat_merge`
 * export; throws if the Rust side returns an `Err`.
 * @param {any} val
 * @returns {any}
 */
export function flat_merge(val) {
    try {
        // Reserve 16 bytes on the wasm stack for the (ok, err, is_err) return slot.
        const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
        wasm.flat_merge(retptr, addHeapObject(val));
        var r0 = getInt32Memory0()[retptr / 4 + 0]; // handle of the Ok value
        var r1 = getInt32Memory0()[retptr / 4 + 1]; // handle of the Err value
        var r2 = getInt32Memory0()[retptr / 4 + 2]; // non-zero when the call returned Err
        if (r2) {
            throw takeObject(r1);
        }
        return takeObject(r0);
    } finally {
        // Always restore the wasm stack pointer, even when throwing.
        wasm.__wbindgen_add_to_stack_pointer(16);
    }
}
function handleError(f, args) {
try {
return f.apply(this, args);

View File

@ -4,6 +4,7 @@ export const memory: WebAssembly.Memory;
export function diff_filters(a: number, b: number, c: number): void;
export function expand_filter(a: number, b: number): void;
export function get_diff(a: number, b: number, c: number): void;
export function flat_merge(a: number, b: number): void;
export function __wbindgen_malloc(a: number, b: number): number;
export function __wbindgen_realloc(a: number, b: number, c: number, d: number): number;
export function __wbindgen_add_to_stack_pointer(a: number): number;

View File

@ -161,14 +161,19 @@ pub fn expand_filter(filter: &ReqFilter) -> Vec<FlatReqFilter> {
mod tests {
use super::*;
use crate::ReqFilter;
use std::collections::HashSet;
#[test]
fn test_expand_filter() {
let input = ReqFilter {
authors: Some(vec!["a".to_owned(), "b".to_owned(), "c".to_owned()]),
kinds: Some(vec![1, 2, 3]),
ids: Some(vec!["x".to_owned(), "y".to_owned()]),
p_tag: Some(vec!["a".to_owned()]),
authors: Some(HashSet::from([
"a".to_owned(),
"b".to_owned(),
"c".to_owned(),
])),
kinds: Some(HashSet::from([1, 2, 3])),
ids: Some(HashSet::from(["x".to_owned(), "y".to_owned()])),
p_tag: Some(HashSet::from(["a".to_owned()])),
t_tag: None,
d_tag: None,
r_tag: None,

View File

@ -0,0 +1,287 @@
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
#[cfg(test)]
use std::fmt::Debug;
use std::hash::Hash;
/// A subscription request filter (Nostr REQ-style shape, note the `#x` tag
/// keys) where each list property may match multiple values.
/// `HashSet` makes membership order-insensitive and collapses duplicates.
#[derive(PartialEq, Clone, Serialize, Deserialize)]
pub struct ReqFilter {
    /// Event ids to match.
    #[serde(rename = "ids", skip_serializing_if = "Option::is_none")]
    pub ids: Option<HashSet<String>>,
    /// Author (pubkey) values to match.
    #[serde(rename = "authors", skip_serializing_if = "Option::is_none")]
    pub authors: Option<HashSet<String>>,
    /// Event kinds to match.
    #[serde(rename = "kinds", skip_serializing_if = "Option::is_none")]
    pub kinds: Option<HashSet<i32>>,
    /// `#e` tag values.
    #[serde(rename = "#e", skip_serializing_if = "Option::is_none")]
    pub e_tag: Option<HashSet<String>>,
    /// `#p` tag values.
    #[serde(rename = "#p", skip_serializing_if = "Option::is_none")]
    pub p_tag: Option<HashSet<String>>,
    /// `#t` tag values.
    #[serde(rename = "#t", skip_serializing_if = "Option::is_none")]
    pub t_tag: Option<HashSet<String>>,
    /// `#d` tag values.
    #[serde(rename = "#d", skip_serializing_if = "Option::is_none")]
    pub d_tag: Option<HashSet<String>>,
    /// `#r` tag values.
    #[serde(rename = "#r", skip_serializing_if = "Option::is_none")]
    pub r_tag: Option<HashSet<String>>,
    /// Full-text search terms.
    #[serde(rename = "search", skip_serializing_if = "Option::is_none")]
    pub search: Option<HashSet<String>>,
    /// Lower timestamp bound (inclusive semantics assumed — TODO confirm against consumers).
    #[serde(rename = "since", skip_serializing_if = "Option::is_none")]
    pub since: Option<i32>,
    /// Upper timestamp bound.
    #[serde(rename = "until", skip_serializing_if = "Option::is_none")]
    pub until: Option<i32>,
    /// Maximum number of results.
    #[serde(rename = "limit", skip_serializing_if = "Option::is_none")]
    pub limit: Option<i32>,
}
#[cfg(test)]
impl Debug for ReqFilter {
    /// Debug-formats the filter as its JSON serialization (test-only),
    /// so assertion failures print the same shape callers see over the wire.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `to_string` already yields an owned String; the previous
        // `.to_owned()` cloned it a second time for no benefit.
        f.write_str(&serde_json::to_string(self).unwrap())
    }
}
/// A [`ReqFilter`] flattened to at most ONE value per property.
/// The serde renames deliberately reuse ReqFilter's plural JSON keys
/// (e.g. `ids` for the single `id`) so both types share one wire shape.
#[derive(PartialEq, PartialOrd, Clone, Serialize, Deserialize)]
pub struct FlatReqFilter {
    /// Single event id.
    #[serde(rename = "ids", skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Single author (pubkey).
    #[serde(rename = "authors", skip_serializing_if = "Option::is_none")]
    pub author: Option<String>,
    /// Single event kind.
    #[serde(rename = "kinds", skip_serializing_if = "Option::is_none")]
    pub kind: Option<i32>,
    /// Single `#e` tag value.
    #[serde(rename = "#e", skip_serializing_if = "Option::is_none")]
    pub e_tag: Option<String>,
    /// Single `#p` tag value.
    #[serde(rename = "#p", skip_serializing_if = "Option::is_none")]
    pub p_tag: Option<String>,
    /// Single `#t` tag value.
    #[serde(rename = "#t", skip_serializing_if = "Option::is_none")]
    pub t_tag: Option<String>,
    /// Single `#d` tag value.
    #[serde(rename = "#d", skip_serializing_if = "Option::is_none")]
    pub d_tag: Option<String>,
    /// Single `#r` tag value.
    #[serde(rename = "#r", skip_serializing_if = "Option::is_none")]
    pub r_tag: Option<String>,
    /// Single search term.
    #[serde(rename = "search", skip_serializing_if = "Option::is_none")]
    pub search: Option<String>,
    /// Lower timestamp bound.
    #[serde(rename = "since", skip_serializing_if = "Option::is_none")]
    pub since: Option<i32>,
    /// Upper timestamp bound.
    #[serde(rename = "until", skip_serializing_if = "Option::is_none")]
    pub until: Option<i32>,
    /// Maximum number of results.
    #[serde(rename = "limit", skip_serializing_if = "Option::is_none")]
    pub limit: Option<i32>,
}
#[cfg(test)]
impl Debug for FlatReqFilter {
    /// Debug-formats the flat filter as its JSON serialization (test-only).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `to_string` already yields an owned String; the previous
        // `.to_owned()` cloned it a second time for no benefit.
        f.write_str(&serde_json::to_string(self).unwrap())
    }
}
/// Similarity metric used to decide which filters are worth merging.
pub trait Distance {
    /// Calculate the distance in terms of similarity for merging
    ///
    /// The goal of this function is to find 2 filters which are very similar where
    /// one filter may have a single property change like so:
    ///
    /// ```javascript
    /// const a = { "kinds": 1, "authors": "a", "since": 99 };
    /// const b = { "kinds": 1, "authors": "b", "since": 99 };
    /// ```
    /// In this case these 2 filters could be merged because their distance is `1`
    /// ```javascript
    /// const result = { "kinds": [1], "authors": ["a", "b"], "since": 99 };
    /// ```
    ///
    /// Identical filters have distance `0`; implementations in this file
    /// treat any distance above `1` as un-mergeable (see `CanMerge`).
    fn distance(&self, other: &Self) -> u32;
}
/// Ability to test whether two filters are similar enough to combine into one.
pub trait CanMerge {
    /// Returns `true` when `self` and `other` may be merged into a single
    /// filter; implementations in this file additionally require scalar
    /// properties (`since`/`until`/`limit`/`search`) to be equal.
    fn can_merge(&self, other: &Self) -> bool;
}
impl Distance for FlatReqFilter {
    /// Sum of per-property distances over every single-value property.
    /// Each property contributes 0 (equal), 1 (both set, different) or
    /// 10 (set on only one side) — see `prop_dist`.
    fn distance(&self, b: &Self) -> u32 {
        let parts = [
            prop_dist(&self.id, &b.id),
            prop_dist(&self.kind, &b.kind),
            prop_dist(&self.author, &b.author),
            prop_dist(&self.e_tag, &b.e_tag),
            prop_dist(&self.p_tag, &b.p_tag),
            prop_dist(&self.d_tag, &b.d_tag),
            prop_dist(&self.r_tag, &b.r_tag),
            prop_dist(&self.t_tag, &b.t_tag),
            prop_dist(&self.search, &b.search),
        ];
        parts.iter().sum()
    }
}
impl CanMerge for FlatReqFilter {
    /// Two flat filters may merge only when every scalar constraint matches
    /// exactly AND at most one other property differs.
    fn can_merge(&self, other: &Self) -> bool {
        let scalars_match = self.since == other.since
            && self.until == other.until
            && self.limit == other.limit
            && self.search == other.search;
        // `&&` short-circuits, so distance is only computed when the
        // scalar check passes — same evaluation order as an early return.
        scalars_match && self.distance(other) <= 1
    }
}
impl From<Vec<&FlatReqFilter>> for ReqFilter {
    /// Collapse a group of flat filters into one [`ReqFilter`], unioning each
    /// single-value property into the corresponding set-valued property.
    ///
    /// `since`/`until`/`limit` take the value of the LAST element; callers
    /// are expected to group inputs via [`CanMerge`] first (which requires
    /// these scalars to be equal), so last-wins is effectively any-wins.
    fn from(value: Vec<&FlatReqFilter>) -> Self {
        let mut acc = ReqFilter {
            ids: None,
            authors: None,
            kinds: None,
            e_tag: None,
            p_tag: None,
            t_tag: None,
            d_tag: None,
            r_tag: None,
            search: None,
            since: None,
            until: None,
            limit: None,
        };
        for x in value {
            array_prop_append(&x.id, &mut acc.ids);
            array_prop_append(&x.author, &mut acc.authors);
            array_prop_append(&x.kind, &mut acc.kinds);
            array_prop_append(&x.e_tag, &mut acc.e_tag);
            array_prop_append(&x.p_tag, &mut acc.p_tag);
            array_prop_append(&x.t_tag, &mut acc.t_tag);
            array_prop_append(&x.d_tag, &mut acc.d_tag);
            array_prop_append(&x.r_tag, &mut acc.r_tag);
            array_prop_append(&x.search, &mut acc.search);
            acc.since = x.since;
            acc.until = x.until;
            acc.limit = x.limit;
        }
        acc
    }
}
impl From<Vec<&ReqFilter>> for ReqFilter {
    /// Collapse a group of filters into one [`ReqFilter`], unioning each
    /// set-valued property across all inputs.
    ///
    /// `since`/`until`/`limit` take the value of the LAST element; callers
    /// are expected to group inputs via [`CanMerge`] first (which requires
    /// these scalars to be equal), so last-wins is effectively any-wins.
    fn from(value: Vec<&ReqFilter>) -> Self {
        let mut acc = ReqFilter {
            ids: None,
            authors: None,
            kinds: None,
            e_tag: None,
            p_tag: None,
            t_tag: None,
            d_tag: None,
            r_tag: None,
            search: None,
            since: None,
            until: None,
            limit: None,
        };
        for x in value {
            array_prop_append_vec(&x.ids, &mut acc.ids);
            array_prop_append_vec(&x.authors, &mut acc.authors);
            array_prop_append_vec(&x.kinds, &mut acc.kinds);
            array_prop_append_vec(&x.e_tag, &mut acc.e_tag);
            array_prop_append_vec(&x.p_tag, &mut acc.p_tag);
            array_prop_append_vec(&x.t_tag, &mut acc.t_tag);
            array_prop_append_vec(&x.d_tag, &mut acc.d_tag);
            array_prop_append_vec(&x.r_tag, &mut acc.r_tag);
            array_prop_append_vec(&x.search, &mut acc.search);
            acc.since = x.since;
            acc.until = x.until;
            acc.limit = x.limit;
        }
        acc
    }
}
impl Distance for ReqFilter {
    /// Sum of per-property distances over every set-valued property.
    /// Each property contributes 0 (equal sets), 1 (both set, different)
    /// or 10 (set on only one side) — see `prop_dist_vec`.
    fn distance(&self, b: &Self) -> u32 {
        let parts = [
            prop_dist_vec(&self.ids, &b.ids),
            prop_dist_vec(&self.kinds, &b.kinds),
            prop_dist_vec(&self.authors, &b.authors),
            prop_dist_vec(&self.e_tag, &b.e_tag),
            prop_dist_vec(&self.p_tag, &b.p_tag),
            prop_dist_vec(&self.d_tag, &b.d_tag),
            prop_dist_vec(&self.r_tag, &b.r_tag),
            prop_dist_vec(&self.t_tag, &b.t_tag),
            prop_dist_vec(&self.search, &b.search),
        ];
        parts.iter().sum()
    }
}
impl CanMerge for ReqFilter {
    /// Two filters may merge only when every scalar constraint matches
    /// exactly AND at most one other property differs.
    fn can_merge(&self, other: &Self) -> bool {
        let scalars_match = self.since == other.since
            && self.until == other.until
            && self.limit == other.limit
            && self.search == other.search;
        // `&&` short-circuits, so distance is only computed when the
        // scalar check passes — same evaluation order as an early return.
        scalars_match && self.distance(other) <= 1
    }
}
#[inline(always)]
fn prop_dist<T: Eq>(a: &Option<T>, b: &Option<T>) -> u32 {
    // Distance contribution of one optional property:
    //   present on only one side          -> 10 (effectively un-mergeable)
    //   present on both, different values -> 1
    //   otherwise (equal or both absent)  -> 0
    match (a, b) {
        (Some(_), None) | (None, Some(_)) => 10,
        (Some(x), Some(y)) if x != y => 1,
        _ => 0,
    }
}
#[inline(always)]
fn prop_dist_vec<T: Eq + Hash>(a: &Option<HashSet<T>>, b: &Option<HashSet<T>>) -> u32 {
    // Distance contribution of one optional set-valued property:
    //   present on only one side -> 10 (effectively un-mergeable)
    //   present on both, unequal -> 1
    //   otherwise                -> 0
    //
    // The previous version pre-checked lengths before comparing sets and
    // ended in an unreachable `panic!` arm (the mixed Some/None case was
    // already handled); an exhaustive match removes both. HashSet equality
    // already short-circuits on unequal lengths.
    match (a, b) {
        (Some(_), None) | (None, Some(_)) => 10,
        (Some(aa), Some(bb)) => u32::from(aa != bb),
        (None, None) => 0,
    }
}
#[inline(always)]
/// Insert `val` (if present) into the optional set `arr`, creating the set
/// on first use. A `None` value is a no-op and leaves `arr` untouched.
fn array_prop_append<T: Clone + Eq + Hash>(val: &Option<T>, arr: &mut Option<HashSet<T>>) {
    if let Some(ap) = val {
        // `get_or_insert_with` replaces the manual None-check/unwrap dance
        // with a single lookup-or-create.
        arr.get_or_insert_with(HashSet::new).insert(ap.clone());
    }
}
#[inline(always)]
/// Union the optional set `val` into the optional set `arr`, creating the
/// destination set on first use. A `None` value is a no-op.
fn array_prop_append_vec<T: Clone + Eq + Hash>(
    val: &Option<HashSet<T>>,
    arr: &mut Option<HashSet<T>>,
) {
    if let Some(ap) = val {
        // `get_or_insert_with` + `extend` replaces the manual
        // None-check/unwrap branches with one lookup-or-create union.
        arr.get_or_insert_with(HashSet::new)
            .extend(ap.iter().cloned());
    }
}

View File

@ -1,79 +1,11 @@
use std::fmt::{Debug};
use serde::{Deserialize, Serialize};
use crate::filter::{FlatReqFilter, ReqFilter};
use wasm_bindgen::prelude::*;
mod diff;
mod expand;
mod filter;
mod merge;
#[derive(PartialEq, Clone, Serialize, Deserialize)]
pub struct ReqFilter {
#[serde(rename = "ids", skip_serializing_if = "Option::is_none")]
pub ids: Option<Vec<String>>,
#[serde(rename = "authors", skip_serializing_if = "Option::is_none")]
pub authors: Option<Vec<String>>,
#[serde(rename = "kinds", skip_serializing_if = "Option::is_none")]
pub kinds: Option<Vec<i32>>,
#[serde(rename = "#e", skip_serializing_if = "Option::is_none")]
pub e_tag: Option<Vec<String>>,
#[serde(rename = "#p", skip_serializing_if = "Option::is_none")]
pub p_tag: Option<Vec<String>>,
#[serde(rename = "#t", skip_serializing_if = "Option::is_none")]
pub t_tag: Option<Vec<String>>,
#[serde(rename = "#d", skip_serializing_if = "Option::is_none")]
pub d_tag: Option<Vec<String>>,
#[serde(rename = "#r", skip_serializing_if = "Option::is_none")]
pub r_tag: Option<Vec<String>>,
#[serde(rename = "search", skip_serializing_if = "Option::is_none")]
pub search: Option<Vec<String>>,
#[serde(rename = "since", skip_serializing_if = "Option::is_none")]
pub since: Option<i32>,
#[serde(rename = "until", skip_serializing_if = "Option::is_none")]
pub until: Option<i32>,
#[serde(rename = "limit", skip_serializing_if = "Option::is_none")]
pub limit: Option<i32>,
}
impl Debug for ReqFilter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&serde_json::to_string(self).unwrap().to_owned())
}
}
#[derive(PartialEq, Clone, Serialize, Deserialize)]
pub struct FlatReqFilter {
#[serde(rename = "ids", skip_serializing_if = "Option::is_none")]
id: Option<String>,
#[serde(rename = "authors", skip_serializing_if = "Option::is_none")]
author: Option<String>,
#[serde(rename = "kinds", skip_serializing_if = "Option::is_none")]
kind: Option<i32>,
#[serde(rename = "#e", skip_serializing_if = "Option::is_none")]
e_tag: Option<String>,
#[serde(rename = "#p", skip_serializing_if = "Option::is_none")]
p_tag: Option<String>,
#[serde(rename = "#t", skip_serializing_if = "Option::is_none")]
t_tag: Option<String>,
#[serde(rename = "#d", skip_serializing_if = "Option::is_none")]
d_tag: Option<String>,
#[serde(rename = "#r", skip_serializing_if = "Option::is_none")]
r_tag: Option<String>,
#[serde(rename = "search", skip_serializing_if = "Option::is_none")]
search: Option<String>,
#[serde(rename = "since", skip_serializing_if = "Option::is_none")]
since: Option<i32>,
#[serde(rename = "until", skip_serializing_if = "Option::is_none")]
until: Option<i32>,
#[serde(rename = "limit", skip_serializing_if = "Option::is_none")]
limit: Option<i32>,
}
impl Debug for FlatReqFilter {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&serde_json::to_string(self).unwrap().to_owned())
}
}
#[wasm_bindgen]
pub fn diff_filters(prev: JsValue, next: JsValue) -> Result<JsValue, JsValue> {
let prev_parsed: Vec<FlatReqFilter> = serde_wasm_bindgen::from_value(prev)?;
@ -108,7 +40,7 @@ pub fn get_diff(prev: JsValue, next: JsValue) -> Result<JsValue, JsValue> {
#[wasm_bindgen]
pub fn flat_merge(val: JsValue) -> Result<JsValue, JsValue> {
let val_parsed: Vec<FlatReqFilter> = serde_wasm_bindgen::from_value(val)?;
let result = merge::flat_merge(&val_parsed);
let result = merge::merge::<FlatReqFilter, ReqFilter>(val_parsed.iter().collect());
Ok(serde_wasm_bindgen::to_value(&result)?)
}
@ -117,25 +49,26 @@ mod tests {
use super::*;
use itertools::Itertools;
use std::cmp::Ordering;
use std::collections::HashSet;
#[test]
fn flat_merge_expanded() {
let input = vec![
ReqFilter {
ids: None,
kinds: Some(vec![1, 6969, 6]),
kinds: Some(HashSet::from([1, 6969, 6])),
e_tag: None,
p_tag: None,
t_tag: None,
d_tag: None,
r_tag: None,
authors: Some(vec![
authors: Some(HashSet::from([
"kieran".to_string(),
"snort".to_string(),
"c".to_string(),
"d".to_string(),
"e".to_string(),
]),
])),
since: Some(1),
until: Some(100),
search: None,
@ -143,7 +76,7 @@ mod tests {
},
ReqFilter {
ids: None,
kinds: Some(vec![4]),
kinds: Some(HashSet::from([4])),
e_tag: None,
p_tag: None,
t_tag: None,
@ -152,15 +85,15 @@ mod tests {
search: None,
since: None,
until: None,
authors: Some(vec!["kieran".to_string()]),
authors: Some(HashSet::from(["kieran".to_string()])),
limit: None,
},
ReqFilter {
ids: None,
authors: None,
kinds: Some(vec![4]),
kinds: Some(HashSet::from([4])),
e_tag: None,
p_tag: Some(vec!["kieran".to_string()]),
p_tag: Some(HashSet::from(["kieran".to_string()])),
t_tag: None,
d_tag: None,
r_tag: None,
@ -171,9 +104,9 @@ mod tests {
},
ReqFilter {
ids: None,
kinds: Some(vec![1000]),
authors: Some(vec!["snort".to_string()]),
p_tag: Some(vec!["kieran".to_string()]),
kinds: Some(HashSet::from([1000])),
authors: Some(HashSet::from(["snort".to_string()])),
p_tag: Some(HashSet::from(["kieran".to_string()])),
t_tag: None,
d_tag: None,
r_tag: None,
@ -196,7 +129,8 @@ mod tests {
}
})
.collect_vec();
let expanded_flat = merge::flat_merge(&expanded);
assert_eq!(expanded_flat, input);
let merged_expanded: Vec<ReqFilter> = merge::merge(expanded.iter().collect());
assert_eq!(merged_expanded.len(), input.len());
assert!(merged_expanded.iter().all(|v| input.contains(v)));
}
}

View File

@ -1,134 +1,59 @@
use crate::{FlatReqFilter, ReqFilter};
use itertools::Itertools;
use std::cmp::Ordering;
use crate::filter::CanMerge;
pub fn flat_merge(all: &Vec<FlatReqFilter>) -> Vec<ReqFilter> {
let mut ret: Vec<ReqFilter> = vec![];
let merge_sets: Vec<Vec<&FlatReqFilter>> = vec![vec![all.first().unwrap()]];
let merge_sets = all
.iter()
.skip(1)
.sorted_by(|a, b| match distance(&a, &b) {
0 => Ordering::Equal,
1 => Ordering::Less,
_ => Ordering::Greater,
})
.fold(merge_sets, |mut acc, x| {
let mut did_match = false;
for y in acc.iter_mut() {
if y.iter().all(|z| can_merge_filters(z, x)) {
y.push(x);
did_match = true;
break;
}
}
if !did_match {
acc.push(vec![x]);
}
acc
});
for s in merge_sets.iter() {
ret.push(merge_set(s));
}
ret
}
fn merge_set(set: &Vec<&FlatReqFilter>) -> ReqFilter {
let ret = ReqFilter {
ids: None,
authors: None,
kinds: None,
e_tag: None,
p_tag: None,
t_tag: None,
d_tag: None,
r_tag: None,
search: None,
since: None,
until: None,
limit: None,
};
set.iter().fold(ret, |mut acc, x| {
array_prop_append(&x.id, &mut acc.ids);
array_prop_append(&x.author, &mut acc.authors);
array_prop_append(&x.kind, &mut acc.kinds);
array_prop_append(&x.e_tag, &mut acc.e_tag);
array_prop_append(&x.p_tag, &mut acc.p_tag);
array_prop_append(&x.t_tag, &mut acc.t_tag);
array_prop_append(&x.d_tag, &mut acc.d_tag);
array_prop_append(&x.r_tag, &mut acc.r_tag);
array_prop_append(&x.search, &mut acc.search);
acc.since = x.since;
acc.until = x.until;
acc.limit = x.limit;
acc
})
}
fn can_merge_filters(a: &FlatReqFilter, b: &FlatReqFilter) -> bool {
if a.since != b.since || a.until != b.until || a.limit != b.limit || a.search != b.search {
return false;
}
distance(a, b) <= 1
}
/// Calculate the distance in terms of similarity for merging
///
/// The goal of this function is to find 2 filters which are very similar where
/// one filter may have a single property change like so:
///
/// ```javascript
/// const a = { "kinds": 1, "authors": "a", "since": 99 };
/// const b = { "kinds": 1, "authors": "b", "since": 99 };
/// ```
/// In this case these 2 filters could be merged because their distance is `1`
/// ```javascript
/// const result = { "kinds": [1], "authors": ["a", "b"], "since": 99 };
/// ```
fn distance(a: &FlatReqFilter, b: &FlatReqFilter) -> u32 {
let mut ret = 0u32;
ret += prop_dist(&a.id, &b.id);
ret += prop_dist(&a.kind, &b.kind);
ret += prop_dist(&a.author, &b.author);
ret += prop_dist(&a.e_tag, &b.e_tag);
ret += prop_dist(&a.p_tag, &b.p_tag);
ret += prop_dist(&a.d_tag, &b.d_tag);
ret += prop_dist(&a.r_tag, &b.r_tag);
ret += prop_dist(&a.t_tag, &b.t_tag);
ret += prop_dist(&a.search, &b.search);
ret
}
#[inline(always)]
fn prop_dist<T: Eq>(a: &Option<T>, b: &Option<T>) -> u32 {
if (a.is_some() && b.is_none()) || (a.is_none() && b.is_some()) {
return 10;
} else if a.is_some() && a != b {
return 1;
}
0
}
#[inline(always)]
fn array_prop_append<T: Clone + Eq>(val: &Option<T>, arr: &mut Option<Vec<T>>) {
if let Some(ap) = val {
if arr.is_none() {
*arr = Some(vec![ap.clone()])
} else if !arr.as_ref().unwrap().contains(ap) {
arr.as_mut().unwrap().push(ap.clone());
/// Merge `all` into the smallest set of `Z` filters this greedy algorithm
/// finds, by running `merge_once` repeatedly until the output stops
/// shrinking (a fixed point — one pass can create newly mergeable pairs).
pub fn merge<'a, T, Z>(all: Vec<&'a T>) -> Vec<Z>
where
    T: CanMerge,
    for<'b> Z: CanMerge + From<Vec<&'a T>> + From<Vec<&'b Z>>,
{
    let mut ret: Vec<Z> = merge_once(all);
    // Sentinel forces at least one re-merge pass, mirroring the original
    // loop/break structure.
    let mut prev_len = usize::MAX;
    while ret.len() != prev_len {
        prev_len = ret.len();
        ret = merge_once(ret.iter().collect());
    }
    ret
}
/// Perform one merge pass: greedily partition `all` into groups of mutually
/// mergeable filters, then collapse each group into a single `Z` via `From`.
///
/// Greedy and order-dependent: each filter joins the FIRST existing group
/// whose every member it can merge with, otherwise it starts a new group.
fn merge_once<'a, T, Z>(all: Vec<&'a T>) -> Vec<Z>
where
    T: CanMerge,
    for<'b> Z: CanMerge + From<Vec<&'a T>> + From<Vec<&'b Z>>,
{
    let mut ret: Vec<Z> = vec![];
    if all.is_empty() {
        return ret;
    }
    // Seed the first group with the first filter; the fold below places the
    // remaining filters (skip(1)) into groups.
    let merge_sets: Vec<Vec<&T>> = vec![vec![all.first().unwrap()]];
    let merge_sets = all.iter().skip(1).fold(merge_sets, |mut acc, x| {
        let mut did_match = false;
        for y in acc.iter_mut() {
            // Must be mergeable with EVERY current member of the group.
            if y.iter().all(|z| z.can_merge(x)) {
                y.push(x);
                did_match = true;
                break;
            }
        }
        if !did_match {
            // No compatible group: start a new one with just this filter.
            acc.push(vec![x]);
        }
        acc
    });
    // Collapse each group of T-refs into one merged Z.
    for s in merge_sets {
        ret.push(Z::from(s));
    }
    ret
}
#[cfg(test)]
mod tests {
use super::*;
use crate::filter::{Distance, FlatReqFilter, ReqFilter};
use std::collections::HashSet;
#[test]
fn distance() {
@ -202,10 +127,10 @@ mod tests {
until: None,
limit: None,
};
assert_eq!(super::distance(&a, &b), 0);
assert_eq!(super::distance(&a, &c), 1);
assert_eq!(super::distance(&a, &d), 10);
assert_eq!(super::distance(&a, &e), 11);
assert_eq!(a.distance(&b), 0);
assert_eq!(a.distance(&c), 1);
assert_eq!(a.distance(&d), 10);
assert_eq!(a.distance(&e), 11);
}
#[test]
@ -240,8 +165,8 @@ mod tests {
};
let output = ReqFilter {
ids: Some(vec!["0".to_owned()]),
authors: Some(vec!["a".to_owned(), "b".to_owned()]),
ids: Some(HashSet::from(["0".to_owned()])),
authors: Some(HashSet::from(["a".to_owned(), "b".to_owned()])),
kinds: None,
e_tag: None,
p_tag: None,
@ -253,7 +178,7 @@ mod tests {
until: None,
limit: Some(10),
};
assert_eq!(super::merge_set(&vec![&a, &b]), output);
assert_eq!(ReqFilter::from(vec![&a, &b]), output);
}
#[test]
@ -300,8 +225,8 @@ mod tests {
until: None,
limit: Some(100),
};
assert!(super::can_merge_filters(&a, &b));
assert!(!super::can_merge_filters(&b, &c));
assert!(&a.can_merge(&b));
assert!(!&b.can_merge(&c));
}
#[test]
@ -436,8 +361,12 @@ mod tests {
];
let output = vec![
ReqFilter {
ids: Some(vec!["0".to_owned()]),
authors: Some(vec!["a".to_owned(), "b".to_owned(), "c".to_owned()]),
ids: Some(HashSet::from(["0".to_owned()])),
authors: Some(HashSet::from([
"a".to_owned(),
"b".to_owned(),
"c".to_owned(),
])),
kinds: None,
e_tag: None,
p_tag: None,
@ -452,7 +381,7 @@ mod tests {
ReqFilter {
ids: None,
authors: None,
kinds: Some(vec![1, 2]),
kinds: Some(HashSet::from([1, 2])),
e_tag: None,
p_tag: None,
t_tag: None,
@ -465,8 +394,8 @@ mod tests {
},
ReqFilter {
ids: None,
authors: Some(vec!["c".to_owned()]),
kinds: Some(vec![1]),
authors: Some(HashSet::from(["c".to_owned()])),
kinds: Some(HashSet::from([1])),
e_tag: None,
p_tag: None,
t_tag: None,
@ -479,7 +408,7 @@ mod tests {
},
ReqFilter {
ids: None,
authors: Some(vec!["c".to_owned()]),
authors: Some(HashSet::from(["c".to_owned()])),
kinds: None,
e_tag: None,
p_tag: None,
@ -492,8 +421,8 @@ mod tests {
limit: Some(100),
},
ReqFilter {
ids: Some(vec!["1".to_owned()]),
authors: Some(vec!["c".to_owned()]),
ids: Some(HashSet::from(["1".to_owned()])),
authors: Some(HashSet::from(["c".to_owned()])),
kinds: None,
e_tag: None,
p_tag: None,
@ -507,6 +436,9 @@ mod tests {
},
];
assert_eq!(super::flat_merge(&input), output)
assert_eq!(
merge::<FlatReqFilter, ReqFilter>(input.iter().collect()),
output
)
}
}

View File

@ -4,7 +4,7 @@ import { unwrap, sanitizeRelayUrl, ExternalStore, FeedCache } from "@snort/share
import { NostrEvent, TaggedNostrEvent } from "./nostr";
import { AuthHandler, Connection, RelaySettings, ConnectionStateSnapshot } from "./connection";
import { Query } from "./query";
import { NoteCollection, NoteStore, NoteStoreHook, NoteStoreSnapshotData } from "./note-collection";
import { NoteCollection, NoteStore, NoteStoreSnapshotData } from "./note-collection";
import { BuiltRawReqFilter, RequestBuilder } from "./request-builder";
import { RelayMetricHandler } from "./relay-metric-handler";
import {
@ -258,8 +258,12 @@ export class NostrSystem extends ExternalStore<SystemSnapshot> implements System
const filters = req.build(this.#relayCache);
const q = new Query(req.id, req.instance, store, req.options?.leaveOpen);
if (filters.some(a => a.filters.some(b => b.ids))) {
const expectIds = new Set(filters.flatMap(a => a.filters).flatMap(a => a.ids ?? []));
q.feed.onEvent(async evs => {
await this.#eventsCache.bulkSet(evs);
const toSet = evs.filter(a => expectIds.has(a.id) && this.#eventsCache.getFromCache(a.id) === undefined);
if(toSet.length > 0) {
await this.#eventsCache.bulkSet(toSet);
}
});
}
this.Queries.set(req.id, q);

View File

@ -1,7 +1,7 @@
import debug from "debug";
import { v4 as uuid } from "uuid";
import { appendDedupe, sanitizeRelayUrl, unixNowMs } from "@snort/shared";
import { get_diff }from "@snort/system-query";
import { flat_merge, get_diff }from "@snort/system-query";
import { ReqFilter, u256, HexKey, EventKind } from ".";
import { RelayCache, splitByWriteRelays, splitFlatByWriteRelays } from "./gossip-model";
@ -115,7 +115,7 @@ export class RequestBuilder {
return splitFlatByWriteRelays(relays, diff).map(a => {
return {
strategy: RequestStrategy.AuthorsRelays,
filters: flatMerge(a.filters),
filters: flat_merge(a.filters) as Array<ReqFilter>,
relay: a.relay,
};
});