From a1fa59b960389faf2b9fcc1f2f120ec11de0716a Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Sat, 23 Nov 2024 01:24:42 -0500 Subject: [PATCH] feeds wip --- src/apps/web/AlgoParser.h | 437 ------------------------------------- src/apps/web/AlgoScanner.h | 116 ---------- src/apps/web/WebData.h | 32 +-- src/apps/web/cmd_algo.cpp | 56 ----- 4 files changed, 7 insertions(+), 634 deletions(-) delete mode 100644 src/apps/web/AlgoParser.h delete mode 100644 src/apps/web/AlgoScanner.h delete mode 100644 src/apps/web/cmd_algo.cpp diff --git a/src/apps/web/AlgoParser.h b/src/apps/web/AlgoParser.h deleted file mode 100644 index 8511743..0000000 --- a/src/apps/web/AlgoParser.h +++ /dev/null @@ -1,437 +0,0 @@ -#include -#include - -#include -#include - -#include "events.h" -#include "Bech32Utils.h" - - - - -struct AlgoCompiled { - double threshold = 20; - using PubkeySet = flat_hash_set; - std::vector pubkeySets; - flat_hash_map variableIndexLookup; // variableName -> index into pubkeySets - - PubkeySet *mods = nullptr; - PubkeySet *voters = nullptr; - - struct Filter { - std::unique_ptr re; - char op; - double arg; - }; - - std::vector filters; - - void updateScore(lmdb::txn &txn, Decompressor &decomp, const defaultDb::environment::View_Event &e, double &score) { - auto rawJson = getEventJson(txn, decomp, e.primaryKeyId); - re2::StringPiece rawJsonSP(rawJson); - - for (const auto &f : filters) { - if (!RE2::PartialMatch(rawJsonSP, *f.re)) continue; - - if (f.op == '+') score += f.arg; - else if (f.op == '-') score -= f.arg; - else if (f.op == '*') score *= f.arg; - else if (f.op == '/') score /= f.arg; - } - } -}; - - -struct AlgoParseState { - lmdb::txn &txn; - - AlgoCompiled a; - - struct ExpressionState { - std::string currInfixOp; - AlgoCompiled::PubkeySet set; - }; - - std::vector expressionStateStack; - std::string currPubkeyDesc; - std::vector currModifiers; - - std::string currSetterVar; - char currFilterOp; - double currFilterArg; - - AlgoParseState(lmdb::txn &txn) : txn(txn) {} - - void letStart(std::string_view name) { - if (a.variableIndexLookup.contains(name)) throw herr("overwriting variable: ", name); - a.variableIndexLookup[name] = a.pubkeySets.size(); - expressionStateStack.push_back({ "+" }); - } - - void letEnd() { - a.pubkeySets.emplace_back(std::move(expressionStateStack.back().set)); - expressionStateStack.clear(); - } - - void letAddExpression() { - const auto &id = currPubkeyDesc; - AlgoCompiled::PubkeySet set; - - if (id.starts_with("npub1")) { - set.insert(decodeBech32Simple(id)); - } else { - if (!a.variableIndexLookup.contains(id)) throw herr("variable not found: ", id); - auto n = a.variableIndexLookup[id]; - if (n >= a.pubkeySets.size()) throw herr("self referential variable: ", id); - set = a.pubkeySets[n]; - } - - for (const auto &m : currModifiers) { - if (m == "following") { - AlgoCompiled::PubkeySet newSet = set; - for (const auto &p : set) loadFollowing(p, newSet); - set = newSet; - } else { - throw herr("unrecognised modifier: ", m); - } - } - - currPubkeyDesc = ""; - currModifiers.clear(); - - mergeInfix(set); - } - - void mergeInfix(AlgoCompiled::PubkeySet &set) { - auto &currInfixOp = expressionStateStack.back().currInfixOp; - - if (currInfixOp == "+") { - for (const auto &e : set) { - expressionStateStack.back().set.insert(e); - } - } else if (currInfixOp == "-") { - for (const auto &e : set) { - expressionStateStack.back().set.erase(e); - } - } else if (currInfixOp == "&") { - AlgoCompiled::PubkeySet intersection; - - for (const auto &e : set) { - if (expressionStateStack.back().set.contains(e)) intersection.insert(e); - } - - std::swap(intersection, expressionStateStack.back().set); - } - } - - - - void installSetter(std::string_view val) { - if (currSetterVar == "mods" || currSetterVar == "voters") { - if (!a.variableIndexLookup.contains(val)) throw herr("unknown variable: ", val); - auto *setPtr = &a.pubkeySets[a.variableIndexLookup[val]]; - - if (currSetterVar == "mods") a.mods = setPtr; - else if (currSetterVar == "voters") a.voters = setPtr; - } else if (currSetterVar == "threshold") { - a.threshold = std::stod(std::string(val)); - } - } - - void installFilter(std::string_view val) { - a.filters.emplace_back(std::make_unique(val), currFilterOp, currFilterArg); - } - - - - - - void loadFollowing(std::string_view pubkey, flat_hash_set &output) { - const uint64_t kind = 3; - - env.generic_foreachFull(txn, env.dbi_Event__pubkeyKind, makeKey_StringUint64Uint64(pubkey, kind, 0), "", [&](std::string_view k, std -::string_view v){ - ParsedKey_StringUint64Uint64 parsedKey(k); - - if (parsedKey.s == pubkey && parsedKey.n1 == kind) { - auto levId = lmdb::from_sv(v); - auto ev = lookupEventByLevId(txn, levId); - PackedEventView packed(ev.buf); - - packed.foreachTag([&](char tagName, std::string_view tagVal){ - if (tagName != 'p') return true; - output.insert(std::string(tagVal)); - return true; - }); - } - - return false; - }); - } -}; - - - - -namespace pegtl = TAO_PEGTL_NAMESPACE; - -namespace algo_parser { - // Whitespace - - struct comment : - pegtl::seq< - pegtl::one< '#' >, - pegtl::until< pegtl::eolf > - > {}; - - struct ws : pegtl::sor< pegtl::space, comment > {}; - - template< typename R > - struct pad : pegtl::pad< R, ws > {}; - - - // Pubkeys - - struct npub : - pegtl::seq< - pegtl::string< 'n', 'p', 'u', 'b', '1' >, - pegtl::plus< pegtl::alnum > - > {}; - - struct pubkey : - pegtl::sor< - npub, - pegtl::identifier - > {}; - - struct pubkeySetOp : pegtl::one< '+', '-', '&' > {}; - - struct pubkeyGroup; - struct pubkeyList : pegtl::list< pubkeyGroup, pubkeySetOp, ws > {}; - - struct pubkeyGroupOpen : pegtl::one< '(' > {}; - struct pubkeyGroupClose : pegtl::one< ')' > {}; - - struct pubkeyModifier : pegtl::identifier {}; - struct pubkeyExpression : pegtl::seq< - pubkey, - pegtl::star< pegtl::seq< pegtl::one< '.'>, pubkeyModifier > > - > {}; - - struct pubkeyGroup : pegtl::sor< - pubkeyExpression, - pegtl::seq< - pad< pubkeyGroupOpen >, - pubkeyList, - pad< pubkeyGroupClose > - > - > {}; - - - - // Let statements - - struct variableIdentifier : pegtl::seq< pegtl::not_at< npub >, pegtl::identifier > {}; - - struct letDefinition : variableIdentifier {}; - struct letTerminator : pegtl::one< ';' > {}; - - struct let : - pegtl::seq< - pad< TAO_PEGTL_STRING("let") >, - pad< letDefinition >, - pad< pegtl::one< '=' > >, - pad< pubkeyList >, - letTerminator - > {}; - - - - - // Posts block - - struct number : - pegtl::if_then_else< pegtl::one< '.' >, - pegtl::plus< pegtl::digit >, - pegtl::seq< - pegtl::plus< pegtl::digit >, - pegtl::opt< pegtl::one< '.' >, pegtl::star< pegtl::digit > > - > - > {}; - - struct arithOp : pegtl::one< '+', '-', '*', '/' > {}; - struct arithNumber : number {}; - struct arith : - pegtl::seq< - pad< arithOp >, - arithNumber - > {}; - - struct regexpPayload : pegtl::star< pegtl::sor< pegtl::string< '\\', '/' >, pegtl::not_one< '/' > > > {}; - struct regexp : - pegtl::seq< - pegtl::one< '/' >, - regexpPayload, - pegtl::one< '/' > - > {}; - - struct contentCondition : - pegtl::seq< - pad< pegtl::one< '~' > >, - pad< regexp > - > {}; - - struct condition : - pegtl::sor< - pad< contentCondition > - > {}; - - struct setterVar : - pegtl::sor< - TAO_PEGTL_STRING("mods"), - TAO_PEGTL_STRING("voters"), - TAO_PEGTL_STRING("threshold") - > {}; - - struct setterValue : - pegtl::star< - pegtl::sor< - pegtl::alnum, - pegtl::one< '.' > - > - > {}; - - struct setterStatement : - pegtl::seq< - pad< setterVar >, - pad< TAO_PEGTL_STRING("=") >, - pad< setterValue >, - pegtl::one< ';' > - > {}; - - struct filterStatment : - pegtl::seq< - pad< arith >, - pad< TAO_PEGTL_STRING("if") >, - pad< condition >, - pegtl::one< ';' > - > {}; - - struct postBlock : - pegtl::seq< - pad< TAO_PEGTL_STRING("posts") >, - pad< pegtl::one< '{' > >, - pegtl::star< pad< pegtl::sor< setterStatement, filterStatment > > >, - pegtl::one< '}' > - > {}; - - - - - // Main - - struct anything : pegtl::sor< ws, let, postBlock > {}; - struct main : pegtl::until< pegtl::eof, pegtl::must< anything > > {}; - - - - template< typename Rule > - struct action {}; - - - template<> struct action< letDefinition > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.letStart(in.string_view()); - } - }; - - template<> struct action< letTerminator > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.letEnd(); - } - }; - - template<> struct action< pubkey > { template< typename ActionInput > - static void apply(const ActionInput& in, AlgoParseState &a) { - a.currPubkeyDesc = in.string(); - } - }; - - template<> struct action< pubkeyModifier > { template< typename ActionInput > - static void apply(const ActionInput& in, AlgoParseState &a) { - a.currModifiers.push_back(in.string()); - } - }; - - template<> struct action< pubkeySetOp > { template< typename ActionInput > - static void apply(const ActionInput& in, AlgoParseState &a) { - a.expressionStateStack.back().currInfixOp = in.string(); - } - }; - - template<> struct action< pubkeyExpression > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.letAddExpression(); - } - }; - - template<> struct action< pubkeyGroupOpen > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.expressionStateStack.push_back({ "+" }); - } - }; - - template<> struct action< pubkeyGroupClose > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - auto set = std::move(a.expressionStateStack.back().set); - a.expressionStateStack.pop_back(); - a.mergeInfix(set); - } - }; - - - - template<> struct action< setterVar > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.currSetterVar = in.string(); - } - }; - - template<> struct action< setterValue > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.installSetter(in.string_view()); - } - }; - - template<> struct action< arithOp > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.currFilterOp = in.string_view().at(0); - } - }; - - template<> struct action< arithNumber > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.currFilterArg = std::stod(in.string()); - } - }; - - template<> struct action< regexpPayload > { template< typename ActionInput > - static void apply(const ActionInput &in, AlgoParseState &a) { - a.installFilter(in.string_view()); - } - }; -} - - -inline AlgoCompiled parseAlgo(lmdb::txn &txn, std::string_view algoText) { - AlgoParseState a(txn); - - pegtl::memory_input in(algoText, ""); - - if (!pegtl::parse< algo_parser::main, algo_parser::action >(in, a)) { - throw herr("algo parse error"); - } - - return std::move(a.a); -} diff --git a/src/apps/web/AlgoScanner.h b/src/apps/web/AlgoScanner.h deleted file mode 100644 index b0da9f8..0000000 --- a/src/apps/web/AlgoScanner.h +++ /dev/null @@ -1,116 +0,0 @@ -#pragma once - -#include "golpe.h" - -#include "events.h" - -#include "AlgoParser.h" - - - -struct AlgoScanner { - struct EventInfo { - uint64_t comments = 0; - double score = 0.0; - }; - - struct FilteredEvent { - uint64_t levId; - std::string id; - - EventInfo info; - }; - - AlgoCompiled a; - - - AlgoScanner(lmdb::txn &txn, std::string_view algoText) : a(parseAlgo(txn, algoText)) { - } - - - std::vector getEvents(lmdb::txn &txn, Decompressor &decomp, uint64_t limit) { - flat_hash_map eventInfoCache; - std::vector output; - - env.generic_foreachFull(txn, env.dbi_Event__created_at, lmdb::to_sv(MAX_U64), lmdb::to_sv(MAX_U64), [&](auto k, auto v) { - if (output.size() > limit) return false; - - auto ev = lookupEventByLevId(txn, lmdb::from_sv(v)); - PackedEventView packed(ev.buf); - - auto kind = packed.kind(); - auto id = packed.id(); - - if (kind == 1) { - auto pubkey = std::string(packed.pubkey()); - - bool foundETag = false; - packed.foreachTag([&](char tagName, std::string_view tagVal){ - if (tagName == 'e') { - auto tagEventId = std::string(tagVal); - eventInfoCache.emplace(tagEventId, EventInfo{}); - eventInfoCache[tagEventId].comments++; - foundETag = true; - } - return true; - }); - if (foundETag) return true; // not root event - - eventInfoCache.emplace(id, EventInfo{}); - auto &eventInfo = eventInfoCache[id]; - - if (a.voters && !a.voters->contains(pubkey)) return true; - a.updateScore(txn, decomp, ev, eventInfo.score); - if (eventInfo.score < a.threshold) return true; - - output.emplace_back(FilteredEvent{ev.primaryKeyId, std::string(id), eventInfo}); - } else if (kind == 7) { - auto pubkey = std::string(packed.pubkey()); - //if (a.voters && !a.voters->contains(pubkey)) return true; - - std::optional lastETag; - packed.foreachTag([&](char tagName, std::string_view tagVal){ - if (tagName == 'e') lastETag = tagVal; - return true; - }); - - if (lastETag) { - auto tagEventId = std::string(*lastETag); - eventInfoCache.emplace(tagEventId, EventInfo{}); - eventInfoCache[tagEventId].score++; - } - } - - return true; - }, true); - - //for (auto &o : output) { - //} - - return output; - } - - - void loadFollowing(lmdb::txn &txn, std::string_view pubkey, flat_hash_set &output) { - const uint64_t kind = 3; - - env.generic_foreachFull(txn, env.dbi_Event__pubkeyKind, makeKey_StringUint64Uint64(pubkey, kind, 0), "", [&](std::string_view k, std -::string_view v){ - ParsedKey_StringUint64Uint64 parsedKey(k); - - if (parsedKey.s == pubkey && parsedKey.n1 == kind) { - auto levId = lmdb::from_sv(v); - auto ev = lookupEventByLevId(txn, levId); - PackedEventView packed(ev.buf); - - packed.foreachTag([&](char tagName, std::string_view tagVal){ - if (tagName != 'p') return true; - output.insert(std::string(tagVal)); - return true; - }); - } - - return false; - }); - } -}; diff --git a/src/apps/web/WebData.h b/src/apps/web/WebData.h index 93f83bc..7c26dc7 100644 --- a/src/apps/web/WebData.h +++ b/src/apps/web/WebData.h @@ -4,7 +4,6 @@ #include "Bech32Utils.h" #include "WebUtils.h" -#include "AlgoScanner.h" #include "WebTemplates.h" #include "DBQuery.h" @@ -699,26 +698,19 @@ struct UserEvents { struct CommunitySpec { - bool valid = true; tao::json::value raw; - - std::string name; - std::string desc; - std::string algo; - std::string adminNpub; - std::string adminUsername; std::string adminTopic; }; -inline CommunitySpec lookupCommunitySpec(lmdb::txn &txn, Decompressor &decomp, UserCache &userCache, std::string_view algoDescriptor) { +inline CommunitySpec lookupCommunitySpec(lmdb::txn &txn, Decompressor &decomp, UserCache &userCache, std::string_view descriptor) { CommunitySpec spec; - size_t pos = algoDescriptor.find("/"); - if (pos == std::string_view::npos) throw herr("bad algo descriptor"); - spec.adminNpub = std::string(algoDescriptor.substr(0, pos)); + size_t pos = descriptor.find("/"); + if (pos == std::string_view::npos) throw herr("bad descriptor"); + spec.adminNpub = std::string(descriptor.substr(0, pos)); std::string authorPubkey = decodeBech32Simple(spec.adminNpub); - spec.adminTopic = algoDescriptor.substr(pos + 1); + spec.adminTopic = descriptor.substr(pos + 1); tao::json::value filter = tao::json::value({ { "authors", tao::json::value::array({ to_hex(authorPubkey) }) }, @@ -729,22 +721,12 @@ inline CommunitySpec lookupCommunitySpec(lmdb::txn &txn, Decompressor &decomp, U bool found = false; foreachByFilter(txn, filter, [&](uint64_t levId){ - tao::json::value ev = tao::json::from_string(getEventJson(txn, decomp, levId)); - spec.algo = ev.at("content").get_string(); + spec.raw = tao::json::from_string(getEventJson(txn, decomp, levId)); found = true; return false; }); - if (!found) throw herr("unable to find algo"); - - spec.raw = tao::json::from_string(spec.algo); - - spec.name = spec.raw.at("name").get_string(); - spec.desc = spec.raw.at("desc").get_string(); - spec.algo = spec.raw.at("algo").get_string(); - - auto *user = userCache.getUser(txn, decomp, authorPubkey); - spec.adminUsername = user->username; + if (!found) throw herr("unable to find community"); return spec; } diff --git a/src/apps/web/cmd_algo.cpp b/src/apps/web/cmd_algo.cpp deleted file mode 100644 index 0221531..0000000 --- a/src/apps/web/cmd_algo.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include - -#include "golpe.h" - -#include "WebData.h" -#include "AlgoScanner.h" -#include "Decompressor.h" - - -static const char USAGE[] = -R"( - Usage: - algo scan -)"; - - -void cmd_algo(const std::vector &subArgs) { - std::map args = docopt::docopt(USAGE, subArgs, true, ""); - - std::string descriptor = args[""].asString(); - - - UserCache userCache; - Decompressor decomp; - auto txn = env.txn_ro(); - - auto communitySpec = lookupCommunitySpec(txn, decomp, userCache, descriptor); - - AlgoScanner a(txn, communitySpec.algo); - auto events = a.getEvents(txn, decomp, 300); - - for (const auto &e : events) { - auto ev = Event::fromLevId(txn, e.levId); - ev.populateJson(txn, decomp); - std::cout << e.info.score << "/" << e.info.comments << " : " << ev.summaryHtml() << "\n"; - } - - -/* - std::string str; - - { - std::string line; - while (std::getline(std::cin, line)) { - str += line; - str += "\n"; - } - } - - auto alg = parseAlgo(txn, str); - - for (const auto &[k, v] : alg.variableIndexLookup) { - LI << k << " = " << alg.pubkeySets[v].size() << " recs"; - } - */ -}