mirror of
https://github.com/hoytech/strfry.git
synced 2025-06-19 09:36:43 +00:00
don't throw exceptions if modifications are made during dict operations
This commit is contained in:
@ -44,32 +44,29 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
|
|||||||
Decompressor decomp;
|
Decompressor decomp;
|
||||||
std::vector<uint64_t> levIds;
|
std::vector<uint64_t> levIds;
|
||||||
|
|
||||||
{
|
|
||||||
auto txn = env.txn_ro();
|
|
||||||
|
|
||||||
auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), MAX_U64);
|
auto txn = env.txn_ro();
|
||||||
Subscription sub(1, "junkSub", filterGroup);
|
|
||||||
DBScanQuery query(sub);
|
|
||||||
|
|
||||||
while (1) {
|
auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), MAX_U64);
|
||||||
bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t levId){
|
Subscription sub(1, "junkSub", filterGroup);
|
||||||
levIds.push_back(levId);
|
DBScanQuery query(sub);
|
||||||
});
|
|
||||||
|
|
||||||
if (complete) break;
|
while (1) {
|
||||||
}
|
bool complete = query.process(txn, MAX_U64, false, [&](const auto &sub, uint64_t levId){
|
||||||
|
levIds.push_back(levId);
|
||||||
|
});
|
||||||
|
|
||||||
LI << "Filter matched " << levIds.size() << " records";
|
if (complete) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LI << "Filter matched " << levIds.size() << " records";
|
||||||
|
|
||||||
|
|
||||||
if (args["stats"].asBool()) {
|
if (args["stats"].asBool()) {
|
||||||
uint64_t totalSize = 0;
|
uint64_t totalSize = 0;
|
||||||
uint64_t totalCompressedSize = 0;
|
uint64_t totalCompressedSize = 0;
|
||||||
uint64_t numCompressed = 0;
|
uint64_t numCompressed = 0;
|
||||||
|
|
||||||
auto txn = env.txn_ro();
|
|
||||||
|
|
||||||
btree_map<uint32_t, uint64_t> dicts;
|
btree_map<uint32_t, uint64_t> dicts;
|
||||||
|
|
||||||
env.foreach_CompressionDictionary(txn, [&](auto &view){
|
env.foreach_CompressionDictionary(txn, [&](auto &view){
|
||||||
@ -112,22 +109,18 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
|
|||||||
std::string trainingBuf;
|
std::string trainingBuf;
|
||||||
std::vector<size_t> trainingSizes;
|
std::vector<size_t> trainingSizes;
|
||||||
|
|
||||||
{
|
if (levIds.size() > limit) {
|
||||||
auto txn = env.txn_ro();
|
LI << "Randomly selecting " << limit << " records";
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 g(rd());
|
||||||
|
std::shuffle(levIds.begin(), levIds.end(), g);
|
||||||
|
levIds.resize(limit);
|
||||||
|
}
|
||||||
|
|
||||||
if (levIds.size() > limit) {
|
for (auto levId : levIds) {
|
||||||
LI << "Randomly selecting " << limit << " records";
|
std::string json = std::string(getEventJson(txn, decomp, levId));
|
||||||
std::random_device rd;
|
trainingBuf += json;
|
||||||
std::mt19937 g(rd());
|
trainingSizes.emplace_back(json.size());
|
||||||
std::shuffle(levIds.begin(), levIds.end(), g);
|
|
||||||
levIds.resize(limit);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto levId : levIds) {
|
|
||||||
std::string json = std::string(getEventJson(txn, decomp, levId));
|
|
||||||
trainingBuf += json;
|
|
||||||
trainingSizes.emplace_back(json.size());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string dict(dictSize, '\0');
|
std::string dict(dictSize, '\0');
|
||||||
@ -137,19 +130,19 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
|
|||||||
auto ret = ZDICT_trainFromBuffer(dict.data(), dict.size(), trainingBuf.data(), trainingSizes.data(), trainingSizes.size());
|
auto ret = ZDICT_trainFromBuffer(dict.data(), dict.size(), trainingBuf.data(), trainingSizes.data(), trainingSizes.size());
|
||||||
if (ZDICT_isError(ret)) throw herr("zstd training failed: ", ZSTD_getErrorName(ret));
|
if (ZDICT_isError(ret)) throw herr("zstd training failed: ", ZSTD_getErrorName(ret));
|
||||||
|
|
||||||
{
|
txn.abort();
|
||||||
auto txn = env.txn_rw();
|
txn = env.txn_rw();
|
||||||
|
|
||||||
uint64_t newDictId = env.insert_CompressionDictionary(txn, dict);
|
uint64_t newDictId = env.insert_CompressionDictionary(txn, dict);
|
||||||
|
|
||||||
std::cout << "Saved new dictionary, dictId = " << newDictId << std::endl;
|
std::cout << "Saved new dictionary, dictId = " << newDictId << std::endl;
|
||||||
|
|
||||||
txn.commit();
|
txn.commit();
|
||||||
}
|
|
||||||
} else if (args["compress"].asBool()) {
|
} else if (args["compress"].asBool()) {
|
||||||
if (dictId == 0) throw herr("specify --dictId or --decompress");
|
if (dictId == 0) throw herr("specify --dictId or --decompress");
|
||||||
|
|
||||||
auto txn = env.txn_rw();
|
txn.abort();
|
||||||
|
txn = env.txn_rw();
|
||||||
|
|
||||||
auto view = env.lookup_CompressionDictionary(txn, dictId);
|
auto view = env.lookup_CompressionDictionary(txn, dictId);
|
||||||
if (!view) throw herr("couldn't find dictId ", dictId);
|
if (!view) throw herr("couldn't find dictId ", dictId);
|
||||||
@ -166,7 +159,14 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
|
|||||||
std::string compressedData(500'000, '\0');
|
std::string compressedData(500'000, '\0');
|
||||||
|
|
||||||
for (auto levId : levIds) {
|
for (auto levId : levIds) {
|
||||||
auto orig = getEventJson(txn, decomp, levId);
|
std::string_view orig;
|
||||||
|
|
||||||
|
try {
|
||||||
|
orig = getEventJson(txn, decomp, levId);
|
||||||
|
} catch (std::exception &e) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
auto ret = ZSTD_compress_usingCDict(cctx, compressedData.data(), compressedData.size(), orig.data(), orig.size(), cdict);
|
auto ret = ZSTD_compress_usingCDict(cctx, compressedData.data(), compressedData.size(), orig.data(), orig.size(), cdict);
|
||||||
if (ZDICT_isError(ret)) throw herr("zstd compression failed: ", ZSTD_getErrorName(ret));
|
if (ZDICT_isError(ret)) throw herr("zstd compression failed: ", ZSTD_getErrorName(ret));
|
||||||
|
|
||||||
@ -203,13 +203,20 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
|
|||||||
LI << "Original event sizes: " << origSizes;
|
LI << "Original event sizes: " << origSizes;
|
||||||
LI << "New event sizes: " << compressedSizes;
|
LI << "New event sizes: " << compressedSizes;
|
||||||
} else if (args["decompress"].asBool()) {
|
} else if (args["decompress"].asBool()) {
|
||||||
auto txn = env.txn_rw();
|
txn.abort();
|
||||||
|
txn = env.txn_rw();
|
||||||
|
|
||||||
uint64_t pendingFlush = 0;
|
uint64_t pendingFlush = 0;
|
||||||
uint64_t processed = 0;
|
uint64_t processed = 0;
|
||||||
|
|
||||||
for (auto levId : levIds) {
|
for (auto levId : levIds) {
|
||||||
auto orig = getEventJson(txn, decomp, levId);
|
std::string_view orig;
|
||||||
|
|
||||||
|
try {
|
||||||
|
orig = getEventJson(txn, decomp, levId);
|
||||||
|
} catch (std::exception &e) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
std::string newVal;
|
std::string newVal;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user