don't throw exceptions if modifications are made during dict operations

This commit is contained in:
Doug Hoyte
2023-02-15 17:00:11 -05:00
parent decc3aea26
commit 30b8c387a2

View File

@ -44,7 +44,7 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
Decompressor decomp; Decompressor decomp;
std::vector<uint64_t> levIds; std::vector<uint64_t> levIds;
{
auto txn = env.txn_ro(); auto txn = env.txn_ro();
auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), MAX_U64); auto filterGroup = NostrFilterGroup::unwrapped(tao::json::from_string(filterStr), MAX_U64);
@ -60,7 +60,6 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
} }
LI << "Filter matched " << levIds.size() << " records"; LI << "Filter matched " << levIds.size() << " records";
}
if (args["stats"].asBool()) { if (args["stats"].asBool()) {
@ -68,8 +67,6 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
uint64_t totalCompressedSize = 0; uint64_t totalCompressedSize = 0;
uint64_t numCompressed = 0; uint64_t numCompressed = 0;
auto txn = env.txn_ro();
btree_map<uint32_t, uint64_t> dicts; btree_map<uint32_t, uint64_t> dicts;
env.foreach_CompressionDictionary(txn, [&](auto &view){ env.foreach_CompressionDictionary(txn, [&](auto &view){
@ -112,9 +109,6 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
std::string trainingBuf; std::string trainingBuf;
std::vector<size_t> trainingSizes; std::vector<size_t> trainingSizes;
{
auto txn = env.txn_ro();
if (levIds.size() > limit) { if (levIds.size() > limit) {
LI << "Randomly selecting " << limit << " records"; LI << "Randomly selecting " << limit << " records";
std::random_device rd; std::random_device rd;
@ -128,7 +122,6 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
trainingBuf += json; trainingBuf += json;
trainingSizes.emplace_back(json.size()); trainingSizes.emplace_back(json.size());
} }
}
std::string dict(dictSize, '\0'); std::string dict(dictSize, '\0');
@ -137,19 +130,19 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
auto ret = ZDICT_trainFromBuffer(dict.data(), dict.size(), trainingBuf.data(), trainingSizes.data(), trainingSizes.size()); auto ret = ZDICT_trainFromBuffer(dict.data(), dict.size(), trainingBuf.data(), trainingSizes.data(), trainingSizes.size());
if (ZDICT_isError(ret)) throw herr("zstd training failed: ", ZSTD_getErrorName(ret)); if (ZDICT_isError(ret)) throw herr("zstd training failed: ", ZSTD_getErrorName(ret));
{ txn.abort();
auto txn = env.txn_rw(); txn = env.txn_rw();
uint64_t newDictId = env.insert_CompressionDictionary(txn, dict); uint64_t newDictId = env.insert_CompressionDictionary(txn, dict);
std::cout << "Saved new dictionary, dictId = " << newDictId << std::endl; std::cout << "Saved new dictionary, dictId = " << newDictId << std::endl;
txn.commit(); txn.commit();
}
} else if (args["compress"].asBool()) { } else if (args["compress"].asBool()) {
if (dictId == 0) throw herr("specify --dictId or --decompress"); if (dictId == 0) throw herr("specify --dictId or --decompress");
auto txn = env.txn_rw(); txn.abort();
txn = env.txn_rw();
auto view = env.lookup_CompressionDictionary(txn, dictId); auto view = env.lookup_CompressionDictionary(txn, dictId);
if (!view) throw herr("couldn't find dictId ", dictId); if (!view) throw herr("couldn't find dictId ", dictId);
@ -166,7 +159,14 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
std::string compressedData(500'000, '\0'); std::string compressedData(500'000, '\0');
for (auto levId : levIds) { for (auto levId : levIds) {
auto orig = getEventJson(txn, decomp, levId); std::string_view orig;
try {
orig = getEventJson(txn, decomp, levId);
} catch (std::exception &e) {
continue;
}
auto ret = ZSTD_compress_usingCDict(cctx, compressedData.data(), compressedData.size(), orig.data(), orig.size(), cdict); auto ret = ZSTD_compress_usingCDict(cctx, compressedData.data(), compressedData.size(), orig.data(), orig.size(), cdict);
if (ZDICT_isError(ret)) throw herr("zstd compression failed: ", ZSTD_getErrorName(ret)); if (ZDICT_isError(ret)) throw herr("zstd compression failed: ", ZSTD_getErrorName(ret));
@ -203,13 +203,20 @@ void cmd_dict(const std::vector<std::string> &subArgs) {
LI << "Original event sizes: " << origSizes; LI << "Original event sizes: " << origSizes;
LI << "New event sizes: " << compressedSizes; LI << "New event sizes: " << compressedSizes;
} else if (args["decompress"].asBool()) { } else if (args["decompress"].asBool()) {
auto txn = env.txn_rw(); txn.abort();
txn = env.txn_rw();
uint64_t pendingFlush = 0; uint64_t pendingFlush = 0;
uint64_t processed = 0; uint64_t processed = 0;
for (auto levId : levIds) { for (auto levId : levIds) {
auto orig = getEventJson(txn, decomp, levId); std::string_view orig;
try {
orig = getEventJson(txn, decomp, levId);
} catch (std::exception &e) {
continue;
}
std::string newVal; std::string newVal;