Stop WSConnection from calling ::exit(), so that the sync command can flush its writes first

This fixes a rare race condition that caused a segfault on exit:

We called ::exit(), which runs the atexit handler, which in turn destroys the LMDB environment. However, another thread was still running with pending events to write, so while the environment was being destroyed, that thread tried to use it to write the events.
This commit is contained in:
Doug Hoyte
2023-07-26 20:24:55 -04:00
parent 0fe929ffcb
commit ca0449286c
2 changed files with 17 additions and 7 deletions

View File

@ -23,6 +23,8 @@ class WSConnection {
std::function<void()> onConnect;
std::function<void(std::string_view, uWS::OpCode, size_t)> onMessage;
std::function<void()> onTrigger;
std::function<void()> onDisconnect;
std::function<void()> onError;
bool reconnect = true;
uint64_t reconnectDelayMilliseconds = 5'000;
std::string remoteAddr;
@ -84,8 +86,8 @@ class WSConnection {
if (ws == currWs) {
currWs = nullptr;
if (!reconnect) ::exit(1);
doConnect(reconnectDelayMilliseconds);
if (onDisconnect) onDisconnect();
if (reconnect) doConnect(reconnectDelayMilliseconds);
} else {
LI << "Got disconnect for unexpected connection?";
}
@ -94,8 +96,8 @@ class WSConnection {
hubGroup->onError([&](void *) {
LI << "Websocket connection error";
if (!reconnect) ::exit(1);
doConnect(reconnectDelayMilliseconds);
if (onError) onError();
if (reconnect) doConnect(reconnectDelayMilliseconds);
});
hubGroup->onMessage2([&](uWS::WebSocket<uWS::CLIENT> *ws, char *message, size_t length, uWS::OpCode opCode, size_t compressedSize) {

View File

@ -94,6 +94,15 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
})));
};
auto doExit = [&](int status){
if (doDown) writer.flush();
::exit(status);
};
ws.onDisconnect = ws.onError = [&]{
doExit(1);
};
const uint64_t highWaterUp = 100, lowWaterUp = 50;
const uint64_t batchSizeDown = 50;
@ -151,7 +160,7 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
writer.wait();
} else if (msg.at(0) == "NEG-ERR") {
LE << "Got NEG-ERR response from relay: " << msg;
::exit(1);
doExit(1);
} else {
LW << "Unexpected message from relay: " << msg;
}
@ -209,8 +218,7 @@ void cmd_sync(const std::vector<std::string> &subArgs) {
}
if (syncDone && have.size() == 0 && need.size() == 0 && inFlightUp == 0 && !inFlightDown) {
if (doDown) writer.flush();
::exit(0);
doExit(0);
}
};