From ca0449286c820d260a3708e6a709f88eae4c748a Mon Sep 17 00:00:00 2001 From: Doug Hoyte Date: Wed, 26 Jul 2023 20:24:55 -0400 Subject: [PATCH] Stop WSConnection from calling ::exit(), so that the sync command can flush its writes first. This fixes a rare race condition that caused a segfault on exit: we call ::exit() which runs the atexit handler, which destroys the LMDB environment. However, there was another thread running that still had pending events to write, so as the environment was being destroyed, it tried to use the environment to write the events. --- src/WSConnection.h | 10 ++++++---- src/apps/mesh/cmd_sync.cpp | 14 +++++++++++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/WSConnection.h b/src/WSConnection.h index b84150f..1360bcc 100644 --- a/src/WSConnection.h +++ b/src/WSConnection.h @@ -23,6 +23,8 @@ class WSConnection { std::function onConnect; std::function onMessage; std::function onTrigger; + std::function onDisconnect; + std::function onError; bool reconnect = true; uint64_t reconnectDelayMilliseconds = 5'000; std::string remoteAddr; @@ -84,8 +86,8 @@ class WSConnection { if (ws == currWs) { currWs = nullptr; - if (!reconnect) ::exit(1); - doConnect(reconnectDelayMilliseconds); + if (onDisconnect) onDisconnect(); + if (reconnect) doConnect(reconnectDelayMilliseconds); } else { LI << "Got disconnect for unexpected connection?"; } @@ -94,8 +96,8 @@ class WSConnection { hubGroup->onError([&](void *) { LI << "Websocket connection error"; - if (!reconnect) ::exit(1); - doConnect(reconnectDelayMilliseconds); + if (onError) onError(); + if (reconnect) doConnect(reconnectDelayMilliseconds); }); hubGroup->onMessage2([&](uWS::WebSocket *ws, char *message, size_t length, uWS::OpCode opCode, size_t compressedSize) { diff --git a/src/apps/mesh/cmd_sync.cpp b/src/apps/mesh/cmd_sync.cpp index 97eb76f..69b2d90 100644 --- a/src/apps/mesh/cmd_sync.cpp +++ b/src/apps/mesh/cmd_sync.cpp @@ -94,6 +94,15 @@ void cmd_sync(const std::vector 
&subArgs) { }))); }; + auto doExit = [&](int status){ + if (doDown) writer.flush(); + ::exit(status); + }; + + ws.onDisconnect = ws.onError = [&]{ + doExit(1); + }; + const uint64_t highWaterUp = 100, lowWaterUp = 50; const uint64_t batchSizeDown = 50; @@ -151,7 +160,7 @@ void cmd_sync(const std::vector &subArgs) { writer.wait(); } else if (msg.at(0) == "NEG-ERR") { LE << "Got NEG-ERR response from relay: " << msg; - ::exit(1); + doExit(1); } else { LW << "Unexpected message from relay: " << msg; } @@ -209,8 +218,7 @@ void cmd_sync(const std::vector &subArgs) { } if (syncDone && have.size() == 0 && need.size() == 0 && inFlightUp == 0 && !inFlightDown) { - if (doDown) writer.flush(); - ::exit(0); + doExit(0); } };