Skip to content

Commit

Permalink
Improvements for sigsegv handler (#1391)
Browse files Browse the repository at this point in the history
  • Loading branch information
4eUeP authored Apr 28, 2023
1 parent f9c1f26 commit 722fc61
Show file tree
Hide file tree
Showing 18 changed files with 225 additions and 63 deletions.
204 changes: 204 additions & 0 deletions common/base/cbits/fatalsignal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#include "hs_common.h"

#include <algorithm>
#include <atomic>
#include <cstdio>
#include <cstring>
#include <memory>
#include <pwd.h>
#include <signal.h>
#include <unistd.h>

#include <folly/experimental/symbolizer/Symbolizer.h>
#include <logdevice/common/checks.h>
#include <rocksdb/cache.h>
#include <sys/mman.h>
#include <sys/types.h>

// Boost.Stacktrace does not show detailed frame information, so the simpler
// Boost-based handler below is kept only for reference:
//
// void handle_fatal_signal(int signum) {
// ::signal(signum, SIG_DFL);
// std::cerr << "handle_fatal_signal(): Caught coredump signal " << signum
// << '\n';
// std::cerr << "Backtrace:\n" << boost::stacktrace::stacktrace() << '\n';
// ::raise(SIGTRAP);
// }

// The following code is copied from "logdevice/server/fatalsignal.cpp"

// a collection of rocksdb caches
struct RocksDBCachesInfo {
std::weak_ptr<rocksdb::Cache> block_cache;
std::weak_ptr<rocksdb::Cache> block_cache_compressed;
std::weak_ptr<rocksdb::Cache> metadata_block_cache;
};

// Global registry of the RocksDB caches that handle_fatal_signal() walks.
// Declared extern first so other translation units can refer to it, then
// defined here (default-constructed, i.e. all three weak pointers empty).
// NOTE(review): nothing in the visible part of this file assigns these
// members — confirm where (or whether) they are populated.
extern RocksDBCachesInfo g_rocksdb_caches;

RocksDBCachesInfo g_rocksdb_caches;

// Statistics printed at the end of handle_fatal_signal(): number of munmap()
// calls issued and number of pages covered by them.
static size_t unmap_count = 0;
static size_t page_count = 0;

// Page geometry used to align cache entries; a 4 KiB page is hard-coded.
static constexpr size_t PAGE_SHIFT = 12;
static constexpr size_t PAGE_SIZE = size_t{1} << PAGE_SHIFT;
// Clears the in-page offset bits of an address.
static constexpr size_t PAGE_MASK = ~(PAGE_SIZE - 1);

// A page-aligned address range [start, end) to be unmapped by the fatal
// signal handler. `end` is exclusive: the handler computes the length of a
// range as `end - start`.
struct Segment {
  size_t start;  // first byte of the range (page aligned)
  size_t end;    // one past the last byte of the range (page aligned)

  // Takes size_t rather than int so that addresses above INT_MAX are stored
  // without truncation or sign extension.
  Segment(size_t s, size_t e) : start(s), end(e) {}
};

// Size in bytes (1 GiB) of the scratch mapping that holds collected segments,
// and the number of Segment records that fit into it.
static constexpr size_t SEGMAP_SIZE = size_t{1} << 30;
static constexpr size_t max_num_segs = SEGMAP_SIZE / sizeof(Segment);

// Scratch array of collected segments (mmap()ed inside the signal handler)
// and the number of slots filled so far.
static Segment* sarray = nullptr;
static size_t s_index = 0;

static void unmap_callback(void* entry, size_t charge) {
if (s_index >= max_num_segs) {
return;
}
// align both start and end to page boundaries
sarray[s_index].start = (size_t)entry & PAGE_MASK;
sarray[s_index].end = ((size_t)entry + charge + PAGE_SIZE - 1) & PAGE_MASK;
++s_index;
}

// Writes a NUL-terminated string to stderr with a single write(2) call,
// avoiding stdio so it can be used from the signal handler. The result of
// write() is deliberately ignored (best effort).
static void safe_print(const char msg[]) {
  const size_t len = std::strlen(msg);
  ssize_t ignored = write(STDERR_FILENO, msg, len);
  (void)ignored;
}

// Prints `num` in decimal to stderr via safe_print(), without using stdio or
// the heap. Digits are produced least-significant first, so the buffer is
// filled from the back and printed starting at the first digit written.
static void safe_print_unsigned(size_t num) {
  char digits[64];
  char* cursor = digits + sizeof(digits) - 1;
  *cursor = '\0';
  do {
    --cursor;
    *cursor = static_cast<char>('0' + (num % 10));
    num /= 10;
  } while (num != 0);
  safe_print(cursor);
}

// Process-wide stack trace printer used by handle_fatal_signal().
//
// Construct this on startup, since it allocates on the heap and we don't want
// to do that in a signal handler. Leak it so we don't have to worry about
// destruction order.
folly::symbolizer::SafeStackTracePrinter* gStackTrace =
new folly::symbolizer::SafeStackTracePrinter();

// Handler installed for fatal signals (see setup_fatal_signal_handler()).
//
// Steps, in order:
//   1. Ensure only one thread runs the handler; other threads are parked and
//      a recursive entry from the same thread aborts immediately.
//   2. Print the signal number and a stack trace to stderr.
//   3. Collect the page-aligned ranges of all entries in the registered
//      RocksDB caches, merge overlapping/adjacent ranges and munmap() them
//      (presumably to keep cache contents out of the core dump — confirm).
//   4. Raise SIGTRAP to terminate the process.
//
// sig: number of the signal being handled; only used for logging.
static void handle_fatal_signal(int sig) {
// Id of the thread that entered the handler first; 0 means "nobody yet".
static std::atomic<pthread_t> insegv(0);
pthread_t old_val(0);

// On failure compare_exchange_strong stores the current owner into old_val.
if (!insegv.compare_exchange_strong(old_val, pthread_self())) {
/* another thread is in the handler, suspend this one forever */
if (pthread_self() != old_val) {
pause();
_exit(EXIT_FAILURE);
}
/* recursive call from the same thread, give up and dump core*/
raise(SIGTRAP);
_exit(EXIT_FAILURE);
}

safe_print("handle_fatal_signal(): Caught coredump signal ");
safe_print_unsigned(sig);
safe_print("\n");

gStackTrace->printStackTrace(true);

// allocate a big enough VM to hold all segments
// (an anonymous private mapping is used instead of a heap allocation)
sarray = (Segment*)mmap(nullptr, SEGMAP_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (sarray == (Segment*)MAP_FAILED) {
safe_print("handle_fatal_signal(): allocate seg array failed.\n");
raise(SIGTRAP);
_exit(EXIT_FAILURE);
}

// Walk every cache that is still alive; unmap_callback appends one Segment
// per cache entry to sarray and advances s_index.
for (auto cache_weak_ptr :
{g_rocksdb_caches.block_cache, g_rocksdb_caches.block_cache_compressed,
g_rocksdb_caches.metadata_block_cache}) {
std::shared_ptr<rocksdb::Cache> cache = cache_weak_ptr.lock();
if (cache != nullptr) {
cache->ApplyToAllCacheEntries(unmap_callback, false);
}
}

// Nothing was collected: release the scratch mapping and terminate.
if (s_index == 0) {
safe_print("handle_fatal_signal(): No rocksdb caches found.\n");
munmap((void*)sarray, SEGMAP_SIZE);
raise(SIGTRAP);
return;
}

safe_print("handle_fatal_signal(): processed segments: ");
safe_print_unsigned(s_index);
safe_print("\n");

// Sort by start address so overlapping or adjacent ranges can be merged in a
// single left-to-right pass.
std::sort(sarray, sarray + s_index,
[](const Segment& a, const Segment& b) -> bool {
return a.start < b.start;
});

// [cur_start, cur_end) is the merged range being accumulated. The loop runs
// one step past the last element (i == s_index) so the final range is
// flushed as well.
size_t cur_start = sarray[0].start, cur_end = sarray[0].end;
for (size_t i = 1; i <= s_index; ++i) {
// unmap a segment
if (i == s_index || sarray[i].start > cur_end) {
size_t len = cur_end - cur_start;
munmap((void*)cur_start, len);
++unmap_count;
page_count += (len >> PAGE_SHIFT);
if (i < s_index) {
cur_start = sarray[i].start;
cur_end = sarray[i].end;
}
}
// could merge
else {
cur_end = std::max(cur_end, sarray[i].end);
}
}

safe_print("handle_fatal_signal(): unmapped pages - unmap calls: ");
safe_print_unsigned(page_count);
safe_print(" - ");
safe_print_unsigned(unmap_count);
safe_print("\n");

// unmap the seg array
munmap((void*)sarray, SEGMAP_SIZE);

raise(SIGTRAP);
}

// Installs `handler` as the disposition of `signum` via sigaction(), with an
// empty signal mask and no flags. Failure to install is reported through
// ld_check().
static void setup_signal_handler(int signum, void (*handler)(int)) {
  struct sigaction action;
  sigemptyset(&action.sa_mask);
  action.sa_flags = 0;
  action.sa_handler = handler;

  const int rc = sigaction(signum, &action, nullptr);
  ld_check(rc == 0);
}

extern "C" {
// ----------------------------------------------------------------------------

// C entry point (called from Haskell via FFI): routes SIGSEGV, SIGABRT,
// SIGBUS, SIGQUIT, SIGILL and SIGFPE to handle_fatal_signal().
void setup_fatal_signal_handler() {
  static const int fatal_signals[] = {SIGSEGV, SIGABRT, SIGBUS,
                                      SIGQUIT, SIGILL,  SIGFPE};
  const size_t count = sizeof(fatal_signals) / sizeof(fatal_signals[0]);
  for (size_t idx = 0; idx < count; ++idx) {
    setup_signal_handler(fatal_signals[idx], handle_fatal_signal);
  }
}

// ----------------------------------------------------------------------------
}
13 changes: 0 additions & 13 deletions common/base/cbits/hs_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,21 +1,8 @@
#include "hs_common.h"

#include <boost/stacktrace.hpp>
#include <iostream>
#include <signal.h>

extern "C" {
// ----------------------------------------------------------------------------

// Previous fatal-signal handler: restores the default disposition for
// `signum`, prints the signal number and a Boost.Stacktrace backtrace to
// stderr, then raises SIGTRAP.
// NOTE(review): iostream and boost::stacktrace are used inside a signal
// handler here; they are not known to be async-signal-safe — confirm.
void handle_fatal_signal(int signum) {
::signal(signum, SIG_DFL);
std::cerr << "handle_fatal_signal(): Caught coredump signal " << signum
<< '\n';
std::cerr << "Backtrace:\n" << boost::stacktrace::stacktrace() << '\n';
::raise(SIGTRAP);
}

// Installs handle_fatal_signal as the SIGSEGV handler using signal().
void setup_sigsegv_handler() { ::signal(SIGSEGV, &handle_fatal_signal); }

// ----------------------------------------------------------------------------
}
1 change: 1 addition & 0 deletions common/base/hstream-common-base.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ library
hs_cpp_lib.h

cxx-sources:
cbits/fatalsignal.cpp
cbits/hs_struct.cpp
cbits/hs_utils.cpp

Expand Down
10 changes: 0 additions & 10 deletions common/base/include/hs_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,6 @@ extern "C" {

void setup_sigsegv_handler();

// ----------------------------------------------------------------------------
// Stats
//
// See: cbits/stats.cpp

// ----------------------------------------------------------------------------
// Query
//
// See: cbits/query.cpp

// ----------------------------------------------------------------------------
#ifdef __cplusplus
} /* end extern "C" */
Expand Down
6 changes: 3 additions & 3 deletions common/hstream/HStream/Utils/Common.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module HStream.Utils.Common
( maybeToEither
, setupSigsegvHandler
, setupFatalSignalHandler
, newRandomText
) where

Expand All @@ -13,8 +13,8 @@ import System.Random
-- | Convert a 'Maybe' into an 'Either', using the supplied value as the
-- 'Left' result when the input is 'Nothing'.
maybeToEither :: b -> Maybe a -> Either b a
maybeToEither errmsg Nothing  = Left errmsg
maybeToEither _      (Just x) = Right x

-- | Binding to the C function @setup_sigsegv_handler@ declared in
-- @hs_common.h@.
foreign import ccall unsafe "hs_common.h setup_sigsegv_handler"
setupSigsegvHandler :: IO ()
-- | Binding to the C function @setup_fatal_signal_handler@ declared in
-- @hs_common.h@.
foreign import ccall unsafe "hs_common.h setup_fatal_signal_handler"
setupFatalSignalHandler :: IO ()

-- | Generate a 'Text' of @n@ random characters drawn from @\'a\'..\'z\'@,
-- using a freshly split standard generator. A non-positive @n@ yields an
-- empty text.
newRandomText :: Int -> IO Text
newRandomText n = do
  gen <- newStdGen
  let letters = randomRs ('a', 'z') gen
  pure (Text.pack (take n letters))
20 changes: 0 additions & 20 deletions common/hstream/cbits/hs_utils.cpp

This file was deleted.

2 changes: 1 addition & 1 deletion common/hstream/include/hs_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extern "C" {
// ----------------------------------------------------------------------------
// Utils

void setup_sigsegv_handler();
void setup_fatal_signal_handler();

// ----------------------------------------------------------------------------
// Stats
Expand Down
2 changes: 1 addition & 1 deletion common/hstream/test/HStream/UtilsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ timeIntervalSpec = describe "TimeInterval" $ do
utilsSpec :: Spec
utilsSpec = describe "HStream.Utils" $ do
-- TODO
it "setupSigsegvHandler" $ setupSigsegvHandler `shouldReturn` ()
it "setupFatalSignalHandler" $ setupFatalSignalHandler `shouldReturn` ()
4 changes: 2 additions & 2 deletions common/stats/test/HStream/StatsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ import Test.Hspec

import HStream.Stats
import HStream.StatsSpecUtils (mkTimeSeriesTest)
import HStream.Utils (runConc, setupSigsegvHandler)
import HStream.Utils (runConc, setupFatalSignalHandler)

{-# ANN module ("HLint: ignore Use head" :: String) #-}

spec :: Spec
spec = do
runIO setupSigsegvHandler
runIO setupFatalSignalHandler

statsSpec
threadedStatsSpec
Expand Down
2 changes: 1 addition & 1 deletion hstream/app/client.hs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ import HStream.Utils (ResourceType (..),
formatResult,
mkGRPCClientConfWithSSL,
pattern EnumPB,
setupSigsegvHandler)
setupFatalSignalHandler)
import qualified HStream.Utils.Aeson as AesonComp

main :: IO ()
Expand Down
4 changes: 2 additions & 2 deletions hstream/app/server.hs
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,15 @@ import qualified HStream.ThirdParty.Protobuf as Proto
import HStream.Utils (ResourceType (..),
getProtoTimestamp,
pattern EnumPB,
setupSigsegvHandler)
setupFatalSignalHandler)


main :: IO ()
main = getConfig >>= app

app :: ServerOpts -> IO ()
app config@ServerOpts{..} = do
setupSigsegvHandler
setupFatalSignalHandler
Log.setDefaultLogger _serverLogLevel _serverLogWithColor Log.LogStderr
Log.setLogDeviceDbgLevel' _ldLogLevel

Expand Down
4 changes: 2 additions & 2 deletions hstream/src/HStream/Client/Execute.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ import HStream.Utils (Format, HStreamClientApi,
getServerResp,
mkGRPCClientConfWithSSL,
serverNodeToSocketAddr,
setupSigsegvHandler)
setupFatalSignalHandler)

executeShowPlan :: HStreamCliContext -> ShowObject -> IO ()
executeShowPlan ctx showObject =
Expand Down Expand Up @@ -147,7 +147,7 @@ initCliContext RefinedCliConnOpts{..} = do
currentServer <- newMVar addr
let sslConfig = clientSSLConfig clientConfig
let ctx = HStreamCliContext {..}
setupSigsegvHandler
setupFatalSignalHandler
connected <- waitForServerToStart (retryTimeout * 1000000) addr sslConfig
case connected of
Nothing -> errorWithoutStackTrace "Connection timed out. Please check the server URI and try again."
Expand Down
2 changes: 1 addition & 1 deletion hstream/test/HStream/AdminCommandSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import qualified HStream.Utils.Aeson as Aeson

spec :: Spec
spec = describe "HStream.AdminCommnadSpec" $ do
runIO setupSigsegvHandler
runIO setupFatalSignalHandler
runIO $ setLogDeviceDbgLevel C_DBG_ERROR

adminCommandStatsSpec
Expand Down
2 changes: 1 addition & 1 deletion hstream/test/HStream/HandlerSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import HStream.Utils

spec :: Spec
spec = describe "HStream.HandlerSpec" $ do
runIO setupSigsegvHandler
runIO setupFatalSignalHandler
runIO $ setLogDeviceDbgLevel C_DBG_ERROR

streamSpec
Expand Down
Loading

0 comments on commit 722fc61

Please sign in to comment.