Skip to content

Commit

Permalink
launch command worker earlier
Browse files Browse the repository at this point in the history
Since the command worker is forked from the main naemon process, it inherits all
open files (e.g. the pidfile, logfiles, etc.). It keeps those references open, even
if the main process rotates and reopens those files.

This patch closes query handler and pid file references after starting the
command worker and also moves starting the command worker before initializing
the neb modules, so it won't inherit open logfiles from neb modules.

references:

- ConSol-Monitoring/omd#146

Signed-off-by: Sven Nierlein <[email protected]>
  • Loading branch information
sni committed Jul 5, 2024
1 parent 22f0fb6 commit e842ca7
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 36 deletions.
8 changes: 8 additions & 0 deletions src/naemon/commands.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "globals.h"
#include "logging.h"
#include "nm_alloc.h"
#include "query-handler.h"
#include "lib/libnaemon.h"
#include <string.h>
#include <sys/types.h>
Expand Down Expand Up @@ -388,6 +389,13 @@ int launch_command_file_worker(void)
/* make our own process-group so we can be traced into and stuff */
setpgid(0, 0);


// close inherited file handles
close_log_file();
close_standard_fds();
qh_close_socket();
close_lockfile_fd();

str = nm_strdup(command_file);
free_memory(get_global_macros());
command_file = str;
Expand Down
47 changes: 24 additions & 23 deletions src/naemon/naemon.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,22 +543,6 @@ int main(int argc, char **argv)
nerd_init();
timing_point("Initialized NERD\n");

/* initialize check workers */
timing_point("Spawning %u workers\n", wproc_num_workers_spawned);
if (init_workers(num_check_workers) < 0) {
nm_log(NSLOG_RUNTIME_ERROR, "Failed to spawn workers. Aborting\n");
exit(EXIT_FAILURE);
}
timing_point("Spawned %u workers\n", wproc_num_workers_spawned);

timing_point("Connecting %u workers\n", wproc_num_workers_online);
i = 0;
while (i < 50 && wproc_num_workers_online < wproc_num_workers_spawned) {
iobroker_poll(nagios_iobs, 50);
i++;
}
timing_point("Connected %u workers\n", wproc_num_workers_online);

/* read in all object config data */
if (result == OK) {
timing_point("Reading all object data\n");
Expand All @@ -576,6 +560,29 @@ int main(int argc, char **argv)
init_event_queue();
timing_point("Initialized Event queue\n");

registered_commands_init(200);
register_core_commands();
/* fire up command file worker */
timing_point("Launching command file worker\n");
launch_command_file_worker();
timing_point("Launched command file worker\n");

/* initialize check workers */
timing_point("Spawning %u workers\n", wproc_num_workers_spawned);
if (init_workers(num_check_workers) < 0) {
nm_log(NSLOG_RUNTIME_ERROR, "Failed to spawn workers. Aborting\n");
exit(EXIT_FAILURE);
}
timing_point("Spawned %u workers\n", wproc_num_workers_spawned);

timing_point("Connecting %u workers\n", wproc_num_workers_online);
i = 0;
while (i < 50 && wproc_num_workers_online < wproc_num_workers_spawned) {
iobroker_poll(nagios_iobs, 50);
i++;
}
timing_point("Connected %u workers\n", wproc_num_workers_online);

/* load modules */
timing_point("Loading modules\n");
if (neb_load_all_modules() != OK) {
Expand Down Expand Up @@ -619,6 +626,7 @@ int main(int argc, char **argv)
broker_program_state(NEBTYPE_PROCESS_SHUTDOWN, NEBFLAG_PROCESS_INITIATED, NEBATTR_SHUTDOWN_ABNORMAL);

cleanup();
shutdown_command_file_worker();
exit(ERROR);
}

Expand Down Expand Up @@ -680,13 +688,6 @@ int main(int argc, char **argv)
log_service_states(INITIAL_STATES, NULL);
timing_point("Logged initial states\n");

registered_commands_init(200);
register_core_commands();
/* fire up command file worker */
timing_point("Launching command file worker\n");
launch_command_file_worker();
timing_point("Launched command file worker\n");

broker_program_state(NEBTYPE_PROCESS_EVENTLOOPSTART, NEBFLAG_NONE, NEBATTR_NONE);

/* get event start time and save as macro */
Expand Down
8 changes: 7 additions & 1 deletion src/naemon/query-handler.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ int qh_init(const char *path)
result = iobroker_register(nagios_iobs, qh_listen_sock, NULL, qh_registration_input);
if (result < 0) {
g_hash_table_destroy(qh_table);
close(qh_listen_sock);
qh_close_socket();
nm_log(NSLOG_RUNTIME_ERROR, "qh: Failed to register socket with io broker: %s\n", iobroker_strerror(result));
return ERROR;
}
Expand All @@ -408,3 +408,9 @@ int qh_init(const char *path)

return 0;
}

/**
 * Close the query handler's listening socket, if one is open.
 *
 * The descriptor is reset to -1 afterwards, so calling this function
 * again is a harmless no-op.
 *
 * NOTE(review): this relies on qh_listen_sock holding a negative value
 * (not 0) whenever no socket is open - confirm at its definition.
 */
void qh_close_socket(void)
{
	/* 0 is a valid file descriptor; only negative values mean "not open" */
	if (qh_listen_sock >= 0) {
		close(qh_listen_sock);
	}
	qh_listen_sock = -1;
}
1 change: 1 addition & 0 deletions src/naemon/query-handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ int qh_init(const char *path);
void qh_deinit(const char *path);
int qh_register_handler(const char *name, const char *description, unsigned int options, qh_handler handler);
const char *qh_strerror(int code);
void qh_close_socket(void);

NAGIOS_END_DECL

Expand Down
31 changes: 19 additions & 12 deletions src/naemon/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ int host_skip_check_dependency_status = DEFAULT_SKIP_CHECK_STATUS;

static long long check_file_size(char *, unsigned long, struct rlimit);

static int lock_file_fd = -1; /* the file handle of the lockfile */

time_t max_check_result_file_age = DEFAULT_MAX_CHECK_RESULT_AGE;

check_stats check_statistics[MAX_CHECK_STATS_TYPES];
Expand Down Expand Up @@ -498,7 +500,6 @@ int signal_parent(int sig)
int daemon_init(void)
{
int pid = 0;
int lockfile = 0;
int val = 0;
char buf[256];
struct flock lock;
Expand All @@ -509,16 +510,16 @@ int daemon_init(void)

umask(S_IWGRP | S_IWOTH);

lockfile = open(lock_file, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
lock_file_fd = open(lock_file, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);

if (lockfile < 0) {
if (lock_file_fd < 0) {
nm_log(NSLOG_RUNTIME_ERROR, "Failed to obtain lock on file %s: %s\n", lock_file, strerror(errno));
nm_log(NSLOG_PROCESS_INFO | NSLOG_RUNTIME_ERROR, "Bailing out due to errors encountered while attempting to daemonize... (PID=%d)", (int)getpid());
return (ERROR);
}

/* see if we can read the contents of the lockfile */
if ((val = read(lockfile, buf, (size_t)10)) < 0) {
if ((val = read(lock_file_fd, buf, (size_t)10)) < 0) {
nm_log(NSLOG_RUNTIME_ERROR, "Lockfile exists but cannot be read");
return (ERROR);
}
Expand All @@ -540,7 +541,7 @@ int daemon_init(void)
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;
if (fcntl(lockfile, F_GETLK, &lock) == -1) {
if (fcntl(lock_file_fd, F_GETLK, &lock) == -1) {
nm_log(NSLOG_RUNTIME_ERROR, "Failed to access lockfile '%s'. %s. Bailing out...", lock_file, strerror(errno));
return (ERROR);
}
Expand Down Expand Up @@ -609,9 +610,9 @@ int daemon_init(void)
lock.l_whence = SEEK_SET;
lock.l_len = 0;
lock.l_pid = getpid();
if (fcntl(lockfile, F_SETLK, &lock) == -1) {
if (fcntl(lock_file_fd, F_SETLK, &lock) == -1) {
if (errno == EACCES || errno == EAGAIN) {
fcntl(lockfile, F_GETLK, &lock);
fcntl(lock_file_fd, F_GETLK, &lock);
nm_log(NSLOG_RUNTIME_ERROR, "Lockfile '%s' looks like its already held by another instance of Naemon (PID %d). Bailing out, post-fork...", lock_file, (int)lock.l_pid);
} else
nm_log(NSLOG_RUNTIME_ERROR, "Cannot lock lockfile '%s': %s. Bailing out...", lock_file, strerror(errno));
Expand All @@ -620,28 +621,34 @@ int daemon_init(void)
}

/* write PID to lockfile... */
lseek(lockfile, 0, SEEK_SET);
if (ftruncate(lockfile, 0) != 0) {
lseek(lock_file_fd, 0, SEEK_SET);
if (ftruncate(lock_file_fd, 0) != 0) {
nm_log(NSLOG_RUNTIME_ERROR, "Cannot truncate lockfile '%s': %s. Bailing out...", lock_file, strerror(errno));
return (ERROR);
}
sprintf(buf, "%d\n", (int)getpid());

if (nsock_write_all(lockfile, buf, strlen(buf)) != 0) {
if (nsock_write_all(lock_file_fd, buf, strlen(buf)) != 0) {
nm_log(NSLOG_RUNTIME_ERROR, "Cannot write PID to lockfile '%s': %s. Bailing out...", lock_file, strerror(errno));
return (ERROR);
}

/* make sure lock file stays open while program is executing... */
val = fcntl(lockfile, F_GETFD, 0);
val = fcntl(lock_file_fd, F_GETFD, 0);
val |= FD_CLOEXEC;
fcntl(lockfile, F_SETFD, val);
fcntl(lock_file_fd, F_SETFD, val);

broker_program_state(NEBTYPE_PROCESS_DAEMONIZE, NEBFLAG_NONE, NEBATTR_NONE);

return OK;
}

/**
 * Close the lock file descriptor opened by daemon_init(), if any.
 *
 * lock_file_fd is reset to -1 afterwards, so calling this function
 * again is a harmless no-op. Closing the descriptor in the process that
 * owns the fcntl() lock releases that lock; it is meant for forked
 * children that merely inherited the descriptor.
 */
void close_lockfile_fd(void)
{
	/* 0 is a valid file descriptor; -1 is the "not open" sentinel */
	if (lock_file_fd >= 0) {
		close(lock_file_fd);
	}
	lock_file_fd = -1;
}

/******************************************************************/
/************************* FILE FUNCTIONS *************************/
/******************************************************************/
Expand Down
1 change: 1 addition & 0 deletions src/naemon/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ void signal_react(void); /* General signal reaction routines */
void handle_sigxfsz(void); /* handle SIGXFSZ */
int signal_parent(int); /* signal parent when daemonizing */
int daemon_init(void); /* switches to daemon mode */
void close_lockfile_fd(void); /* close lock_file file handle */

int init_check_stats(void);
int update_check_stats(int, time_t);
Expand Down

0 comments on commit e842ca7

Please sign in to comment.