Skip to content

Commit

Permalink
Fix reload 100% CPU spinning after failed reload
Browse files Browse the repository at this point in the history
After a failed reload, some UDB pointers were cleared that were still linked
in the UDB. This fix unlinks them in all the correct places.

This may be the fix to issue #417
  • Loading branch information
wtoorop committed Jan 16, 2025
1 parent 2109449 commit 5766307
Showing 1 changed file with 31 additions and 7 deletions.
38 changes: 31 additions & 7 deletions server.c
Original file line number Diff line number Diff line change
Expand Up @@ -2808,6 +2808,13 @@ server_main(struct nsd *nsd)
netio_remove_handler(netio, &reload_listener);
reload_listener.fd = -1;
reload_listener.event_types = NETIO_EVENT_NONE;
/* "load" failed an apply_xfr task.
* "old-main" will become "main" and
* MUST unlink last_task and xfrs2process.
* see: UNLINKING last_task and xfrs2process
*/
udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
udb_ptr_unlink(&xfrs2process, nsd->task[nsd->mytask]);
task_process_sync(nsd->task[nsd->mytask]);
/* inform xfrd reload attempt ended */
if(!write_socket(nsd->xfrd_listener->fd,
Expand Down Expand Up @@ -2912,6 +2919,13 @@ server_main(struct nsd *nsd)
netio_remove_handler(netio, &reload_listener);
reload_listener.fd = -1;
reload_listener.event_types = NETIO_EVENT_NONE;
/* "load" failed an apply_xfr task.
* "old-main" will become "main" and
* MUST unlink last_task and xfrs2process.
* see: UNLINKING last_task and xfrs2process
*/
udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
udb_ptr_unlink(&xfrs2process, nsd->task[nsd->mytask]);
task_process_sync(nsd->task[nsd->mytask]);
/* inform xfrd reload attempt ended */
if(!write_socket(nsd->xfrd_listener->fd,
Expand Down Expand Up @@ -2971,13 +2985,30 @@ server_main(struct nsd *nsd)
task_remap(nsd->task[nsd->mytask]);
udb_ptr_init(&xfrs2process, nsd->task[nsd->mytask]);
udb_ptr_init(&last_task , nsd->task[nsd->mytask]);
/* UNLINKING last_task and xfrs2process
* ------------------------------------
* last_task and xfrs2process will be unlinked when
* the "load" process has processed all tasks.
*
* If the "load" process fails, because of failed fork()
* or due to failed task_process_apply_xfr(), then the
* process that will take over the "main" role ("load"
* on failed fork() and "old-main" on failed
* task_process_apply_xfr()) MUST unlink these udb_ptr's.
*/
reload_process_non_xfr_tasks(nsd, &xfrs2process
, &last_task);
/* Do actual reload */
reload_pid = fork();
switch (reload_pid) {
case -1:
log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
/* fork() failed. "load" will become "main" and
* MUST unlink last_task and xfrs2process.
* see: UNLINKING last_task and xfrs2process
*/
udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
udb_ptr_unlink(&xfrs2process, nsd->task[nsd->mytask]);
break;
default:
/* PARENT */
Expand Down Expand Up @@ -3029,13 +3060,6 @@ server_main(struct nsd *nsd)
log_set_process_role("main");
#endif
}
/* xfrs2process and last_task need to be reset in case
* "old-main" becomes "main" (due to an failed (exited)
* xfr update). If needed xfrs2process gets unlinked by
* "load", and last_task by the xfrd.
*/
memset(&xfrs2process, 0, sizeof(xfrs2process));
memset(&last_task, 0, sizeof(last_task));
break;
case NSD_QUIT_SYNC:
/* synchronisation of xfrd, parent and reload */
Expand Down

0 comments on commit 5766307

Please sign in to comment.