Skip to content

Commit

Permalink
Add election leader idle issue and graphql (#2150)
Browse files Browse the repository at this point in the history
  • Loading branch information
yngvar-antonsson authored Nov 8, 2023
1 parent 0436c7d commit b5c946c
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 11 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ and this project adheres to
Unreleased
-------------------------------------------------------------------------------

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Added
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- ``election_leader_idle`` field to GraphQL API.

- New ``raft`` warning issue when ``box.info.election.leader_idle`` is too high
  (at least ``4 * replication_timeout``).

-------------------------------------------------------------------------------
[2.8.4] - 2023-10-31
-------------------------------------------------------------------------------
Expand Down
29 changes: 27 additions & 2 deletions cartridge/issues.lua
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@
--
-- * warning: "Instance ... has spaces with deprecated format: space1, ..."
--
-- Raft issues:
--
-- * warning: "Raft leader idle is 10.000000 on ... .
-- Is raft leader alive and connection is healthy?"
--
-- Custom issues (defined by user):
--
-- * Custom roles can announce more issues with their own level, topic
Expand Down Expand Up @@ -203,8 +208,8 @@ local function list_on_instance(opts)
}
table.insert(ret, issue)
end

for _, replication_info in pairs(box.info.replication) do
local box_info = box.info
for _, replication_info in pairs(box_info.replication) do
local replica = enabled_servers[replication_info.uuid]
if replica == nil then
goto continue
Expand Down Expand Up @@ -292,6 +297,26 @@ local function list_on_instance(opts)
::continue::
end

-- Raft: add a warning to `ret` when the election info reports a
-- leader_idle value of at least four replication timeouts.
-- Nothing is reported when box_info has no election section or when
-- leader_idle is absent.
if box_info.election then
    local idle = box_info.election.leader_idle
    local idle_too_high = idle ~= nil
        and idle >= 4 * box.cfg.replication_timeout
    if idle_too_high then
        local message = string.format(
            "Raft leader idle is %f on %s. "..
            "Is raft leader alive and connection is healthy?",
            idle,
            instance_uuid
        )
        table.insert(ret, {
            level = 'warning',
            topic = 'raft',
            replicaset_uuid = replicaset_uuid,
            instance_uuid = instance_uuid,
            message = message,
        })
    end
end

local failover_error = failover.get_error()
if failover_error ~= nil then
table.insert(ret, {
Expand Down
1 change: 1 addition & 0 deletions cartridge/lua-api/boxinfo.lua
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ local function get_info(uri)
ro = box_info.ro,
ro_reason = box_info.ro_reason,
election_state = box_info.election and box_info.election.state,
election_leader_idle = box_info.election and box_info.election.leader_idle,
election_mode = box.cfg.election_mode or "off",
synchro_queue_owner = box_info.synchro and box_info.synchro.queue.owner or 0,
},
Expand Down
5 changes: 4 additions & 1 deletion cartridge/webui/gql-boxinfo.lua
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,10 @@ local boxinfo_schema = {
kind = gql_types.string,
description = 'Current read-only state reason',
},

election_leader_idle = {
kind = gql_types.float,
description = 'Leader idle value in seconds',
},
election_state = {
kind = gql_types.string,
description = 'State after Raft leader election',
Expand Down
17 changes: 10 additions & 7 deletions doc/schema.graphql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# source: http://127.0.0.1:8081/admin/api
# timestamp: Fri Mar 17 2023 16:41:38 GMT+0300 (Москва, стандартное время)
# timestamp: Fri Nov 03 2023 16:54:14 GMT+0300 (Moscow Standard Time)

"""Custom scalar specification."""
directive @specifiedBy(
Expand Down Expand Up @@ -747,27 +747,30 @@ type ServerInfoGeneral {
"""State after Raft leader election"""
election_state: String

"""The number of seconds since the instance started"""
uptime: Float!

"""
The maximum number of threads to use during execution of certain internal
processes (currently socket.getaddrinfo() and coio_call())
"""
worker_pool_threads: Int

"""The number of seconds since the instance started"""
uptime: Float!

"""The UUID of the replica set"""
replicaset_uuid: String!

"""Current working directory of a process"""
work_dir: String

"""A directory where write-ahead log (.xlog) files are stored"""
wal_dir: String

"""Current working directory of a process"""
work_dir: String

"""Current read-only state reason"""
ro_reason: String

"""Leader idle value in seconds"""
election_leader_idle: Float

"""Id of current queue owner"""
synchro_queue_owner: Int!

Expand Down
5 changes: 4 additions & 1 deletion test/integration/api_edit_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,9 @@ local function test_all_rw(all_rw)
servers {
uuid
boxinfo {
general { ro ro_reason election_state election_mode synchro_queue_owner}
general { ro ro_reason election_leader_idle
election_state election_mode synchro_queue_owner
}
}
}
master {
Expand All @@ -292,6 +294,7 @@ local function test_all_rw(all_rw)

if helpers.tarantool_version_ge('2.10.0') then
t.assert_equals(srv['boxinfo']['general']['election_state'], 'follower')
t.assert_equals(srv['boxinfo']['general']['election_leader_idle'], box.NULL)
t.assert_equals(srv['boxinfo']['general']['ro_reason'], box.NULL)
end
else
Expand Down
30 changes: 30 additions & 0 deletions test/integration/issues_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,36 @@ function g.test_state_hangs()
t.assert_equals(helpers.list_cluster_issues(g.master), {})
end

-- Prepare the master for test_election_leader_high_idle: override
-- box.info.election with a copy whose leader_idle is forced to 10.
-- The test is skipped on Tarantool versions older than 2.10.0.
g.before_test('test_election_leader_high_idle', function()
    local supported = helpers.tarantool_version_ge('2.10.0')
    t.skip_if(not supported, 'leader_idle is not supported')
    g.master:exec(function()
        -- Keep the original value in a global so the after_test hook
        -- can reach it from a separate exec call.
        local original = box.info.election
        rawset(_G, 'old_info_election', original)

        local patched = table.deepcopy(original)
        patched.leader_idle = 10
        rawset(box.info, 'election', patched)
    end)
end)

-- With leader_idle forced to 10 on the master (see the before_test hook),
-- the cluster must report exactly one raft warning for that instance.
function g.test_election_leader_high_idle()
    local master = g.master
    local expected_message = string.format(
        "Raft leader idle is 10.000000 on %s. "..
        "Is raft leader alive and connection is healthy?",
        master.instance_uuid
    )
    local expected_issues = {
        {
            level = 'warning',
            topic = 'raft',
            message = expected_message,
            instance_uuid = master.instance_uuid,
            replicaset_uuid = master.replicaset_uuid,
        },
    }
    t.assert_items_equals(
        helpers.list_cluster_issues(master),
        expected_issues
    )
end

-- Undo the box.info.election override installed by the before_test hook
-- of test_election_leader_high_idle.
g.after_test('test_election_leader_high_idle', function()
    g.master:exec(function()
        -- Drop the raw field instead of writing the saved snapshot back.
        -- A raw value left in box.info would keep shadowing the election
        -- info for every later test on this instance.
        -- NOTE(review): relies on box.info computing its fields on access
        -- through its metatable -- confirm against the Tarantool reference.
        rawset(box.info, 'election', nil)
        -- Remove the helper global created by the before_test hook.
        rawset(_G, 'old_info_election', nil)
    end)
end)

function g.test_aliens()
g.alien:start()

Expand Down

0 comments on commit b5c946c

Please sign in to comment.