Skip to content

Commit

Permalink
[mongo] add mongo recommended cluster monitors (#18858)
Browse files Browse the repository at this point in the history
* add mongo recommended monitors

* fix typo
  • Loading branch information
lu-zhengda authored Oct 17, 2024
1 parent 2eb2e02 commit 9772e77
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 4 deletions.
6 changes: 3 additions & 3 deletions mongo/assets/monitors/high_connections.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
"version": 2,
"created_at": "2020-08-05",
"last_updated_at": "2021-01-11",
"last_updated_at": "2024-10-16",
"title": "Connection pool is reaching saturation",
"tags": [
"integration:mongodb"
],
"description": "A connection pool helps reduce application latency and the number of times new connections are created. This monitor tracks the number of incoming connections to alert when the connection pool is near the saturation point.",
"definition": {
"message": "The number of incoming connections is reaching the maximum. {{value}} % of the available connections have been used on {{replset_name.name}}",
"message": "The number of incoming connections is reaching the maximum. {{value}} % of the available connections have been used on MongoDB Cluster {{clustername.name}} Replica Set {{replset_name.name}}",
"name": "[MongoDB] High incoming connections",
"options": {
"escalation_message": "",
Expand All @@ -26,7 +26,7 @@
},
"timeout_h": 0
},
"query": "avg(last_5m):100 * sum:mongodb.connections.current{*} by {replset_name} / ( sum:mongodb.connections.current{*} by {replset_name} + sum:mongodb.connections.available{*} by {replset_name} ) > 90",
"query": "avg(last_5m):100 * sum:mongodb.connections.current{*} by {clustername,replset_name} / ( sum:mongodb.connections.current{*} by {clustername,replset_name} + sum:mongodb.connections.available{*} by {clustername,replset_name} ) > 90",
"tags": [
"integration:mongodb"
],
Expand Down
35 changes: 35 additions & 0 deletions mongo/assets/monitors/high_fsstorage_usage.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"version": 2,
"created_at": "2024-10-16",
"last_updated_at": "2024-10-16",
"title": "Used file system storage is reaching capacity",
"tags": [
"integration:mongodb"
],
"description": "This monitor tracks the used file system storage on a MongoDB server to alert when it is reaching capacity.",
"definition": {
"message": "The used file system storage is reaching capacity for database host {{database_instance.name}} on MongoDB Cluster {{clustername.name}}. {{value}} % of the total storage has been used.",
"name": "[MongoDB] High file system storage usage",
"options": {
"escalation_message": "",
"include_tags": true,
"locked": false,
"new_host_delay": 300,
"no_data_timeframe": null,
"notify_audit": false,
"notify_no_data": false,
"renotify_interval": "0",
"require_full_window": true,
"thresholds": {
"critical": 80,
"warning": 70
},
"timeout_h": 0
},
"query": "avg(last_60m):100 * avg:mongodb.stats.fsusedsize{*} by {clustername,database_instance} / avg:mongodb.stats.fstotalsize{*} by {clustername,database_instance} > 80",
"tags": [
"integration:mongodb"
],
"type": "query alert"
}
}
35 changes: 35 additions & 0 deletions mongo/assets/monitors/high_replication_lag.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"version": 2,
"created_at": "2024-10-16",
"last_updated_at": "2024-10-16",
"title": "High replication lag",
"tags": [
"integration:mongodb"
],
"description": "This monitor tracks the replication lag on a MongoDB replica set to alert when it is high.",
"definition": {
"message": "MongoDB Cluster {{clustername.name}} member {{member.name}} replication lag is high. The replication lag is {{value}} seconds.",
"name": "[MongoDB] High replication lag",
"options": {
"escalation_message": "",
"include_tags": true,
"locked": false,
"new_host_delay": 300,
"no_data_timeframe": null,
"notify_audit": false,
"notify_no_data": false,
"renotify_interval": "0",
"require_full_window": true,
"thresholds": {
"critical": 120,
"warning": 60
},
"timeout_h": 0
},
"query": "avg(last_5m):100 * avg:mongodb.replset.optime_lag{*} by {clustername,member} > 120",
"tags": [
"integration:mongodb"
],
"type": "query alert"
}
}
35 changes: 35 additions & 0 deletions mongo/assets/monitors/low_oplog_window.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"version": 2,
"created_at": "2024-10-16",
"last_updated_at": "2024-10-16",
"title": "Low oplog window",
"tags": [
"integration:mongodb"
],
"description": "This monitor tracks the oplog window on a MongoDB replica set to alert when it is insufficient.",
"definition": {
"message": "Oplog window for database host {{database_instance.name}} on MongoDB Cluster {{clustername.name}} is below the threshold. The oplog window is {{value}} seconds.",
"name": "[MongoDB] Low oplog window",
"options": {
"escalation_message": "",
"include_tags": true,
"locked": false,
"new_host_delay": 300,
"no_data_timeframe": null,
"notify_audit": false,
"notify_no_data": false,
"renotify_interval": "0",
"require_full_window": true,
"thresholds": {
"critical": 3600,
"warning": 7200
},
"timeout_h": 0
},
"query": "avg(last_60m):100 * avg:mongodb.oplog.timediff{*} by {clustername,database_instance} < 3600",
"tags": [
"integration:mongodb"
],
"type": "query alert"
}
}
34 changes: 34 additions & 0 deletions mongo/assets/monitors/unhealthy_repliset_member.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"version": 2,
"created_at": "2024-10-16",
"last_updated_at": "2024-10-16",
"title": "Unhealthy replica set member",
"tags": [
"integration:mongodb"
],
"description": "This monitor tracks the health of a MongoDB replica set member to alert when it is unhealthy.",
"definition": {
"message": "MongoDB Cluster {{clustername.name}} replica set member {{database_instance.name}} is unhealthy.",
"name": "[MongoDB] Unhealthy replica set member",
"options": {
"escalation_message": "",
"include_tags": true,
"locked": false,
"new_host_delay": 300,
"no_data_timeframe": null,
"notify_audit": false,
"notify_no_data": false,
"renotify_interval": "0",
"require_full_window": true,
"thresholds": {
"critical": 1
},
"timeout_h": 0
},
"query": "max(last_5m):avg:mongodb.replset.health{*} by {clustername,database_instance} != 1",
"tags": [
"integration:mongodb"
],
"type": "query alert"
}
}
6 changes: 5 additions & 1 deletion mongo/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@
"mongodb": "assets/dashboards/overview.json"
},
"monitors": {
"Connection pool is reaching saturation": "assets/monitors/high_connections.json"
"Connection pool is reaching saturation": "assets/monitors/high_connections.json",
"High replication lag": "assets/monitors/high_replication_lag.json",
"Low oplog window": "assets/monitors/low_oplog_window.json",
"Unhealthy replica set member": "assets/monitors/unhealthy_repliset_member.json",
"Used file system storage is reaching capacity": "assets/monitors/high_fsstorage_usage.json"
},
"saved_views": {
"operations_by_type_overview": "assets/saved_views/operations_by_type_overview.json",
Expand Down

0 comments on commit 9772e77

Please sign in to comment.