From d770fbf012f3fbab5107fbb1f72fdad8ae917a25 Mon Sep 17 00:00:00 2001 From: Dev Ojha Date: Wed, 5 Jun 2024 13:55:59 -0500 Subject: [PATCH 1/2] backport remaining commits (cherry picked from commit 423f7b6149dfb458074ef4d156b608d16c793e52) --- .../3184-remove-PeerSendBytesTotal-metric.md | 2 + docs/explanation/core/metrics.md | 77 +++++++++++++++++++ p2p/metrics.gen.go | 7 -- p2p/metrics.go | 2 - p2p/peer.go | 5 -- 5 files changed, 79 insertions(+), 14 deletions(-) create mode 100644 .changelog/unreleased/breaking-changes/3184-remove-PeerSendBytesTotal-metric.md create mode 100644 docs/explanation/core/metrics.md diff --git a/.changelog/unreleased/breaking-changes/3184-remove-PeerSendBytesTotal-metric.md b/.changelog/unreleased/breaking-changes/3184-remove-PeerSendBytesTotal-metric.md new file mode 100644 index 0000000000..7368fe02cc --- /dev/null +++ b/.changelog/unreleased/breaking-changes/3184-remove-PeerSendBytesTotal-metric.md @@ -0,0 +1,2 @@ +`[p2p]` Remove `PeerSendBytesTotal` metric as it is costly to track, +and not that informative in debugging. ([\#3184](https://github.com/cometbft/cometbft/issues/3184)) \ No newline at end of file diff --git a/docs/explanation/core/metrics.md b/docs/explanation/core/metrics.md new file mode 100644 index 0000000000..245e82c810 --- /dev/null +++ b/docs/explanation/core/metrics.md @@ -0,0 +1,77 @@ +--- +order: 5 +--- + +# Metrics + +CometBFT can report and serve Prometheus metrics, which in turn can +be consumed by Prometheus collector(s). + +This functionality is disabled by default. + +To enable the Prometheus metrics, set `instrumentation.prometheus=true` in your +config file. Metrics will be served under `/metrics` on port 26660 by default. +The listen address can be changed in the config file (see +`instrumentation.prometheus_listen_addr`). 
+ +## List of available metrics + +The following metrics are available: + +| **Name** | **Type** | **Tags** | **Description** | +|--------------------------------------------|-----------|------------------|--------------------------------------------------------------------------------------------------------------------------------------------| +| abci\_connection\_method\_timing\_seconds | Histogram | method, type | Timings for each of the ABCI methods | +| blocksync\_syncing | Gauge | | Either 0 (not block syncing) or 1 (syncing) | +| consensus\_height | Gauge | | Height of the chain | +| consensus\_validators | Gauge | | Number of validators | +| consensus\_validators\_power | Gauge | validator\_address | Total voting power of all validators | +| consensus\_validator\_power | Gauge | validator\_address | Voting power of the node if in the validator set | +| consensus\_validator\_last\_signed\_height | Gauge | validator\_address | Last height the node signed a block, if the node is a validator | +| consensus\_validator\_missed\_blocks | Gauge | | Total amount of blocks missed for the node, if the node is a validator | +| consensus\_missing\_validators | Gauge | | Number of validators who did not sign | +| consensus\_missing\_validators\_power | Gauge | | Total voting power of the missing validators | +| consensus\_byzantine\_validators | Gauge | | Number of validators who tried to double sign | +| consensus\_byzantine\_validators\_power | Gauge | | Total voting power of the byzantine validators | +| consensus\_block\_interval\_seconds | Histogram | | Time between this and last block (Block.Header.Time) in seconds | +| consensus\_rounds | Gauge | | Number of rounds | +| consensus\_num\_txs | Gauge | | Number of transactions | +| consensus\_total\_txs | Gauge | | Total number of transactions committed | +| consensus\_block\_parts | Counter | peer\_id | Number of blockparts transmitted by peer | +| consensus\_latest\_block\_height | Gauge | | /status sync\_info 
number | +| consensus\_block\_size\_bytes | Gauge | | Block size in bytes | +| consensus\_step\_duration\_seconds | Histogram | step | Histogram of durations for each step in the consensus protocol | +| consensus\_round\_duration\_seconds | Histogram | | Histogram of durations for all the rounds that have occurred since the process started | +| consensus\_block\_gossip\_parts\_received | Counter | matches\_current | Number of block parts received by the node | +| consensus\_quorum\_prevote\_delay | Gauge | proposer\_address | Interval in seconds between the proposal timestamp and the timestamp of the earliest prevote that achieved a quorum | +| consensus\_full\_prevote\_delay | Gauge | proposer\_address | Interval in seconds between the proposal timestamp and the timestamp of the latest prevote in a round where all validators voted | +| consensus\_vote\_extension\_receive\_count | Counter | status | Number of vote extensions received | +| consensus\_proposal\_receive\_count | Counter | status | Total number of proposals received by the node since process start | +| consensus\_proposal\_create\_count | Counter | | Total number of proposals created by the node since process start | +| consensus\_round\_voting\_power\_percent | Gauge | vote\_type | A value between 0 and 1.0 representing the percentage of the total voting power per vote type received within a round | +| consensus\_late\_votes | Counter | vote\_type | Number of votes received by the node since process start that correspond to earlier heights and rounds than this node is currently in. | +| consensus\_duplicate\_vote | Counter | | Number of times we received a duplicate vote. | +| consensus\_duplicate\_block\_part | Counter | | Number of times we received a duplicate block part. | +| consensus\_proposal\_timestamp\_difference | Histogram | is\_timely | Difference between the timestamp in the proposal message and the local time of the validator at the time it received the message. 
| +| p2p\_message\_send\_bytes\_total | Counter | message\_type | Number of bytes sent to all peers per message type | +| p2p\_message\_receive\_bytes\_total | Counter | message\_type | Number of bytes received from all peers per message type | +| p2p\_peers | Gauge | | Number of peers the node is connected to | +| p2p\_peer\_receive\_bytes\_total | Counter | peer\_id, chID | Number of bytes per channel received from a given peer | +| p2p\_peer\_pending\_send\_bytes | Gauge | peer\_id | Number of pending bytes to be sent to a given peer | +| p2p\_num\_txs | Gauge | peer\_id | Number of transactions submitted by each peer\_id | +| p2p\_pending\_send\_bytes | Gauge | peer\_id | Amount of data pending to be sent to peer | +| mempool\_size | Gauge | | Number of uncommitted transactions | +| mempool\_tx\_size\_bytes | Histogram | | Transaction sizes in bytes | +| mempool\_failed\_txs | Counter | | Number of failed transactions | +| mempool\_recheck\_times | Counter | | Number of transactions rechecked in the mempool | +| state\_block\_processing\_time | Histogram | | Time spent processing FinalizeBlock in ms | +| state\_consensus\_param\_updates | Counter | | Number of consensus parameter updates returned by the application since process start | +| state\_validator\_set\_updates | Counter | | Number of validator set updates returned by the application since process start | +| statesync\_syncing | Gauge | | Either 0 (not state syncing) or 1 (syncing) | + +## Useful queries + +Percentage of missing + byzantine validators: + +```md +((consensus_byzantine_validators_power + consensus_missing_validators_power) / consensus_validators_power) * 100 +``` diff --git a/p2p/metrics.gen.go b/p2p/metrics.gen.go index e452f16535..bdd4fda872 100644 --- a/p2p/metrics.gen.go +++ b/p2p/metrics.gen.go @@ -26,12 +26,6 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "peer_receive_bytes_total", Help: "Number of bytes received from a given peer.", }, 
append(labels, "peer_id", "chID")).With(labelsAndValues...), - PeerSendBytesTotal: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "peer_send_bytes_total", - Help: "Number of bytes sent to a given peer.", - }, append(labels, "peer_id", "chID")).With(labelsAndValues...), PeerPendingSendBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -63,7 +57,6 @@ func NopMetrics() *Metrics { return &Metrics{ Peers: discard.NewGauge(), PeerReceiveBytesTotal: discard.NewCounter(), - PeerSendBytesTotal: discard.NewCounter(), PeerPendingSendBytes: discard.NewGauge(), NumTxs: discard.NewGauge(), MessageReceiveBytesTotal: discard.NewCounter(), diff --git a/p2p/metrics.go b/p2p/metrics.go index 808142e9af..0f979a01bf 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -30,8 +30,6 @@ type Metrics struct { Peers metrics.Gauge // Number of bytes received from a given peer. PeerReceiveBytesTotal metrics.Counter `metrics_labels:"peer_id,chID"` - // Number of bytes sent to a given peer. - PeerSendBytesTotal metrics.Counter `metrics_labels:"peer_id,chID"` // Pending bytes to be sent to a given peer. PeerPendingSendBytes metrics.Gauge `metrics_labels:"peer_id"` // Number of transactions submitted by each peer. 
diff --git a/p2p/peer.go b/p2p/peer.go index 60b3fc48e5..16ab86280c 100644 --- a/p2p/peer.go +++ b/p2p/peer.go @@ -284,11 +284,6 @@ func (p *peer) send(chID byte, msg proto.Message, sendFunc func(byte, []byte) bo } res := sendFunc(chID, msgBytes) if res { - labels := []string{ - "peer_id", string(p.ID()), - "chID", fmt.Sprintf("%#x", chID), - } - p.metrics.PeerSendBytesTotal.With(labels...).Add(float64(len(msgBytes))) p.metrics.MessageSendBytesTotal.With("message_type", metricLabelValue).Add(float64(len(msgBytes))) } return res From 9dc0c4beec4ad6930fed265ac0d7cee0dbdcbe1c Mon Sep 17 00:00:00 2001 From: Dev Ojha Date: Wed, 5 Jun 2024 13:59:20 -0500 Subject: [PATCH 2/2] add changelog (cherry picked from commit 944a8e50342c45b46acb2745ea6f514127b4b458) --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67e26de415..eb374cd934 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ * [#99](https://github.com/osmosis-labs/cometbft/pull/99) perf(consensus): Reuse an internal buffer for block building (#3162) * [#101](https://github.com/osmosis-labs/cometbft/pull/101) perf(consensus): Run broadcast routines out of process (speeds up consensus mutex) #3180 * [#102](https://github.com/osmosis-labs/cometbft/pull/102) perf(p2p): Remove broadcast return channel #3182 +* [#105](https://github.com/osmosis-labs/cometbft/pull/105) perf(p2p)!: Remove PeerSendBytesTotal metric #3184 + ## v0.37.4-v25-osmo-5