From caf499da64a96513f2cb0ff1f442d97df4d03dc3 Mon Sep 17 00:00:00 2001 From: Lymarenko Lev Date: Tue, 18 Jul 2023 19:52:41 +0300 Subject: [PATCH] Fix fill missing point bug --- stats/stats-server/src/read_service.rs | 14 +- stats/stats-server/src/serializers.rs | 179 +----------- .../stats/src/charts/lines/gas_used_growth.rs | 31 ++- stats/stats/src/charts/lines/new_blocks.rs | 8 +- stats/stats/src/charts/lines/new_txns.rs | 41 ++- stats/stats/src/charts/updater/dependent.rs | 2 +- stats/stats/src/lib.rs | 1 + stats/stats/src/missing_date.rs | 259 ++++++++++++++++++ stats/stats/src/read.rs | 12 +- stats/stats/src/tests/simple_test.rs | 34 ++- 10 files changed, 380 insertions(+), 201 deletions(-) create mode 100644 stats/stats/src/missing_date.rs diff --git a/stats/stats-server/src/read_service.rs b/stats/stats-server/src/read_service.rs index f596cc839..79098d492 100644 --- a/stats/stats-server/src/read_service.rs +++ b/stats/stats-server/src/read_service.rs @@ -1,7 +1,4 @@ -use crate::{ - charts::Charts, - serializers::{fill_missing_points, serialize_line_points}, -}; +use crate::{charts::Charts, serializers::serialize_line_points}; use async_trait::async_trait; use chrono::NaiveDate; use sea_orm::{DatabaseConnection, DbErr}; @@ -28,7 +25,10 @@ impl ReadService { fn map_read_error(err: ReadError) -> Status { match &err { ReadError::NotFound(_) => tonic::Status::not_found(err.to_string()), - _ => tonic::Status::internal(err.to_string()), + _ => { + tracing::error!(err = ?err, "internal read error"); + tonic::Status::internal(err.to_string()) + } } } @@ -87,9 +87,9 @@ impl StatsService for ReadService { .from .and_then(|date| NaiveDate::from_str(&date).ok()); let to = request.to.and_then(|date| NaiveDate::from_str(&date).ok()); - let mut data = stats::get_chart_data(&self.db, &request.name, from, to) + let policy = Some(chart_info.chart.missing_date_policy()); + let mut data = stats::get_chart_data(&self.db, &request.name, from, to, policy) .await - .map(|data| fill_missing_points(data, chart_info.chart.missing_date_policy(), from, to)) .map_err(map_read_error)?; if chart_info.chart.drop_last_point() { diff --git a/stats/stats-server/src/serializers.rs b/stats/stats-server/src/serializers.rs index a0ea09bdd..c5ee9eb6e 100644 --- a/stats/stats-server/src/serializers.rs +++ b/stats/stats-server/src/serializers.rs @@ -1,5 +1,4 @@ -use chrono::{Duration, NaiveDate}; -use stats::{DateValue, MissingDatePolicy}; +use stats::DateValue; use stats_proto::blockscout::stats::v1::Point; pub fn serialize_line_points(data: Vec) -> Vec { @@ -10,179 +9,3 @@ pub fn serialize_line_points(data: Vec) -> Vec { }) .collect() } - -pub fn fill_missing_points( - data: Vec, - policy: MissingDatePolicy, - from: Option, - to: Option, -) -> Vec { - let from = from.or(data.first().map(|v| v.date)); - let to = to.or(data.last().map(|v| v.date)); - let (from, to) = match (from, to) { - (Some(from), Some(to)) if from <= to => (from, to), - _ => return data, - }; - - match policy { - MissingDatePolicy::FillZero => fill_zeros(data, from, to), - MissingDatePolicy::FillPrevious => fill_previous(data, from, to), - } -} - -fn fill_zeros(data: Vec, from: NaiveDate, to: NaiveDate) -> Vec { - let n = (to - from).num_days() as usize; - let mut new_data: Vec = Vec::with_capacity(n); - - let mut current_date = from; - let mut i = 0; - while current_date <= to { - let maybe_value_exists = data.get(i).filter(|&v| v.date.eq(¤t_date)); - - let value = match maybe_value_exists { - Some(value) => { - i += 1; - value.clone() - } - None => DateValue::zero(current_date), - }; - new_data.push(value); - current_date += Duration::days(1); - } - - new_data -} - -fn fill_previous(data: Vec, from: NaiveDate, to: NaiveDate) -> Vec { - let n = (to - from).num_days() as usize; - let mut new_data: Vec = Vec::with_capacity(n); - let mut current_date = from; - let mut i = 0; - while current_date <= to { - let maybe_value_exists = data.get(i).filter(|&v| v.date.eq(¤t_date)); - let value = match maybe_value_exists { - Some(value) => { - i += 1; - value.clone() - } - None => new_data - .last() - .map(|value| DateValue { - date: current_date, - value: value.value.clone(), - }) - .unwrap_or_else(|| DateValue::zero(current_date)), - }; - new_data.push(value); - current_date += Duration::days(1); - } - new_data -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::NaiveDate; - use pretty_assertions::assert_eq; - use stats::DateValue; - - fn d(date: &str) -> NaiveDate { - date.parse().unwrap() - } - fn v(date: &str, value: &str) -> DateValue { - DateValue { - date: d(date), - value: value.to_string(), - } - } - - #[test] - fn fill_zeros_works() { - for (data, expected, from, to) in [ - (vec![], vec![], None, None), - (vec![], vec![], Some(d("2100-01-01")), None), - ( - vec![], - vec![v("2100-01-01", "0")], - Some(d("2100-01-01")), - Some(d("2100-01-01")), - ), - ( - vec![v("2022-01-01", "01")], - vec![v("2022-01-01", "01")], - Some(d("2022-01-01")), - Some(d("2022-01-01")), - ), - ( - vec![ - v("2022-08-20", "20"), - v("2022-08-22", "22"), - v("2022-08-23", "23"), - v("2022-08-25", "25"), - ], - vec![ - v("2022-08-18", "0"), - v("2022-08-19", "0"), - v("2022-08-20", "20"), - v("2022-08-21", "0"), - v("2022-08-22", "22"), - v("2022-08-23", "23"), - v("2022-08-24", "0"), - v("2022-08-25", "25"), - v("2022-08-26", "0"), - v("2022-08-27", "0"), - ], - Some(d("2022-08-18")), - Some(d("2022-08-27")), - ), - ] { - let actual = fill_missing_points(data, MissingDatePolicy::FillZero, from, to); - assert_eq!(expected, actual) - } - } - - #[test] - fn fill_previous_works() { - for (data, expected, from, to) in [ - (vec![], vec![], None, None), - (vec![], vec![], Some(d("2100-01-01")), None), - ( - vec![], - vec![v("2100-01-01", "0")], - Some(d("2100-01-01")), - Some(d("2100-01-01")), - ), - ( - vec![v("2022-01-01", "01")], - vec![v("2022-01-01", "01")], - Some(d("2022-01-01")), - Some(d("2022-01-01")), - ), - ( - vec![ - v("2022-08-20", "20"), - v("2022-08-22", "22"), - v("2022-08-23", "23"), - v("2022-08-25", "25"), - ], - vec![ - v("2022-08-18", "0"), - v("2022-08-19", "0"), - v("2022-08-20", "20"), - v("2022-08-21", "20"), - v("2022-08-22", "22"), - v("2022-08-23", "23"), - v("2022-08-24", "23"), - v("2022-08-25", "25"), - v("2022-08-26", "25"), - v("2022-08-27", "25"), - ], - Some(d("2022-08-18")), - Some(d("2022-08-27")), - ), - ] { - let actual = fill_missing_points(data, MissingDatePolicy::FillPrevious, from, to); - assert_eq!(expected, actual); - } - } -} diff --git a/stats/stats/src/charts/lines/gas_used_growth.rs b/stats/stats/src/charts/lines/gas_used_growth.rs index 51db86f64..4be51cf27 100644 --- a/stats/stats/src/charts/lines/gas_used_growth.rs +++ b/stats/stats/src/charts/lines/gas_used_growth.rs @@ -101,9 +101,8 @@ impl crate::Chart for GasUsedGrowth { #[cfg(test)] mod tests { - use crate::tests::simple_test::simple_test_chart; - use super::GasUsedGrowth; + use crate::tests::simple_test::{ranged_test_chart, simple_test_chart}; #[tokio::test] #[ignore = "needs database to run"] @@ -125,4 +124,32 @@ mod tests { ) .await; } + + #[tokio::test] + #[ignore = "needs database to run"] + async fn ranged_update_gas_used_growth() { + let chart = GasUsedGrowth::default(); + let value_2022_11_12 = "250680"; + ranged_test_chart( + "ranged_update_gas_used_growth", + chart, + vec![ + ("2022-11-20", value_2022_11_12), + ("2022-11-21", value_2022_11_12), + ("2022-11-22", value_2022_11_12), + ("2022-11-23", value_2022_11_12), + ("2022-11-24", value_2022_11_12), + ("2022-11-25", value_2022_11_12), + ("2022-11-26", value_2022_11_12), + ("2022-11-27", value_2022_11_12), + ("2022-11-28", value_2022_11_12), + ("2022-11-29", value_2022_11_12), + ("2022-11-30", value_2022_11_12), + ("2022-12-01", "288350"), + ], + "2022-11-20".parse().unwrap(), + "2022-12-01".parse().unwrap(), + ) + .await; + } } diff --git a/stats/stats/src/charts/lines/new_blocks.rs b/stats/stats/src/charts/lines/new_blocks.rs index 82d300dde..2904f11f9 100644 --- a/stats/stats/src/charts/lines/new_blocks.rs +++ b/stats/stats/src/charts/lines/new_blocks.rs @@ -115,7 +115,7 @@ mod tests { // Note that update is not full, therefore there is no entry with date `2022-11-09` updater.update(&db, &blockscout, false).await.unwrap(); - let data = get_chart_data(&db, updater.name(), None, None) + let data = get_chart_data(&db, updater.name(), None, None, None) .await .unwrap(); let expected = vec![ @@ -136,7 +136,7 @@ mod tests { // note that update is full, therefore there is entry with date `2022-11-09` updater.update(&db, &blockscout, true).await.unwrap(); - let data = get_chart_data(&db, updater.name(), None, None) + let data = get_chart_data(&db, updater.name(), None, None, None) .await .unwrap(); let expected = vec![ @@ -171,7 +171,7 @@ mod tests { updater.create(&db).await.unwrap(); updater.update(&db, &blockscout, true).await.unwrap(); - let data = get_chart_data(&db, updater.name(), None, None) + let data = get_chart_data(&db, updater.name(), None, None, None) .await .unwrap(); let expected = vec![ @@ -243,7 +243,7 @@ mod tests { .unwrap(); updater.update(&db, &blockscout, false).await.unwrap(); - let data = get_chart_data(&db, updater.name(), None, None) + let data = get_chart_data(&db, updater.name(), None, None, None) .await .unwrap(); let expected = vec![ diff --git a/stats/stats/src/charts/lines/new_txns.rs b/stats/stats/src/charts/lines/new_txns.rs index cc176b786..e082648f8 100644 --- a/stats/stats/src/charts/lines/new_txns.rs +++ b/stats/stats/src/charts/lines/new_txns.rs @@ -78,7 +78,7 @@ impl crate::Chart for NewTxns { #[cfg(test)] mod tests { use super::NewTxns; - use crate::tests::simple_test::simple_test_chart; + use crate::tests::simple_test::{ranged_test_chart, simple_test_chart}; #[tokio::test] #[ignore = "needs database to run"] @@ -100,4 +100,43 @@ mod tests { ) .await; } + + #[tokio::test] + #[ignore = "needs database to run"] + async fn ranged_update_new_txns() { + let chart = NewTxns::default(); + ranged_test_chart( + "ranged_update_new_txns", + chart, + vec![ + ("2022-11-08", "0"), + ("2022-11-09", "5"), + ("2022-11-10", "12"), + ("2022-11-11", "14"), + ("2022-11-12", "5"), + ("2022-11-13", "0"), + ("2022-11-14", "0"), + ("2022-11-15", "0"), + ("2022-11-16", "0"), + ("2022-11-17", "0"), + ("2022-11-18", "0"), + ("2022-11-19", "0"), + ("2022-11-20", "0"), + ("2022-11-21", "0"), + ("2022-11-22", "0"), + ("2022-11-23", "0"), + ("2022-11-24", "0"), + ("2022-11-25", "0"), + ("2022-11-26", "0"), + ("2022-11-27", "0"), + ("2022-11-28", "0"), + ("2022-11-29", "0"), + ("2022-11-30", "0"), + ("2022-12-01", "5"), + ], + "2022-11-08".parse().unwrap(), + "2022-12-01".parse().unwrap(), + ) + .await; + } } diff --git a/stats/stats/src/charts/updater/dependent.rs b/stats/stats/src/charts/updater/dependent.rs index 57627d6bf..d722c4390 100644 --- a/stats/stats/src/charts/updater/dependent.rs +++ b/stats/stats/src/charts/updater/dependent.rs @@ -32,7 +32,7 @@ where "updating parent" ); parent.update_with_mutex(db, blockscout, force_full).await?; - let data = get_chart_data(db, parent.name(), None, None).await?; + let data = get_chart_data(db, parent.name(), None, None, None).await?; Ok(data) } diff --git a/stats/stats/src/lib.rs b/stats/stats/src/lib.rs index 2e5d01e42..c77c6ede0 100644 --- a/stats/stats/src/lib.rs +++ b/stats/stats/src/lib.rs @@ -1,4 +1,5 @@ mod charts; +mod missing_date; mod read; pub mod metrics; diff --git a/stats/stats/src/missing_date.rs b/stats/stats/src/missing_date.rs new file mode 100644 index 000000000..e4c4cd7ca --- /dev/null +++ b/stats/stats/src/missing_date.rs @@ -0,0 +1,259 @@ +use crate::{DateValue, MissingDatePolicy}; +use chrono::{Duration, NaiveDate}; +use entity::chart_data; +use sea_orm::{prelude::*, sea_query::Expr, QueryOrder, QuerySelect}; + +pub async fn get_and_fill_chart( + db: &DatabaseConnection, + chart_id: i32, + from: Option, + to: Option, + policy: MissingDatePolicy, +) -> Result, DbErr> { + let mut data_request = chart_data::Entity::find() + .select_only() + .column(chart_data::Column::Date) + .column(chart_data::Column::Value) + .filter(chart_data::Column::ChartId.eq(chart_id)) + .order_by_asc(chart_data::Column::Date); + + if let Some(from) = from { + let custom_where = Expr::cust_with_values::( + "date >= (SELECT COALESCE(MAX(date), '1900-01-01'::date) FROM chart_data WHERE chart_id = $1 AND date <= $2)", + [chart_id.into(), from.into()], + ); + QuerySelect::query(&mut data_request).cond_where(custom_where); + } + if let Some(to) = to { + let custom_where = Expr::cust_with_values::( + "date <= (SELECT COALESCE(MIN(date), '9999-12-31'::date) FROM chart_data WHERE chart_id = $1 AND date >= $2)", + [chart_id.into(), to.into()], + ); + QuerySelect::query(&mut data_request).cond_where(custom_where); + }; + + let data_with_extra = data_request.into_model().all(db).await?; + let data_filled = fill_missing_points(data_with_extra, policy, from, to); + let data = filter_within_range(data_filled, from, to); + Ok(data) +} + +pub fn fill_missing_points( + data: Vec, + policy: MissingDatePolicy, + from: Option, + to: Option, +) -> Vec { + let from = vec![from.as_ref(), data.first().map(|v| &v.date)] + .into_iter() + .flatten() + .min(); + let to = vec![to.as_ref(), data.last().map(|v| &v.date)] + .into_iter() + .flatten() + .max(); + let (from, to) = match (from, to) { + (Some(from), Some(to)) if from <= to => (from.to_owned(), to.to_owned()), + _ => return data, + }; + + match policy { + MissingDatePolicy::FillZero => fill_zeros(data, from, to), + MissingDatePolicy::FillPrevious => fill_previous(data, from, to), + } +} + +fn fill_zeros(data: Vec, from: NaiveDate, to: NaiveDate) -> Vec { + let n = (to - from).num_days() as usize; + let mut new_data: Vec = Vec::with_capacity(n); + + let mut current_date = from; + let mut i = 0; + while current_date <= to { + let maybe_value_exists = data.get(i).filter(|&v| v.date.eq(¤t_date)); + + let value = match maybe_value_exists { + Some(value) => { + i += 1; + value.clone() + } + None => DateValue::zero(current_date), + }; + new_data.push(value); + current_date += Duration::days(1); + } + + new_data +} + +fn fill_previous(data: Vec, from: NaiveDate, to: NaiveDate) -> Vec { + let n = (to - from).num_days() as usize; + let mut new_data: Vec = Vec::with_capacity(n); + let mut current_date = from; + let mut i = 0; + while current_date <= to { + let maybe_value_exists = data.get(i).filter(|&v| v.date.eq(¤t_date)); + let value = match maybe_value_exists { + Some(value) => { + i += 1; + value.clone() + } + None => new_data + .last() + .map(|value| DateValue { + date: current_date, + value: value.value.clone(), + }) + .unwrap_or_else(|| DateValue::zero(current_date)), + }; + new_data.push(value); + current_date += Duration::days(1); + } + new_data +} + +fn filter_within_range( + data: Vec, + maybe_from: Option, + maybe_to: Option, +) -> Vec { + let is_within_range = |v: &DateValue| -> bool { + if let Some(from) = maybe_from { + if v.date < from { + return false; + } + } + if let Some(to) = maybe_to { + if v.date > to { + return false; + } + } + true + }; + + data.into_iter().filter(is_within_range).collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::NaiveDate; + use pretty_assertions::assert_eq; + + fn d(date: &str) -> NaiveDate { + date.parse().unwrap() + } + fn v(date: &str, value: &str) -> DateValue { + DateValue { + date: d(date), + value: value.to_string(), + } + } + + #[test] + fn fill_zeros_works() { + for (data, expected, from, to) in [ + (vec![], vec![], None, None), + (vec![], vec![], Some(d("2100-01-01")), None), + ( + vec![], + vec![v("2100-01-01", "0")], + Some(d("2100-01-01")), + Some(d("2100-01-01")), + ), + ( + vec![v("2022-01-01", "01")], + vec![v("2022-01-01", "01")], + Some(d("2022-01-01")), + Some(d("2022-01-01")), + ), + ( + vec![ + v("2022-08-20", "20"), + v("2022-08-22", "22"), + v("2022-08-23", "23"), + v("2022-08-25", "25"), + ], + vec![ + v("2022-08-18", "0"), + v("2022-08-19", "0"), + v("2022-08-20", "20"), + v("2022-08-21", "0"), + v("2022-08-22", "22"), + v("2022-08-23", "23"), + v("2022-08-24", "0"), + v("2022-08-25", "25"), + v("2022-08-26", "0"), + v("2022-08-27", "0"), + ], + Some(d("2022-08-18")), + Some(d("2022-08-27")), + ), + ( + vec![ + v("2023-07-10", "10"), + v("2023-07-12", "12"), + v("2023-07-15", "12"), + ], + vec![ + v("2023-07-10", "10"), + v("2023-07-11", "0"), + v("2023-07-12", "12"), + v("2023-07-13", "0"), + v("2023-07-14", "0"), + v("2023-07-15", "12"), + ], + Some(d("2023-07-12")), + Some(d("2023-07-14")), + ), + ] { + let actual = fill_missing_points(data, MissingDatePolicy::FillZero, from, to); + assert_eq!(expected, actual) + } + } + + #[test] + fn fill_previous_works() { + for (data, expected, from, to) in [ + (vec![], vec![], None, None), + (vec![], vec![], Some(d("2100-01-01")), None), + ( + vec![], + vec![v("2100-01-01", "0")], + Some(d("2100-01-01")), + Some(d("2100-01-01")), + ), + ( + vec![v("2022-01-01", "01")], + vec![v("2022-01-01", "01")], + Some(d("2022-01-01")), + Some(d("2022-01-01")), + ), + ( + vec![ + v("2022-08-20", "20"), + v("2022-08-22", "22"), + v("2022-08-23", "23"), + v("2022-08-25", "25"), + ], + vec![ + v("2022-08-18", "0"), + v("2022-08-19", "0"), + v("2022-08-20", "20"), + v("2022-08-21", "20"), + v("2022-08-22", "22"), + v("2022-08-23", "23"), + v("2022-08-24", "23"), + v("2022-08-25", "25"), + v("2022-08-26", "25"), + v("2022-08-27", "25"), + ], + Some(d("2022-08-18")), + Some(d("2022-08-27")), + ), + ] { + let actual = fill_missing_points(data, MissingDatePolicy::FillPrevious, from, to); + assert_eq!(expected, actual); + } + } +} diff --git a/stats/stats/src/read.rs b/stats/stats/src/read.rs index 4c2b37b67..e95893e77 100644 --- a/stats/stats/src/read.rs +++ b/stats/stats/src/read.rs @@ -1,4 +1,4 @@ -use crate::charts::insert::DateValue; +use crate::{charts::insert::DateValue, missing_date::get_and_fill_chart, MissingDatePolicy}; use chrono::NaiveDate; use entity::{chart_data, charts}; use sea_orm::{ @@ -62,6 +62,7 @@ pub async fn get_chart_data( name: &str, from: Option, to: Option, + policy: Option, ) -> Result, ReadError> { let chart = charts::Entity::find() .column(charts::Column::Id) @@ -70,8 +71,11 @@ pub async fn get_chart_data( .await? .ok_or_else(|| ReadError::NotFound(name.into()))?; - let chart = get_chart(db, chart.id, from, to).await?; - Ok(chart) + let data = match policy { + Some(policy) => get_and_fill_chart(db, chart.id, from, to, policy).await?, + None => get_chart(db, chart.id, from, to).await?, + }; + Ok(data) } async fn get_chart( @@ -174,7 +178,7 @@ mod tests { let db = init_db::("get_chart_int_mock", None).await; insert_mock_data(&db).await; - let chart = get_chart_data(&db, "newBlocksPerDay", None, None) + let chart = get_chart_data(&db, "newBlocksPerDay", None, None, None) .await .unwrap(); assert_eq!( diff --git a/stats/stats/src/tests/simple_test.rs b/stats/stats/src/tests/simple_test.rs index ebb1b7187..a1ed5ddfd 100644 --- a/stats/stats/src/tests/simple_test.rs +++ b/stats/stats/src/tests/simple_test.rs @@ -1,5 +1,6 @@ use super::{init_db::init_db_all, mock_blockscout::fill_mock_blockscout_data}; -use crate::{get_chart_data, get_counters, Chart}; +use crate::{get_chart_data, get_counters, Chart, MissingDatePolicy}; +use chrono::NaiveDate; use pretty_assertions::assert_eq; use sea_orm::DatabaseConnection; @@ -10,18 +11,43 @@ pub async fn simple_test_chart(test_name: &str, chart: impl Chart, expected: Vec fill_mock_blockscout_data(&blockscout, "2023-03-01").await; chart.update(&db, &blockscout, true).await.unwrap(); - get_chart_and_assert_eq(&db, &chart, &expected).await; + get_chart_and_assert_eq(&db, &chart, &expected, None, None, None).await; chart.update(&db, &blockscout, false).await.unwrap(); - get_chart_and_assert_eq(&db, &chart, &expected).await; + get_chart_and_assert_eq(&db, &chart, &expected, None, None, None).await; +} + +pub async fn ranged_test_chart( + test_name: &str, + chart: impl Chart, + expected: Vec<(&str, &str)>, + from: NaiveDate, + to: NaiveDate, +) { + let _ = tracing_subscriber::fmt::try_init(); + let (db, blockscout) = init_db_all(test_name, None).await; + chart.create(&db).await.unwrap(); + fill_mock_blockscout_data(&blockscout, "2023-03-01").await; + let policy = chart.missing_date_policy(); + + chart.update(&db, &blockscout, true).await.unwrap(); + get_chart_and_assert_eq(&db, &chart, &expected, Some(from), Some(to), Some(policy)).await; + + chart.update(&db, &blockscout, false).await.unwrap(); + get_chart_and_assert_eq(&db, &chart, &expected, Some(from), Some(to), Some(policy)).await; } async fn get_chart_and_assert_eq( db: &DatabaseConnection, chart: &impl Chart, expected: &Vec<(&str, &str)>, + from: Option, + to: Option, + policy: Option, ) { - let data = get_chart_data(db, chart.name(), None, None).await.unwrap(); + let data = get_chart_data(db, chart.name(), from, to, policy) + .await + .unwrap(); let data: Vec<_> = data .into_iter() .map(|p| (p.date.to_string(), p.value))