From 7d5a85ee1060d86cad4a3f549e6937ac3fa0c2b9 Mon Sep 17 00:00:00 2001
From: Alex Torres
Date: Wed, 30 Oct 2024 13:52:43 -0400
Subject: [PATCH] [postgres] Fix unicode decode error from Azure PostgreSQL Flexible Server (#18938)

* manually decode backend_type

* map backend_type to backend_type::bytea

* add comments

* add changelog

* run lint

---------

Co-authored-by: Zhengda Lu
(cherry picked from commit 3d527cab62aa44313d690f64b8ee569ed1f0dc7f)
---
 postgres/changelog.d/18938.fixed               |  1 +
 .../postgres/statement_samples.py              | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 postgres/changelog.d/18938.fixed

diff --git a/postgres/changelog.d/18938.fixed b/postgres/changelog.d/18938.fixed
new file mode 100644
index 0000000000000..88f3ce716e1dc
--- /dev/null
+++ b/postgres/changelog.d/18938.fixed
@@ -0,0 +1 @@
+Fix unicode decode error from Azure PostgreSQL Flexible Server
diff --git a/postgres/datadog_checks/postgres/statement_samples.py b/postgres/datadog_checks/postgres/statement_samples.py
index afec8cd67806d..885ed0335c3db 100644
--- a/postgres/datadog_checks/postgres/statement_samples.py
+++ b/postgres/datadog_checks/postgres/statement_samples.py
@@ -81,6 +81,12 @@
     "backend_type",
 ]
 
+# PG_STAT_ACTIVITY_COLS_MAPPING applies additional data type casting to the columns
+PG_STAT_ACTIVITY_COLS_MAPPING = {
+    # use the bytea type to avoid unicode decode errors on Azure PostgreSQL
+    'backend_type': 'backend_type::bytea as backend_type',
+}
+
 PG_BLOCKING_PIDS_FUNC = ",pg_blocking_pids(pid) as blocking_pids"
 
 CURRENT_TIME_FUNC = "clock_timestamp() as now,"
@@ -240,7 +246,7 @@ def _get_active_connections(self):
         return [dict(row) for row in rows]
 
     @tracked_method(agent_check_getter=agent_check_getter, track_result_length=True)
-    def _get_new_pg_stat_activity(self, available_activity_columns):
+    def _get_new_pg_stat_activity(self, available_activity_columns, activity_columns_mapping):
         start_time = time.time()
         extra_filters, params = self._get_extra_filters_and_params(filter_stale_idle_conn=True)
         report_activity = self._report_activity_event()
@@ -255,10 +261,11 @@ def _get_new_pg_stat_activity(self, available_activity_columns):
             blocking_func = PG_BLOCKING_PIDS_FUNC
         if report_activity:
             cur_time_func = CURRENT_TIME_FUNC
+        activity_columns = [activity_columns_mapping.get(col, col) for col in available_activity_columns]
         query = PG_STAT_ACTIVITY_QUERY.format(
             backend_type_predicate=backend_type_predicate,
             current_time_func=cur_time_func,
-            pg_stat_activity_cols=', '.join(available_activity_columns),
+            pg_stat_activity_cols=', '.join(activity_columns),
             pg_blocking_func=blocking_func,
             pg_stat_activity_view=self._config.pg_stat_activity_view,
             extra_filters=extra_filters,
@@ -332,6 +339,11 @@ def _filter_and_normalize_statement_rows(self, rows):
         normalized_rows = []
         for row in rows:
             total_count += 1
+            if row.get('backend_type') is not None:
+                try:
+                    row['backend_type'] = row['backend_type'].tobytes().decode('utf-8')
+                except UnicodeDecodeError:
+                    row['backend_type'] = 'unknown'
             if (not row['datname'] or not row['query']) and row.get(
                 'backend_type', 'client backend'
             ) == 'client backend':
@@ -459,7 +471,7 @@ def _collect_statement_samples(self):
                 raw=True,
             )
             return
-        rows = self._get_new_pg_stat_activity(pg_activity_cols)
+        rows = self._get_new_pg_stat_activity(pg_activity_cols, PG_STAT_ACTIVITY_COLS_MAPPING)
         rows = self._filter_and_normalize_statement_rows(rows)
         submitted_count = 0
         if self._explain_plan_coll_enabled: