From a03779f9ae6d042f826182ff5cd2afc19d5c38fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=BCller?= Date: Thu, 2 Feb 2023 19:44:05 +0100 Subject: [PATCH 1/7] rewrite frame merge and more logging --- iotfunctions/anomaly.py | 47 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/iotfunctions/anomaly.py b/iotfunctions/anomaly.py index 1814c50b..32b07dd4 100644 --- a/iotfunctions/anomaly.py +++ b/iotfunctions/anomaly.py @@ -97,6 +97,10 @@ Saliency_normalizer = 1 Generalized_normalizer = 1 / 300 +# Do away with numba logs +numba_logger = logging.getLogger('numba') +numba_logger.setLevel(logging.INFO) + # from # https://stackoverflow.com/questions/44790072/sliding-window-on-time-series-data def view_as_windows1(temperature, length, step): @@ -261,27 +265,6 @@ def transform_spectral_residual(self, values): return spectral_residual -def merge_score(dfEntity, dfEntityOrig, column_name, score, mindelta): - """ - Fit interpolated score to original entity slice of the full dataframe - """ - - # equip score with time values, make sure it's positive - score[score < 0] = 0 - dfEntity[column_name] = score - - # merge - dfEntityOrig = pd.merge_asof(dfEntityOrig, dfEntity[column_name], left_index=True, right_index=True, - direction='nearest', tolerance=mindelta) - - if column_name + '_y' in dfEntityOrig: - merged_score = dfEntityOrig[column_name + '_y'].to_numpy() - else: - merged_score = dfEntityOrig[column_name].to_numpy() - - return merged_score - - ####################################################################################### # Scalers ####################################################################################### @@ -658,8 +641,24 @@ def _calc(self, df): linear_interpolate = sp.interpolate.interp1d(time_series_temperature, scores[i], kind='linear', fill_value='extrapolate') - zScoreII = merge_score(dfe, dfe_orig, output_item, - abs(linear_interpolate(np.arange(0, temperature.size, 1))), mindelta) + # stretch anomaly score to fit temperature.size + score = abs(linear_interpolate(np.arange(0, temperature.size, 1))) + + # and make sure sure it's positive + score[score < 0] = 0 + dfe[output_item] = score + + # merge so that data is stretched to match the original data w/o gaps and NaNs + dfe_orig = pd.merge_asof(dfe_orig, dfe[output_item], left_index=True, right_index=True, + direction='nearest', tolerance=mindelta) + + if output_item + '_y' in dfe_orig: + zScoreII = dfe_orig[output_item + '_y'].to_numpy() + else: + zScoreII = dfe_orig[output_item].to_numpy() + + logger.debug('Merge Score : ' + str(score.shape) + ', ' + str(zScoreII.shape)) + # fast path - either cut off or just copy elif diff < 0: zScoreII = scores[i][0:temperature.size] @@ -669,7 +668,7 @@ def _calc(self, df): # make sure shape is correct try: df[output_item] = zScoreII - except Exception as e2: + except Exception as e2: df[output_item] = zScoreII.reshape(-1,1) pass From b7edafd980c4b43850aa18d5d9ab26fcf4b3b157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=BCller?= Date: Fri, 3 Feb 2023 07:42:48 +0100 Subject: [PATCH 2/7] Fixed base function test --- tests/test_base_functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py index fb8852c0..d875ff92 100644 --- a/tests/test_base_functions.py +++ b/tests/test_base_functions.py @@ -49,10 +49,10 @@ def test_base_functions(): df_i['Test2'] = df_i[Temperature] + addl df_i['Test3'] = df_i[Temperature] + addl df_i['Test4'] = df_i[Temperature] + addl - df_i['Test1'][3] = None - df_i['Test2'][2] = None - df_i['Test2'][3] = None - df_i['Test3'][1] = None + df_i['Test1'][3] = np.nan + df_i['Test2'][2] = np.nan + df_i['Test2'][3] = np.nan + df_i['Test3'][1] = np.nan df_i['Test4'][1] = 10000.0 df_i['Test4'][3] = 20000.0 From 8aac20ef6dc9be529724a3a19b00f1a05482ec58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=BCller?= Date: Fri, 3 Feb 2023 18:55:04 +0100 Subject: [PATCH 3/7] update --- iotfunctions/__init__.py | 2 +- iotfunctions/anomaly.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/iotfunctions/__init__.py b/iotfunctions/__init__.py index ca7a224e..9ffac57e 100644 --- a/iotfunctions/__init__.py +++ b/iotfunctions/__init__.py @@ -11,5 +11,5 @@ import os import pkgutil -__version__ = '8.8.0' +__version__ = '8.9.19' __all__ = list(module for (_, module, _) in pkgutil.iter_modules([os.path.dirname(__file__)])) diff --git a/iotfunctions/anomaly.py b/iotfunctions/anomaly.py index 32b07dd4..4e093f5d 100644 --- a/iotfunctions/anomaly.py +++ b/iotfunctions/anomaly.py @@ -99,7 +99,7 @@ # Do away with numba logs numba_logger = logging.getLogger('numba') -numba_logger.setLevel(logging.INFO) +numba_logger.setLevel(logging.ERROR) # from # https://stackoverflow.com/questions/44790072/sliding-window-on-time-series-data From f7bb9ceec780d5048bcd9521e02368f155145281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=BCller?= Date: Mon, 6 Feb 2023 14:37:08 +0100 Subject: [PATCH 4/7] update --- iotfunctions/anomaly.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/iotfunctions/anomaly.py b/iotfunctions/anomaly.py index 4e093f5d..4b5394a9 100644 --- a/iotfunctions/anomaly.py +++ b/iotfunctions/anomaly.py @@ -18,6 +18,7 @@ import logging import time import hashlib # encode feature names +import traceback import numpy as np import pandas as pd @@ -636,19 +637,26 @@ def _calc(self, df): # slow path - interpolate result score to stretch it to the size of the input data if diff > 0: dfe[output_item] = 0.0006 + logger.debug('HERE 1') time_series_temperature = np.linspace(self.windowsize // 2, temperature.size - self.windowsize // 2 + 1, temperature.size - diff) + logger.debug('HERE 2') linear_interpolate = sp.interpolate.interp1d(time_series_temperature, scores[i], kind='linear', fill_value='extrapolate') + logger.debug('HERE 3') # stretch anomaly score to fit temperature.size score = abs(linear_interpolate(np.arange(0, temperature.size, 1))) # and make sure sure it's positive + logger.debug('HERE 4 -> ' + str(dfe[output_item].values.shape)) score[score < 0] = 0 + + logger.debug('HERE 5') dfe[output_item] = score # merge so that data is stretched to match the original data w/o gaps and NaNs + logger.debug('HERE 6') dfe_orig = pd.merge_asof(dfe_orig, dfe[output_item], left_index=True, right_index=True, direction='nearest', tolerance=mindelta) @@ -673,7 +681,7 @@ def _calc(self, df): pass except Exception as e: - logger.error(self.whoami + ' score integration failed with ' + str(e)) + logger.error(self.whoami + ' score integration failed with ' + str(e) + '\n' + traceback.format_exc()) logger.debug('--->') From 8a022e67ea19bc0f6fd26a5afcff1ca0c7cc5a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=BCller?= Date: Mon, 6 Feb 2023 18:50:38 +0100 Subject: [PATCH 5/7] update --- iotfunctions/anomaly.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/iotfunctions/anomaly.py b/iotfunctions/anomaly.py index 4b5394a9..7d9e16de 100644 --- a/iotfunctions/anomaly.py +++ b/iotfunctions/anomaly.py @@ -595,10 +595,13 @@ def _calc(self, df): # remove all rows with only null entries dfe = dfe_orig.dropna(how='all') + logger.info('Anomaly ' + str(df[self.output_items[0]].values.shape) + ', ' + # minimal time delta for merging mindelta, dfe_orig = min_delta(dfe_orig) + logger.info('Anomaly II ' + str(dfe_orig[self.output_items[0]].values.shape)) + logger.debug('Timedelta:' + str(mindelta) + ' Index: ' + str(dfe_orig.index)) # one dimensional time series - named temperature for catchyness @@ -637,26 +640,20 @@ def _calc(self, df): # slow path - interpolate result score to stretch it to the size of the input data if diff > 0: dfe[output_item] = 0.0006 - logger.debug('HERE 1') time_series_temperature = np.linspace(self.windowsize // 2, temperature.size - self.windowsize // 2 + 1, temperature.size - diff) - logger.debug('HERE 2') linear_interpolate = sp.interpolate.interp1d(time_series_temperature, scores[i], kind='linear', fill_value='extrapolate') - logger.debug('HERE 3') # stretch anomaly score to fit temperature.size score = abs(linear_interpolate(np.arange(0, temperature.size, 1))) # and make sure sure it's positive - logger.debug('HERE 4 -> ' + str(dfe[output_item].values.shape)) score[score < 0] = 0 - logger.debug('HERE 5') dfe[output_item] = score # merge so that data is stretched to match the original data w/o gaps and NaNs - logger.debug('HERE 6') dfe_orig = pd.merge_asof(dfe_orig, dfe[output_item], left_index=True, right_index=True, direction='nearest', tolerance=mindelta) From 61693033e9b72cd522a55898b0e3b64c12b0e59c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=BCller?= Date: Mon, 6 Feb 2023 19:42:04 +0100 Subject: [PATCH 6/7] update --- iotfunctions/anomaly.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/iotfunctions/anomaly.py b/iotfunctions/anomaly.py index 7d9e16de..0607dbbc 100644 --- a/iotfunctions/anomaly.py +++ b/iotfunctions/anomaly.py @@ -596,6 +596,8 @@ def _calc(self, df): # remove all rows with only null entries dfe = dfe_orig.dropna(how='all') logger.info('Anomaly ' + str(df[self.output_items[0]].values.shape) + ', ' + + str(dfe_orig[self.output_items[0]].values.shape) + ', ' + + str(dfe[self.output_items[0]].values.shape)) # minimal time delta for merging mindelta, dfe_orig = min_delta(dfe_orig) From d1886a93643096f898297af7cec98860549d1d92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=BCller?= Date: Mon, 6 Feb 2023 20:51:45 +0100 Subject: [PATCH 7/7] update --- iotfunctions/anomaly.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/iotfunctions/anomaly.py b/iotfunctions/anomaly.py index 0607dbbc..fd9d1e84 100644 --- a/iotfunctions/anomaly.py +++ b/iotfunctions/anomaly.py @@ -547,7 +547,8 @@ def prepare_data(self, dfEntity): # interpolate gaps - data imputation try: - dfe = dfe.dropna(subset=[self.input_item]).interpolate(method="time") + #dfe = dfe.dropna(subset=[self.input_item]).interpolate(method="time") + dfe = dfe.interpolate(method="time") except Exception as e: logger.error('Prepare data error: ' + str(e))