From 97755151afe293ce1eb38f806693492cc244b01f Mon Sep 17 00:00:00 2001 From: Calum Date: Thu, 25 Jul 2024 15:17:06 +1200 Subject: [PATCH] Remove daylong --- eqcorrscan/core/match_filter/family.py | 6 +- .../core/match_filter/helpers/processes.py | 8 +- eqcorrscan/core/match_filter/helpers/tribe.py | 3 - .../core/match_filter/matched_filter.py | 2 +- eqcorrscan/core/match_filter/party.py | 10 +- eqcorrscan/core/match_filter/template.py | 15 +- eqcorrscan/core/match_filter/tribe.py | 51 +++--- eqcorrscan/core/template_gen.py | 26 +-- .../tests/matched_filter/helper_test.py | 2 - .../tests/matched_filter/match_filter_test.py | 34 ++-- eqcorrscan/tests/pre_processing_test.py | 80 ++------- eqcorrscan/utils/pre_processing.py | 154 ++++-------------- 12 files changed, 115 insertions(+), 276 deletions(-) diff --git a/eqcorrscan/core/match_filter/family.py b/eqcorrscan/core/match_filter/family.py index 729a01355..1660aefa5 100644 --- a/eqcorrscan/core/match_filter/family.py +++ b/eqcorrscan/core/match_filter/family.py @@ -565,8 +565,9 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, `cores`). :type ignore_length: bool :param ignore_length: - If using daylong=True, then dayproc will try check that the data - are there for at least 80% of the day, if you don't want this check + Processing functions will check that the data are there for at + least 80% of the required length and raise an error if not. + If you don't want this check (which will raise an error if too much data are missing) then set ignore_length=True. This is not recommended! :type ignore_bad_data: bool @@ -770,7 +771,6 @@ def _process_streams(self, stream, pre_processed, process_cores=1, parallel=parallel, cores=process_cores, stream=template_stream.merge().copy(), - daylong=False, ignore_length=ignore_length, overlap=0.0, ignore_bad_data=ignore_bad_data) processed_stream = Stream() diff --git a/eqcorrscan/core/match_filter/helpers/processes.py b/eqcorrscan/core/match_filter/helpers/processes.py index c0da88235..781e172da 100644 --- a/eqcorrscan/core/match_filter/helpers/processes.py +++ b/eqcorrscan/core/match_filter/helpers/processes.py @@ -177,7 +177,6 @@ def _get_detection_stream( pre_process: bool = False, parallel_process: bool = True, process_cores: int = None, - daylong: bool = False, overlap: Union[str, float] = "calculate", ignore_length: bool = False, ignore_bad_data: bool = False, @@ -232,7 +231,6 @@ def _get_detection_stream( Whether to process data in parallel (uses multi-threading) :param process_cores: Maximum number of cores to use for parallel processing - :param daylong: See utils.pre_processing.multi_process :param overlap: See core.match_filter.tribe.detect :param ignore_length: See utils.pre_processing.multi_process :param ignore_bad_data: See utils.pre_processing.multi_process @@ -289,7 +287,7 @@ def _get_detection_stream( lowcut=lowcut, samp_rate=samp_rate, process_length=process_length, parallel=parallel_process, cores=process_cores, - daylong=daylong, ignore_length=ignore_length, + ignore_length=ignore_length, overlap=overlap, ignore_bad_data=ignore_bad_data) # We don't need to hold on to st! 
del st @@ -341,7 +339,6 @@ def _pre_processor( process_length: float, parallel: bool, cores: int, - daylong: bool, ignore_length: bool, overlap: float, ignore_bad_data: bool, @@ -373,7 +370,6 @@ def _pre_processor( :param process_length: See utils.pre_processing.multi_process :param parallel: See utils.pre_processing.multi_process :param cores: See utils.pre_processing.multi_process - :param daylong: See utils.pre_processing.multi_process :param ignore_length: See utils.pre_processing.multi_process :param overlap: See core.match_filter.tribe.detect :param ignore_bad_data: See utils.pre_processing.multi_process @@ -406,7 +402,7 @@ def _pre_processor( try: st_chunks = _pre_process( st, template_ids, pre_processed, filt_order, highcut, lowcut, - samp_rate, process_length, parallel, cores, daylong, + samp_rate, process_length, parallel, cores, ignore_length, ignore_bad_data, overlap) for chunk in st_chunks: if not os.path.isdir(temp_stream_dir): diff --git a/eqcorrscan/core/match_filter/helpers/tribe.py b/eqcorrscan/core/match_filter/helpers/tribe.py index c54c403ba..da493b329 100644 --- a/eqcorrscan/core/match_filter/helpers/tribe.py +++ b/eqcorrscan/core/match_filter/helpers/tribe.py @@ -198,7 +198,6 @@ def _pre_process( process_length: float, parallel: bool, cores: int, - daylong: bool, ignore_length: bool, ignore_bad_data: bool, overlap: float, **kwargs @@ -218,7 +217,6 @@ def _pre_process( :param process_length: See utils.pre_processing.multi_process :param parallel: See utils.pre_processing.multi_process :param cores: See utils.pre_processing.multi_process - :param daylong: See utils.pre_processing.multi_process :param ignore_length: See utils.pre_processing.multi_process :param overlap: See core.match_filter.tribe.detect :param ignore_bad_data: See utils.pre_processing.multi_process @@ -247,7 +245,6 @@ def _pre_process( parallel=parallel, cores=cores, stream=st, - daylong=daylong, ignore_length=ignore_length, overlap=overlap, ignore_bad_data=ignore_bad_data) diff --git a/eqcorrscan/core/match_filter/matched_filter.py b/eqcorrscan/core/match_filter/matched_filter.py index b40d2fa63..23fa42d6c 100644 --- a/eqcorrscan/core/match_filter/matched_filter.py +++ b/eqcorrscan/core/match_filter/matched_filter.py @@ -313,7 +313,7 @@ def match_filter(template_names, template_list, st, threshold, # Data must be pre-processed party = tribe.detect( stream=st, threshold=threshold, threshold_type=threshold_type, - trig_int=trig_int, plot=plot, plotdir=plotdir, daylong=False, + trig_int=trig_int, plot=plot, plotdir=plotdir, parallel_process=False, xcorr_func=xcorr_func, concurrency=concurrency, cores=cores, ignore_length=True, ignore_bad_data=True, group_size=None, overlap="calculate", full_peaks=full_peaks, save_progress=False, diff --git a/eqcorrscan/core/match_filter/party.py b/eqcorrscan/core/match_filter/party.py index fafec7f51..dd59c7f7f 100644 --- a/eqcorrscan/core/match_filter/party.py +++ b/eqcorrscan/core/match_filter/party.py @@ -20,6 +20,7 @@ import tempfile import logging from os.path import join +import warnings import numpy as np from obspy import Catalog, read_events, Stream @@ -927,8 +928,9 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, `cores`). :type ignore_length: bool :param ignore_length: - If using daylong=True, then dayproc will try check that the data - are there for at least 80% of the day, if you don't want this check + Processing functions will check that the data are there for at + least 80% of the required length and raise an error if not. 
+ If you don't want this check (which will raise an error if too much data are missing) then set ignore_length=True. This is not recommended! :type ignore_bad_data: bool @@ -961,6 +963,10 @@ def lag_calc(self, stream, pre_processed, shift_len=0.2, min_cc=0.4, .. Note:: Picks are corrected for the template pre-pick time. """ + # Cope with daylong deprecation + daylong = kwargs.pop("daylong", None) + if daylong: + warnings.warn("daylong argument deprecated - will be ignored") process_cores = process_cores or cores template_groups = group_templates( [_f.template for _f in self.families diff --git a/eqcorrscan/core/match_filter/template.py b/eqcorrscan/core/match_filter/template.py index 6bdf5c4e3..eccf9b1ca 100644 --- a/eqcorrscan/core/match_filter/template.py +++ b/eqcorrscan/core/match_filter/template.py @@ -393,7 +393,7 @@ def read(self, filename): return self def detect(self, stream, threshold, threshold_type, trig_int, - plot=False, plotdir=None, pre_processed=False, daylong=False, + plot=False, plotdir=None, pre_processed=False, parallel_process=True, xcorr_func=None, concurrency=None, cores=None, ignore_length=False, overlap="calculate", full_peaks=False, **kwargs): @@ -428,12 +428,6 @@ def detect(self, stream, threshold, threshold_type, trig_int, Defaults to False, which will use the :mod:`eqcorrscan.utils.pre_processing` routines to resample and filter the continuous data. - :type daylong: bool - :param daylong: - Set to True to use the - :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which - preforms additional checks and is more efficient for day-long data - over other methods. :type parallel_process: bool :param parallel_process: :type xcorr_func: str or callable @@ -450,8 +444,9 @@ def detect(self, stream, threshold, threshold_type, trig_int, :param cores: Number of workers for processing and detection. :type ignore_length: bool :param ignore_length: - If using daylong=True, then dayproc will try check that the data - are there for at least 80% of the day, if you don't want this check + Processing functions will check that the data are there for at + least 80% of the required length and raise an error if not. + If you don't want this check (which will raise an error if too much data are missing) then set ignore_length=True. This is not recommended! 
:type overlap: float @@ -537,7 +532,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, party = Tribe(templates=[self]).detect( stream=stream, threshold=threshold, threshold_type=threshold_type, trig_int=trig_int, plotdir=plotdir, - plot=plot, pre_processed=pre_processed, daylong=daylong, + plot=plot, pre_processed=pre_processed, parallel_process=parallel_process, xcorr_func=xcorr_func, concurrency=concurrency, cores=cores, ignore_length=ignore_length, overlap=overlap, full_peaks=full_peaks, **kwargs) diff --git a/eqcorrscan/core/match_filter/tribe.py b/eqcorrscan/core/match_filter/tribe.py index d7db6b1ba..9a7717ee9 100644 --- a/eqcorrscan/core/match_filter/tribe.py +++ b/eqcorrscan/core/match_filter/tribe.py @@ -22,6 +22,7 @@ import traceback import uuid import logging +import warnings from multiprocessing import Process, Queue, cpu_count from queue import Empty @@ -652,7 +653,7 @@ def cluster(self, method, **kwargs): return tribes def detect(self, stream, threshold, threshold_type, trig_int, plot=False, - plotdir=None, daylong=False, parallel_process=True, + plotdir=None, parallel_process=True, xcorr_func=None, concurrency=None, cores=None, concurrent_processing=False, ignore_length=False, ignore_bad_data=False, group_size=None, overlap="calculate", @@ -685,12 +686,6 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, :param plotdir: The path to save plots to. If `plotdir=None` (default) then the figure will be shown on screen. - :type daylong: bool - :param daylong: - Set to True to use the - :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which - preforms additional checks and is more efficient for day-long data - over other methods. :type parallel_process: bool :param parallel_process: :type xcorr_func: str or callable @@ -712,8 +707,9 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, benchmarking. :type ignore_length: bool :param ignore_length: - If using daylong=True, then dayproc will try check that the data - are there for at least 80% of the day, if you don't want this check + Processing functions will check that the data are there for at + least 80% of the required length and raise an error if not. + If you don't want this check (which will raise an error if too much data are missing) then set ignore_length=True. This is not recommended!
:type ignore_bad_data: bool @@ -832,6 +828,11 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, # We should not need to copy the stream, it is copied in chunks by # _group_process + # Cope with daylong deprecation + daylong = kwargs.pop("daylong", None) + if daylong: + warnings.warn("daylong argument deprecated - will be ignored") + # Argument handling if overlap is None: overlap = 0.0 @@ -871,7 +872,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, tr.id for template in self.templates for tr in template.st) args = (stream, template_ids, pre_processed, parallel_process, - process_cores, daylong, ignore_length, overlap, + process_cores, ignore_length, overlap, ignore_bad_data, group_size, groups, sampling_rate, threshold, threshold_type, save_progress, xcorr_func, concurrency, cores, export_cccsums, parallel, peak_cores, trig_int, full_peaks, @@ -899,7 +900,7 @@ def detect(self, stream, threshold, threshold_type, trig_int, plot=False, def _detect_serial( self, stream, template_ids, pre_processed, parallel_process, - process_cores, daylong, ignore_length, overlap, ignore_bad_data, + process_cores, ignore_length, overlap, ignore_bad_data, group_size, groups, sampling_rate, threshold, threshold_type, save_progress, xcorr_func, concurrency, cores, export_cccsums, parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format, @@ -923,7 +924,7 @@ def _detect_serial( lowcut=self.templates[0].lowcut, samp_rate=self.templates[0].samp_rate, process_length=self.templates[0].process_length, - parallel=parallel_process, cores=process_cores, daylong=daylong, + parallel=parallel_process, cores=process_cores, ignore_length=ignore_length, ignore_bad_data=ignore_bad_data, overlap=overlap, **kwargs) @@ -990,7 +991,7 @@ def _detect_serial( def _detect_concurrent( self, stream, template_ids, pre_processed, parallel_process, - process_cores, daylong, ignore_length, overlap, ignore_bad_data, + process_cores, ignore_length, overlap, ignore_bad_data, group_size, groups, sampling_rate, threshold, threshold_type, save_progress, xcorr_func, concurrency, cores, export_cccsums, parallel, peak_cores, trig_int, full_peaks, plot, plotdir, plot_format, @@ -1050,7 +1051,6 @@ def _detect_concurrent( process_length=self.templates[0].process_length, parallel=parallel_process, cores=process_cores, - daylong=daylong, ignore_length=ignore_length, overlap=overlap, ignore_bad_data=ignore_bad_data, @@ -1228,7 +1228,7 @@ def _detect_concurrent( def client_detect(self, client, starttime, endtime, threshold, threshold_type, trig_int, plot=False, plotdir=None, - min_gap=None, daylong=False, parallel_process=True, + min_gap=None, parallel_process=True, xcorr_func=None, concurrency=None, cores=None, concurrent_processing=False, ignore_length=False, ignore_bad_data=False, group_size=None, @@ -1269,12 +1269,6 @@ def client_detect(self, client, starttime, endtime, threshold, :param min_gap: Minimum gap allowed in data - use to remove traces with known issues - :type daylong: bool - :param daylong: - Set to True to use the - :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which - preforms additional checks and is more efficient for day-long data - over other methods. :type parallel_process: bool :param parallel_process: :type xcorr_func: str or callable @@ -1296,8 +1290,9 @@ def client_detect(self, client, starttime, endtime, threshold, benchmarking. 
:type ignore_length: bool :param ignore_length: - If using daylong=True, then dayproc will try check that the data - are there for at least 80% of the day, if you don't want this check + Processing functions will check that the data are there for at + least 80% of the required length and raise an error if not. + If you don't want this check (which will raise an error if too much data are missing) then set ignore_length=True. This is not recommended! :type ignore_bad_data: bool @@ -1389,6 +1384,11 @@ def client_detect(self, client, starttime, endtime, threshold, from eqcorrscan.core.match_filter.helpers.processes import ( _get_detection_stream) + # Cope with daylong deprecation + daylong = kwargs.pop("daylong", None) + if daylong: + warnings.warn("daylong argument deprecated - will be ignored") + # This uses get_waveforms_bulk to get data - not all client types have # this, so we check and monkey patch here. if not hasattr(client, "get_waveforms_bulk"): @@ -1433,7 +1433,7 @@ def client_detect(self, client, starttime, endtime, threshold, detector_kwargs = dict( threshold=threshold, threshold_type=threshold_type, trig_int=trig_int, plot=plot, plotdir=plotdir, - daylong=daylong, parallel_process=parallel_process, + parallel_process=parallel_process, xcorr_func=xcorr_func, concurrency=concurrency, cores=cores, ignore_length=ignore_length, ignore_bad_data=ignore_bad_data, group_size=group_size, overlap=None, full_peaks=full_peaks, @@ -1442,6 +1442,7 @@ def client_detect(self, client, starttime, endtime, threshold, poison_queue=poison_queue, shutdown=False, concurrent_processing=concurrent_processing, groups=groups, make_events=make_events, min_stations=min_stations) + detector_kwargs.update(kwargs) if not concurrent_processing: Logger.warning("Using concurrent_processing=True can be faster if" @@ -1486,7 +1487,7 @@ def client_detect(self, client, starttime, endtime, threshold, temp_stream_dir=self._stream_dir, full_stream_dir=full_stream_dir, pre_process=True, parallel_process=parallel_process, - process_cores=process_cores, daylong=daylong, + process_cores=process_cores, overlap=0.0, ignore_length=ignore_length, ignore_bad_data=ignore_bad_data, filt_order=self.templates[0].filt_order, diff --git a/eqcorrscan/core/template_gen.py b/eqcorrscan/core/template_gen.py index dc7007f33..7c0d08609 100644 --- a/eqcorrscan/core/template_gen.py +++ b/eqcorrscan/core/template_gen.py @@ -246,6 +246,12 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order, >>> print(len(templates[0])) 15 """ + # Cope with daylong deprecation + daylong = kwargs.pop("daylong", None) + if daylong: + warnings.warn( + "daylong argument deprecated - process-len will be set to 86400") + process_len = 86400.0 client_map = {'from_client': 'fdsn'} assert method in ('from_client', 'from_meta_file', 'from_sac') if not isinstance(swin, list): @@ -319,22 +325,6 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order, Logger.info("No data") continue if process: - data_len = max([len(tr.data) / tr.stats.sampling_rate - for tr in st]) - if 80000 < data_len < 90000: - daylong = True - starttime = min([tr.stats.starttime for tr in st]) - min_delta = min([tr.stats.delta for tr in st]) - # Cope with the common starttime less than 1 sample before the - # start of day. 
- if (starttime + min_delta).date > starttime.date: - starttime = (starttime + min_delta) - # Check if this is stupid: - if abs(starttime - UTCDateTime(starttime.date)) > 600: - daylong = False - starttime = starttime.date - else: - daylong = False # Check if the required amount of data have been downloaded - skip # channels if arg set. for tr in st: @@ -356,9 +346,7 @@ def template_gen(method, lowcut, highcut, samp_rate, filt_order, kwargs = dict( st=st, lowcut=lowcut, highcut=highcut, filt_order=filt_order, samp_rate=samp_rate, - parallel=parallel, num_cores=num_cores, daylong=daylong) - if daylong: - kwargs.update(dict(starttime=UTCDateTime(starttime))) + parallel=parallel, num_cores=num_cores) st = pre_processing.multi_process(**kwargs) data_start = min([tr.stats.starttime for tr in st]) data_end = max([tr.stats.endtime for tr in st]) diff --git a/eqcorrscan/tests/matched_filter/helper_test.py b/eqcorrscan/tests/matched_filter/helper_test.py index 610801064..4e5cd6781 100644 --- a/eqcorrscan/tests/matched_filter/helper_test.py +++ b/eqcorrscan/tests/matched_filter/helper_test.py @@ -243,7 +243,6 @@ def setUpClass(cls): process_length=process_length, parallel=False, cores=1, - daylong=False, overlap=3.0, ) @@ -293,7 +292,6 @@ def setUpClass(cls): pre_process=False, parallel_process=True, process_cores=1, - daylong=False, overlap=3.0, ignore_length=False, ignore_bad_data=False, diff --git a/eqcorrscan/tests/matched_filter/match_filter_test.py b/eqcorrscan/tests/matched_filter/match_filter_test.py index 76180f897..fd9cb3e1d 100644 --- a/eqcorrscan/tests/matched_filter/match_filter_test.py +++ b/eqcorrscan/tests/matched_filter/match_filter_test.py @@ -304,7 +304,7 @@ def test_geonet_tribe_detect(self): party = self.tribe.copy().client_detect( client=client, starttime=self.t1, endtime=self.t2, threshold=8.0, threshold_type='MAD', trig_int=6.0, - daylong=False, plot=False, concurrent_processing=conc_proc) + plot=False, concurrent_processing=conc_proc) self.assertEqual(len(party), 16) @@ -722,7 +722,7 @@ def test_tribe_detect(self): for conc_proc in [True, False]: party = self.tribe.detect( stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, + trig_int=6.0, plot=False, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(party), 4) compare_families( @@ -740,7 +740,7 @@ def test_min_stations(self): for conc_proc in [False, True]: party = local_tribe.detect( stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, min_stations=5, + trig_int=6.0, plot=False, min_stations=5, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(party), 3) @@ -758,7 +758,7 @@ def test_min_stations_network(self): client=Client('NCEDC'), starttime=UTCDateTime(2004, 9, 28, 17), endtime=UTCDateTime(2004, 9, 28, 18), threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, min_stations=5, + trig_int=6.0, plot=False, min_stations=5, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(party), 3) @@ -771,7 +771,7 @@ def test_no_stations(self): for conc_proc in [False, True]: party = local_tribe.detect( stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, min_stations=6, + trig_int=6.0, plot=False, min_stations=6, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(party), 0) @@ -792,7 +792,7 @@ def test_tribe_detect_with_empty_streams(self): # run 
detection with 2 templates in tribe party1 = tribe1.detect( stream=st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plotvar=False, + trig_int=6.0, plotvar=False, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(party1), 2) party1 = Party([f for f in party1 @@ -802,7 +802,7 @@ def test_tribe_detect_with_empty_streams(self): if t.name == '2004_09_28t17_19_25']) party2 = tribe2.detect( stream=st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plotvar=False, + trig_int=6.0, plotvar=False, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(party2), 1) # This should fail in v0.4.2 @@ -819,7 +819,7 @@ def test_tribe_detect_short_data(self): template.process_length = 2400 party = tribe.detect( stream=short_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, + trig_int=6.0, plot=False, parallel_process=False, concurrent_processing=conc_proc, ignore_bad_data=True) self.assertEqual(len(party), 4) @@ -830,7 +830,7 @@ def test_tribe_detect_parallel_process(self): for conc_proc in [True, False]: party = self.tribe.detect( stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, parallel_process=True, + trig_int=6.0, plot=False, parallel_process=True, process_cores=2, concurrent_processing=conc_proc) self.assertEqual(len(party), 4) compare_families( @@ -842,7 +842,7 @@ def test_tribe_detect_save_progress(self): for conc_proc in [True, False]: party = self.tribe.detect( stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, + trig_int=6.0, plot=False, parallel_process=False, save_progress=True, concurrent_processing=conc_proc) self.assertEqual(len(party), 4) @@ -866,7 +866,7 @@ def test_tribe_detect_masked_data(self): stream[0].stats.starttime + 1900, stream[0].stats.endtime)) party = self.tribe.detect( stream=stream, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, + trig_int=6.0, plot=False, parallel_process=False, xcorr_func='fftw', concurrency='concurrent', concurrent_processing=conc_proc) self.assertEqual(len(party), 4) @@ -880,7 +880,7 @@ def test_tribe_detect_no_processing(self): template.highcut = None party = tribe.detect( stream=self.st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, + trig_int=6.0, plot=False, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(party), 4) compare_families( @@ -896,7 +896,7 @@ def test_client_detect(self): party = self.tribe.copy().client_detect( client=client, starttime=self.t1 + 2.75, endtime=self.t2, threshold=8.0, threshold_type='MAD', trig_int=6.0, - daylong=False, plot=False, concurrent_processing=conc_proc) + plot=False, concurrent_processing=conc_proc) compare_families( party=party, party_in=self.party, float_tol=0.05, check_event=False) @@ -910,7 +910,7 @@ def test_client_detect_save_progress(self): party = self.tribe.copy().client_detect( client=client, starttime=self.t1 + 2.75, endtime=self.t2, threshold=8.0, threshold_type='MAD', trig_int=6.0, - daylong=False, plot=False, save_progress=True, + plot=False, save_progress=True, concurrent_processing=conc_proc) self.assertTrue(os.path.isdir(".parties")) @@ -931,7 +931,7 @@ def test_party_lag_calc(self): for conc_proc in [True, False]: chained_cat = self.tribe.detect( stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, + trig_int=6.0, 
plot=False, concurrent_processing=conc_proc).lag_calc( stream=self.unproc_st, pre_processed=False) catalog = self.party.copy().lag_calc( @@ -1083,7 +1083,7 @@ def test_day_long_methods(self): Logger.info(f"Running conc_proc={conc_proc}") day_party = daylong_tribe.detect( stream=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, - daylong=True, plot=False, parallel_process=False, + plot=False, parallel_process=False, concurrent_processing=conc_proc) self.assertEqual(len(day_party), 4) day_catalog = day_party.lag_calc(stream=st, pre_processed=False, @@ -1104,7 +1104,7 @@ def test_template_detect(self): for conc_proc in [True, False]: party_t = test_template.detect( stream=self.unproc_st, threshold=8.0, threshold_type='MAD', - trig_int=6.0, daylong=False, plot=False, overlap=None, + trig_int=6.0, plot=False, overlap=None, concurrent_processing=conc_proc) self.assertEqual(len(party_t), 1) diff --git a/eqcorrscan/tests/pre_processing_test.py b/eqcorrscan/tests/pre_processing_test.py index 7b842d0cf..344694439 100644 --- a/eqcorrscan/tests/pre_processing_test.py +++ b/eqcorrscan/tests/pre_processing_test.py @@ -232,73 +232,19 @@ def test_parallel_core_unset(self): self.assertEqual(self.instart, tr.stats.starttime) self.assertEqual(self.inend, tr.stats.endtime) - def test_dayproc(self): - """Test a straight-forward day processing implementation.""" - processed = multi_process( - st=self.st.copy(), lowcut=0.1, highcut=0.4, filt_order=3, - samp_rate=1, starttime=self.day_start, parallel=True, - num_cores=2, daylong=True) - self.assertEqual(len(processed), self.nchans) - for tr in processed: - self.assertEqual(UTCDateTime(self.day_start), tr.stats.starttime) - self.assertEqual(tr.stats.npts, 86400) - - def test_dayproc_trace(self): - """ - Test a straight-forward day processing implementation with a Trace. 
- """ - processed = multi_process( - st=self.st[0].copy(), lowcut=0.1, highcut=0.4, filt_order=3, - samp_rate=1, starttime=self.day_start, parallel=True, - num_cores=2, daylong=True) - self.assertTrue(isinstance(processed, Trace)) - self.assertEqual(UTCDateTime(self.day_start), - processed.stats.starttime) - self.assertEqual(processed.stats.npts, 86400) - - def test_dayproc_nyquist_error(self): - """Test a failing day processing.""" - with self.assertRaises(IOError): - multi_process( - st=self.st.copy(), lowcut=0.1, highcut=0.6, filt_order=3, - samp_rate=1, starttime=self.day_start, - parallel=True, num_cores=2, daylong=True) - - def test_dayproc_serial(self): - """Test the serial implementation of dayproc.""" - processed = multi_process( - st=self.st.copy(), lowcut=0.1, highcut=0.4, filt_order=3, - samp_rate=1, starttime=self.day_start, parallel=False, - num_cores=2, daylong=True) - self.assertEqual(len(processed), self.nchans) - for tr in processed: - self.assertEqual(UTCDateTime(self.day_start), tr.stats.starttime) - self.assertEqual(tr.stats.npts, 86400) - - def test_dayproc_parallel_cores_unset(self): - """Test a straight-forward day processing implementation.""" - processed = multi_process( - st=self.st.copy(), lowcut=0.1, highcut=0.4, filt_order=3, - samp_rate=1, starttime=self.day_start, parallel=True, - num_cores=False, daylong=True) - self.assertEqual(len(processed), self.nchans) - for tr in processed: - self.assertEqual(UTCDateTime(self.day_start), tr.stats.starttime) - self.assertEqual(tr.stats.npts, 86400) - def test_process(self): """Test a basic process implementation.""" processed = multi_process( st=self.st[0].copy(), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, starttime=False, - daylong=True, seisan_chan_names=True, ignore_length=False) + seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) def test_process_datetime(self): """Test a basic process implementation.""" processed = multi_process( st=self.st[0].copy(), lowcut=0.1, highcut=0.4, filt_order=3, - samp_rate=1, starttime=self.day_start, daylong=True, + samp_rate=1, starttime=self.day_start, seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) @@ -307,7 +253,7 @@ def test_process_nyquist_fail(self): with self.assertRaises(IOError): multi_process( st=self.st[0].copy(), lowcut=0.1, highcut=0.6, - filt_order=3, samp_rate=1, starttime=False, daylong=True, + filt_order=3, samp_rate=1, starttime=False, seisan_chan_names=True, ignore_length=False) def test_process_bad_data(self): @@ -320,7 +266,7 @@ def test_process_bad_data(self): with self.assertRaises(ValueError): multi_process( st=not_daylong, lowcut=0.1, highcut=0.4, - filt_order=3, samp_rate=1, starttime=False, daylong=True, + filt_order=3, samp_rate=1, starttime=False, seisan_chan_names=True, ignore_length=False) def test_short_data_fail(self): @@ -330,7 +276,8 @@ def test_short_data_fail(self): st=self.st[0].copy().trim( endtime=self.st[0].stats.endtime - 18000), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, - starttime=self.day_start, daylong=True, + starttime=self.day_start, + endtime=UTCDateTime(self.day_start) + 86400., seisan_chan_names=True, ignore_length=False) def test_short_data_pass(self): @@ -339,7 +286,8 @@ def test_short_data_pass(self): st=self.st[0].copy().trim(endtime=self. 
st[0].stats.endtime - 18000), lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, - starttime=self.day_start, daylong=True, + starttime=self.day_start, + endtime=UTCDateTime(self.day_start) + 86400., seisan_chan_names=True, ignore_length=True) self.assertEqual(processed.stats.npts, 86400) @@ -349,10 +297,12 @@ def test_short_data_empty_return(self): ignore_bad_data is True. """ processed = multi_process( - st=self.st[0].copy().trim(endtime=self. - st[0].stats.endtime - 28000), lowcut=0.1, + st=self.st[0].copy().trim( + endtime=self.st[0].stats.endtime - 28000), + lowcut=0.1, highcut=0.4, filt_order=3, samp_rate=1, - starttime=self.day_start, daylong=True, + starttime=self.day_start, + endtime=UTCDateTime(self.day_start) + 86400., seisan_chan_names=True, ignore_bad_data=True) self.assertEqual(processed.stats.npts, 0) @@ -360,7 +310,7 @@ def test_highcut_debug(self): """Test a basic process implementation with just a highcut""" processed = multi_process( st=self.st[0].copy(), lowcut=None, highcut=0.4, - filt_order=3, samp_rate=1, starttime=False, daylong=True, + filt_order=3, samp_rate=1, starttime=False, seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) @@ -368,7 +318,7 @@ def test_lowcut_debug(self): """Test a basic process implementation with just a highcut""" processed = multi_process( st=self.st[0].copy(), lowcut=0.1, highcut=None, - filt_order=3, samp_rate=1, starttime=False, daylong=True, + filt_order=3, samp_rate=1, starttime=False, seisan_chan_names=True, ignore_length=False) self.assertEqual(processed.stats.npts, 86400) diff --git a/eqcorrscan/utils/pre_processing.py b/eqcorrscan/utils/pre_processing.py index 65123303a..ecdaaab8e 100644 --- a/eqcorrscan/utils/pre_processing.py +++ b/eqcorrscan/utils/pre_processing.py @@ -83,7 +83,7 @@ def _simple_qc(st, max_workers=None, chunksize=1): return qual -def _sanitize_length(st, starttime=None, endtime=None, daylong=False): +def _sanitize_length(st, starttime=None, endtime=None): """ Check length and work out start, end, length and trimming criteria @@ -93,54 +93,31 @@ def _sanitize_length(st, starttime=None, endtime=None, daylong=False): :type starttime: obspy.core.UTCDateTime :param endtime: DEsired endtime - can be None :type endtime: obspy.core.UTCDateTime - :param daylong: Whether data should be one-day long. - :type daylong: bool :return: obspy.core.Stream, length[float], clip[bool], starttime[obspy.core.UTCDateTime] """ length, clip = None, False - if daylong: - length, clip = 86400, True - # Set the start-time to a day start - cope with - if starttime is None: - startdates = [] - for tr in st: - if abs(tr.stats.starttime - (UTCDateTime( - tr.stats.starttime.date) + 86400)) < tr.stats.delta: - # If the trace starts within 1 sample of the next day, - # use the next day as the startdate - startdates.append((tr.stats.starttime + 86400).date) - Logger.warning( - f'{tr.id} starts within 1 sample of the next day, ' - f'using this time {(tr.stats.starttime + 86400).date}') - else: - startdates.append(tr.stats.starttime.date) - # Check that all traces start on the same date... 
- if not len(set(startdates)) == 1: - raise NotImplementedError('Traces start on different days') - starttime = UTCDateTime(startdates[0]) - else: - if starttime is not None and endtime is not None: - for tr in st: - Logger.info( - f"Trimming {tr.id} between {starttime} and {endtime}") - tr.trim(starttime, endtime) - if len(tr.data) == ((endtime - starttime) * - tr.stats.sampling_rate) + 1: - Logger.info(f"{tr.id} is overlength dropping first sample") - tr.data = tr.data[1:len(tr.data)] - # TODO: this should adjust the start-time - # tr.stats.starttime += tr.stats.delta - length = endtime - starttime - clip = True - elif starttime: - for tr in st: - tr.trim(starttime=starttime) - elif endtime: - for tr in st: - tr.trim(endtime=endtime) + if starttime is not None and endtime is not None: + for tr in st: + Logger.info( + f"Trimming {tr.id} between {starttime} and {endtime}") + tr.trim(starttime, endtime) + if len(tr.data) == ((endtime - starttime) * + tr.stats.sampling_rate) + 1: + Logger.info(f"{tr.id} is overlength dropping first sample") + tr.data = tr.data[1:len(tr.data)] + # TODO: this should adjust the start-time + # tr.stats.starttime += tr.stats.delta + length = endtime - starttime + clip = True + elif starttime: + for tr in st: + tr.trim(starttime=starttime) + elif endtime: + for tr in st: + tr.trim(endtime=endtime) return st, length, clip, starttime @@ -153,7 +130,7 @@ def _get_window(window, npts): def multi_process(st, lowcut, highcut, filt_order, samp_rate, parallel=False, num_cores=False, starttime=None, endtime=None, - daylong=False, seisan_chan_names=False, fill_gaps=True, + seisan_chan_names=False, fill_gaps=True, ignore_length=False, ignore_bad_data=False): """ Apply standardised processing workflow to data for matched-filtering @@ -199,11 +176,6 @@ def multi_process(st, lowcut, highcut, filt_order, samp_rate, parallel=False, :type starttime: obspy.core.UTCDateTime :param endtime: Desired endtime of data :type endtime: obspy.core.UTCDateTime - :param daylong: - Whether data should be considered to be one-day long. Setting this will - assume that your data should start as close to the start of a day - as possible given the sampling. - :type daylong: bool :param seisan_chan_names: Whether to convert channel names to two-char seisan channel names :type seisan_chan_names: bool @@ -254,7 +226,7 @@ def multi_process(st, lowcut, highcut, filt_order, samp_rate, parallel=False, chunksize = len(st) // max_workers st, length, clip, starttime = _sanitize_length( - st=st, starttime=starttime, endtime=endtime, daylong=daylong) + st=st, starttime=starttime, endtime=endtime) for tr in st: if len(tr.data) == 0: @@ -776,7 +748,7 @@ def _fill_gaps(tr): def _group_process(filt_order, highcut, lowcut, samp_rate, process_length, - parallel, cores, stream, daylong, + parallel, cores, stream, ignore_length, ignore_bad_data, overlap): """ Process and chunk data. @@ -787,14 +759,11 @@ def _group_process(filt_order, highcut, lowcut, samp_rate, process_length, :param cores: Number of cores to use, can be False to use all available. :type stream: :class:`obspy.core.stream.Stream` :param stream: Stream to process, will be left intact. - :type daylong: bool - :param daylong: Whether to enforce day-length files or not. :type ignore_length: bool :param ignore_length: - If using daylong=True, then processing will try check that the data - are there for at least 80% of the day, if you don't want this check - (which will raise an error if too much data are missing) then set - ignore_length=True. 
This is not recommended! + Check that the data are there for at least 80% of the required length. + If you don't want this check (which will raise an error if too much + data are missing) then set ignore_length=True. This is not recommended! :type ignore_bad_data: bool :param ignore_bad_data: If False (default), errors will be raised if data are excessively @@ -818,23 +787,7 @@ def _group_process(filt_order, highcut, lowcut, samp_rate, process_length, starttimes = sorted([tr.stats.starttime for tr in stream]) endtimes = sorted([tr.stats.endtime for tr in stream]) - if daylong: - if process_length != 86400: - Logger.warning( - f'Processing day-long data, but template was cut from ' - f'{process_length} s long data, will reduce correlations') - process_length = 86400 - # Check that data all start on the same day, otherwise strange - # things will happen... - startdates = [starttime.date for starttime in starttimes] - if not len(set(startdates)) == 1: - Logger.warning('Data start on different days, setting to last day') - starttime = UTCDateTime(startdates[-1]) - else: - starttime = UTCDateTime(startdates[0]) # Can take any - else: - # We want to use shortproc to allow overlaps - starttime = starttimes[0] + starttime = starttimes[0] endtime = endtimes[-1] data_len_samps = round((endtime - starttime) * samp_rate) + 1 assert overlap < process_length, "Overlap must be less than process length" @@ -850,11 +803,8 @@ for i in range(n_chunks): kwargs.update( {'starttime': starttime + (i * (process_length - overlap))}) - if not daylong: - _endtime = kwargs['starttime'] + process_length - kwargs.update({'endtime': _endtime}) - else: - _endtime = kwargs['starttime'] + 86400 + _endtime = kwargs['starttime'] + process_length + kwargs.update({'endtime': _endtime}) # This is where data should be copied and only here! if n_chunks > 1: @@ -1233,57 +1183,15 @@ return out_stream, out_templates -def shortproc(st, lowcut, highcut, filt_order, samp_rate, parallel=False, - num_cores=False, starttime=None, endtime=None, - seisan_chan_names=False, fill_gaps=True, ignore_length=False, - ignore_bad_data=False, fft_threads=1): - """ - Deprecated - """ - Logger.warning("Shortproc is depreciated after 0.4.4 and will " - "be removed in a future version. Use multi_process" - " instead") - st = multi_process( - st=st, lowcut=lowcut, highcut=highcut, filt_order=filt_order, - samp_rate=samp_rate, parallel=parallel, num_cores=num_cores, - starttime=starttime, endtime=endtime, daylong=False, - seisan_chan_names=seisan_chan_names, fill_gaps=fill_gaps, - ignore_length=ignore_length, ignore_bad_data=ignore_bad_data) - return st - - -def dayproc(st, lowcut, highcut, filt_order, samp_rate, starttime, - parallel=True, num_cores=False, ignore_length=False, - seisan_chan_names=False, fill_gaps=True, ignore_bad_data=False, - fft_threads=1): - """ - Deprecated - """ - Logger.warning("dayproc is depreciated after 0.4.4 and will be " - "removed in a future version.
Use multi_process instead") - st = multi_process( - st=st, lowcut=lowcut, highcut=highcut, filt_order=filt_order, - samp_rate=samp_rate, parallel=parallel, num_cores=num_cores, - starttime=starttime, endtime=None, daylong=True, - seisan_chan_names=seisan_chan_names, fill_gaps=fill_gaps, - ignore_length=ignore_length, ignore_bad_data=ignore_bad_data) - return st - - def process(tr, lowcut, highcut, filt_order, samp_rate, starttime=False, clip=False, length=86400, seisan_chan_names=False, ignore_length=False, fill_gaps=True, - ignore_bad_data=False, fft_threads=1): + ignore_bad_data=False): """ Deprecated """ Logger.warning("process is depreciated after 0.4.4 and will be removed " "in a future version. Use multi_process instead") - if length == 86400: - daylong = True - else: - daylong = False - endtime = None if clip: if not starttime: @@ -1294,7 +1202,7 @@ def process(tr, lowcut, highcut, filt_order, samp_rate, st = multi_process( st=tr, lowcut=lowcut, highcut=highcut, filt_order=filt_order, samp_rate=samp_rate, parallel=False, num_cores=1, - starttime=starttime, endtime=endtime, daylong=daylong, + starttime=starttime, endtime=endtime, seisan_chan_names=seisan_chan_names, fill_gaps=fill_gaps, ignore_length=ignore_length, ignore_bad_data=ignore_bad_data) return st
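Migration notes:

With daylong gone, the same behaviour is expressed as an explicit one-day window. A minimal sketch against the post-patch multi_process signature; the day-long stream here is synthetic placeholder data:

    import numpy as np
    from obspy import Stream, Trace, UTCDateTime
    from eqcorrscan.utils.pre_processing import multi_process

    day_start = UTCDateTime(2024, 7, 25)
    # One day of 1 Hz noise standing in for real day-long data
    tr = Trace(data=np.random.randn(86400))
    tr.stats.sampling_rate = 1.0
    tr.stats.starttime = day_start

    # daylong=True is replaced by starttime and endtime one day apart
    processed = multi_process(
        st=Stream([tr]), lowcut=0.1, highcut=0.4, filt_order=3,
        samp_rate=1.0, starttime=day_start, endtime=day_start + 86400.0)
    assert processed[0].stats.npts == 86400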
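Callers that still pass daylong to Tribe.detect, Tribe.client_detect or Party.lag_calc now get a warning and the argument is ignored, per the kwargs.pop shims above. A sketch of that behaviour; tribe and st stand in for a configured Tribe and a matching Stream:

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        party = tribe.detect(
            stream=st, threshold=8.0, threshold_type="MAD",
            trig_int=6.0, plot=False, daylong=True)
    # The deprecated kwarg is swallowed by **kwargs and only warns
    assert any("daylong" in str(w.message) for w in caught)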
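Without the daylong branch, _group_process chunks purely on process_length and overlap, so windows no longer snap to day boundaries. The window arithmetic from the loop above, restated as a standalone sketch:

    from obspy import UTCDateTime

    starttime = UTCDateTime(2024, 7, 25)
    process_length, overlap = 86400.0, 3600.0
    for i in range(3):
        # Each chunk is process_length long and steps forward by
        # (process_length - overlap), matching the patched loop
        chunk_start = starttime + i * (process_length - overlap)
        chunk_end = chunk_start + process_length
        print(f"chunk {i}: {chunk_start} -> {chunk_end}")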