From 1ccf00e3c1053d4179e4878b78c04dbe9a9d460a Mon Sep 17 00:00:00 2001 From: Danglewood <85772166+deeleeramone@users.noreply.github.com> Date: Tue, 22 Oct 2024 15:45:31 -0700 Subject: [PATCH 1/3] purge nan values and put word boundaries on form_type filter. --- .../providers/sec/openbb_sec/models/company_filings.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openbb_platform/providers/sec/openbb_sec/models/company_filings.py b/openbb_platform/providers/sec/openbb_sec/models/company_filings.py index 835206a23aa..882a0193bc7 100644 --- a/openbb_platform/providers/sec/openbb_sec/models/company_filings.py +++ b/openbb_platform/providers/sec/openbb_sec/models/company_filings.py @@ -283,7 +283,8 @@ def transform_data( ) -> List[SecCompanyFilingsData]: """Transform the data.""" # pylint: disable=import-outside-toplevel - from pandas import DataFrame, to_datetime + from numpy import nan + from pandas import NA, DataFrame, to_datetime if not data: raise EmptyDataError( @@ -333,6 +334,7 @@ def transform_data( ) if query.form_type: form_types = query.form_type.replace("_", " ").replace(",", "|") + form_types = f"\\b{form_types}\\b" filings = filings[ filings.form.str.contains(form_types, case=False, regex=True, na=False) @@ -343,6 +345,7 @@ def transform_data( if len(filings) == 0: raise EmptyDataError("No filings were found using the filters provided.") + filings = filings.replace({NA: None, nan: None}) return [ SecCompanyFilingsData.model_validate(d) for d in filings.to_dict("records") From 47a6f1e2fd14f59d5400d977031f4460ea376690 Mon Sep 17 00:00:00 2001 From: Danglewood <85772166+deeleeramone@users.noreply.github.com> Date: Tue, 22 Oct 2024 15:50:15 -0700 Subject: [PATCH 2/3] just replace nan at the output --- .../providers/sec/openbb_sec/models/company_filings.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/openbb_platform/providers/sec/openbb_sec/models/company_filings.py b/openbb_platform/providers/sec/openbb_sec/models/company_filings.py index 882a0193bc7..5581a90e81d 100644 --- a/openbb_platform/providers/sec/openbb_sec/models/company_filings.py +++ b/openbb_platform/providers/sec/openbb_sec/models/company_filings.py @@ -306,12 +306,7 @@ def transform_data( "isXBRL", "size", ] - filings = ( - DataFrame(data, columns=cols) - .fillna(value="N/A") - .replace("N/A", None) - .astype(str) - ) + filings = DataFrame(data, columns=cols).astype(str) filings["reportDate"] = to_datetime(filings["reportDate"]).dt.date filings["filingDate"] = to_datetime(filings["filingDate"]).dt.date filings = filings.sort_values(by=["reportDate", "filingDate"], ascending=False) From ce258d82c419f6099c0791e45b7722d9fabb7ac5 Mon Sep 17 00:00:00 2001 From: Danglewood <85772166+deeleeramone@users.noreply.github.com> Date: Tue, 22 Oct 2024 16:15:39 -0700 Subject: [PATCH 3/3] use isin instead --- .../providers/sec/openbb_sec/models/company_filings.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/openbb_platform/providers/sec/openbb_sec/models/company_filings.py b/openbb_platform/providers/sec/openbb_sec/models/company_filings.py index 5581a90e81d..9ba181b53e3 100644 --- a/openbb_platform/providers/sec/openbb_sec/models/company_filings.py +++ b/openbb_platform/providers/sec/openbb_sec/models/company_filings.py @@ -328,13 +328,8 @@ def transform_data( base_url + filings["accessionNumber"] + "-index.htm" ) if query.form_type: - form_types = query.form_type.replace("_", " ").replace(",", "|") - form_types = f"\\b{form_types}\\b" - - filings = filings[ - filings.form.str.contains(form_types, case=False, regex=True, na=False) - ] - + form_types = query.form_type.replace("_", " ").split(",") + filings = filings[filings.form.isin(form_types)] if query.limit: filings = filings.head(query.limit) if query.limit != 0 else filings