Skip to content

Commit

Permalink
Merge pull request #16 from getyourguide/art-130-refine-logging
Browse files Browse the repository at this point in the history
Refine logging
  • Loading branch information
hsiehkl authored May 2, 2023
2 parents f02c52a + 8f21b50 commit b638ee3
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 22 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# 1.1.15

- Disable logs when DDataflow is not enabled. Support setting the logger level.

# 1.1.14

- Fix sample and download function

# 1.1.13

- Support s3 path and default database
Expand Down
60 changes: 39 additions & 21 deletions ddataflow/ddataflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import logging as logger
import logging
import os
from typing import List, Optional, Union

Expand All @@ -10,6 +10,10 @@
from ddataflow.sampling.sampler import Sampler
from ddataflow.utils import get_or_create_spark, using_databricks_connect

logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.addHandler(handler)


class DDataflow:
"""
Expand Down Expand Up @@ -108,6 +112,13 @@ def __init__(
if default_database:
self.set_up_database(default_database)

# Print detailed logs when ddataflow is enabled
if self._ddataflow_enabled:
self.set_logger_level(logging.DEBUG)
else:
logger.info("DDataflow is now DISABLED."
"PRODUCTION data will be used and it will write to production tables.")

@staticmethod
def setup_project():
"""
Expand All @@ -133,7 +144,7 @@ def current_project() -> "DDataflow":
CONFIGURATION_FILE_NAME = "ddataflow_config.py"

current_folder = os.getcwd()
print("Loading config from folder", current_folder)
logger.debug("Loading config from folder", current_folder)
config_location = os.path.join(current_folder, CONFIGURATION_FILE_NAME)

if not os.path.exists(config_location):
Expand Down Expand Up @@ -188,16 +199,16 @@ def source(self, name: str, debugger=False):
You can also use this function in the terminal with --debugger=True to inspect the dataframe.
"""
logger.info(f"Debugger enabled: {debugger}")
self.print_status()

logger.info("Loading data source")
logger.debug("Loading data source")
data_source: DataSource = self._data_sources.get_data_source(name)
logger.debug("Data source loaded")
df = self._get_df_from_source(data_source)

if debugger:
logger.info("In debug mode now, use query to inspect it")
logger.debug(f"Debugger enabled: {debugger}")
logger.debug("In debug mode now, use query to inspect it")
breakpoint()

return df
Expand All @@ -207,15 +218,15 @@ def source_name(self, name, disable_view_creation=False):
Given the name of a production table, returns the name of the corresponding ddataflow table when ddataflow is enabled
If ddataflow is disabled get the production one.
"""
logger.info("Source name used: ", name)
logger.debug("Source name used: ", name)
source_name = name

if self._ddataflow_enabled:
source_name = self._get_new_table_name(name)
if disable_view_creation:
return source_name

print(f"Creating a temp view with the name: {source_name}")
logger.debug(f"Creating a temp view with the name: {source_name}")
data_source: DataSource = self._data_sources.get_data_source(name)

if self._offline_enabled:
Expand Down Expand Up @@ -277,21 +288,21 @@ def disable(self):

def _get_df_from_source(self, data_source):
    """
    Resolve a data source into a dataframe, honoring the current DDataflow mode.

    Modes (checked in order):
    - disabled: query production directly, without any pre-filters
    - offline enabled: query the locally downloaded snapshot data
    - enabled (online): query production with the configured filters applied

    :param data_source: the DataSource to query
    :return: a dataframe produced by the selected query strategy
    """
    if not self._ddataflow_enabled:
        logger.debug("DDataflow not enabled")
        # goes directly to production without prefilters
        return data_source.query_without_filter()

    if self._offline_enabled:
        # uses snapshot data
        if using_databricks_connect():
            # databricks-connect would route the query to a remote cluster,
            # which defeats the purpose of offline mode — warn the user
            logger.debug(
                "Looks like you are using databricks-connect in offline mode. You probably want to run it "
                "without databricks connect in offline mode"
            )

        return data_source.query_locally()

    logger.debug("DDataflow enabled and filtering")
    return data_source.query()

def download_data_sources(self, overwrite: bool = True, debug=False):
Expand Down Expand Up @@ -400,24 +411,23 @@ def print_status(self):
Print the status of the ddataflow
"""
if self._offline_enabled:
print("DDataflow is now ENABLED in OFFLINE mode")
print(
logger.debug("DDataflow is now ENABLED in OFFLINE mode")
logger.debug(
"To disable it remove from your code or unset the enviroment variable 'unset ENABLE_DDATAFLOW ; unset ENABLE_OFFLINE_MODE'"
)
elif self._ddataflow_enabled:
print(
logger.debug(
"""
DDataflow is now ENABLED in ONLINE mode. Filtered data will be used and it will write to temporary tables.
"""
DDataflow is now ENABLED in ONLINE mode. Filtered data will be used and it will write to temporary tables.
"""
)
else:
print(
logger.debug(
f"""
DDataflow is now DISABLED. So PRODUCTION data will be used and it will write to production tables.
Use enable() function or export {self._ENABLE_DDATAFLOW_ENVVARIABLE}=True to enable it.
If you are working offline use export ENABLE_OFFLINE_MODE=True instead.
"""
DDataflow is now DISABLED. So PRODUCTION data will be used and it will write to production tables.
Use enable() function or export {self._ENABLE_DDATAFLOW_ENVVARIABLE}=True to enable it.
If you are working offline use export ENABLE_OFFLINE_MODE=True instead.
"""
)

def _get_current_environment_data_folder(self) -> Optional[str]:
Expand All @@ -429,6 +439,14 @@ def _get_current_environment_data_folder(self) -> Optional[str]:

return self._snapshot_path

def set_logger_level(self, level):
    """
    Set the level of the module logger.

    :param level: a logging level constant (e.g. ``logging.DEBUG``); see
        https://docs.python.org/3/library/logging.html#logging-levels
    """
    # Lazy %-style args: the message is only formatted if the record is emitted,
    # per the logging module's recommended usage (instead of an eager f-string).
    logger.info("Set logger level to: %s", level)
    logger.setLevel(level)


def main():
import fire
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "DDataFlow"
version = "1.1.14"
version = "1.1.15"
description = "A tool for end2end data tests"
authors = ["Data products GYG <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit b638ee3

Please sign in to comment.