diff --git a/ontologytimemachine/custom_proxy.py b/ontologytimemachine/custom_proxy.py index a5c7cb6..bb13590 100644 --- a/ontologytimemachine/custom_proxy.py +++ b/ontologytimemachine/custom_proxy.py @@ -17,19 +17,18 @@ from http.client import responses import proxy import sys -import logging -from ontologytimemachine.utils.config import HttpsInterception, ClientConfigViaProxyAuth - +from ontologytimemachine.utils.config import ( + HttpsInterception, + ClientConfigViaProxyAuth, + logger, +) -IP = "0.0.0.0" -PORT = "8896" +default_cfg: Config = Config() config = None -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) +IP = default_cfg.host +PORT = default_cfg.port class OntologyTimeMachinePlugin(HttpProxyBasePlugin): @@ -188,14 +187,12 @@ def queue_response(self, response): ] sys.argv += [ - "--hostname", IP, - "--port", PORT, - '--insecure-tls-interception', # without it the proxy would not let through a response using an invalid upstream certificate in interception mode - # since there currently is a bug in proxypy when a connect request uses an IP address instead of a domain name - # the proxy would not be able to work corectly in transparent mode using 3proxy setup since it tries to match - # the IP address as hostname with the certificate instead of the domain name in the SNI field - "--log-level", config.logLevel.name, - "--plugins", __name__ + ".OntologyTimeMachinePlugin", + "--hostname", + config.host, + "--port", + config.port, + "--plugins", + __name__ + ".OntologyTimeMachinePlugin", ] logger.info("Starting OntologyTimeMachineProxy server...") diff --git a/ontologytimemachine/proxy_wrapper.py b/ontologytimemachine/proxy_wrapper.py index 37d6e74..c612db0 100644 --- a/ontologytimemachine/proxy_wrapper.py +++ b/ontologytimemachine/proxy_wrapper.py @@ -1,14 +1,8 @@ from abc import ABC, abstractmethod from proxy.http.parser import HttpParser -import logging from typing import Tuple, Dict, Any import base64 - -# Configure logger -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) +from ontologytimemachine.utils.config import logger class AbstractRequestWrapper(ABC): diff --git a/ontologytimemachine/utils/config.py b/ontologytimemachine/utils/config.py index 174494a..c53f700 100644 --- a/ontologytimemachine/utils/config.py +++ b/ontologytimemachine/utils/config.py @@ -1,9 +1,16 @@ import argparse from dataclasses import dataclass, field from enum import Enum +import logging from typing import Dict, Any, Type, TypeVar +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + class EnumValuePrint( Enum ): # redefine how the enum is printed such that it will show up properly the cmd help message (choices) @@ -70,6 +77,8 @@ class Config: httpsInterception: HttpsInterception = HttpsInterception.ALL disableRemovingRedirects: bool = False timestamp: str = "" + host: str = "0.0.0.0" + port: str = "8896" # manifest: Dict[str, Any] = None @@ -90,7 +99,10 @@ def enum_parser(enum_class: Type[E], value: str) -> E: def parse_arguments(config_str: str = "") -> Config: default_cfg: Config = Config() - parser = argparse.ArgumentParser(description="Process ontology format and version.") + parser = argparse.ArgumentParser( + description="Process ontology format and version.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) # Defining ontoFormat argument with nested options parser.add_argument( @@ -98,7 +110,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoFormat, s), default=default_cfg.ontoFormatConf.format, choices=list(OntoFormat), - help="Format of the ontology: turtle, ntriples, rdfxml, htmldocu", + help="Format of the ontology: turtle, ntriples, rdfxml, htmldocu. (default: %(default)s)", ) parser.add_argument( @@ -106,14 +118,14 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoPrecedence, s), default=default_cfg.ontoFormatConf.precedence, choices=list(OntoPrecedence), - help="Precedence of the ontology: default, enforcedPriority, always", + help="Precedence of the ontology: default, enforcedPriority, always. (default: %(default)s)", ) parser.add_argument( "--patchAcceptUpstream", type=bool, default=default_cfg.ontoFormatConf.patchAcceptUpstream, - help="Defines if the Accept Header is patched upstream in original mode.", + help="Defines if the Accept Header is patched upstream in original mode. (default: %(default)s)", ) # Defining ontoVersion argument @@ -122,7 +134,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoVersion, s), default=default_cfg.ontoVersion, choices=list(OntoVersion), - help="Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest", + help="Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest. (default: %(default)s)", ) # Enable/disable mode to only proxy requests to ontologies @@ -130,7 +142,7 @@ def parse_arguments(config_str: str = "") -> Config: "--restrictedAccess", type=bool, default=default_cfg.restrictedAccess, - help="Enable/disable mode to only proxy requests to ontologies stored in Archivo.", + help="Enable/disable mode to only proxy requests to ontologies stored in Archivo. (default: %(default)s)", ) # Enable HTTPS interception for specific domains @@ -139,7 +151,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(HttpsInterception, s), default=default_cfg.httpsInterception, choices=list(HttpsInterception), - help="Enable HTTPS interception for specific domains: none, archivo, all, listfilename.", + help="Enable HTTPS interception for specific domains: none, archivo, all, listfilename. (default: %(default)s)", ) # Enable/disable inspecting or removing redirects @@ -147,7 +159,7 @@ def parse_arguments(config_str: str = "") -> Config: "--disableRemovingRedirects", type=bool, default=default_cfg.disableRemovingRedirects, - help="Enable/disable inspecting or removing redirects.", + help="Enable/disable inspecting or removing redirects. (default: %(default)s)", ) parser.add_argument( @@ -155,7 +167,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(ClientConfigViaProxyAuth, s), default=default_cfg.clientConfigViaProxyAuth, choices=list(ClientConfigViaProxyAuth), - help="Define the configuration of the proxy via the proxy auth.", + help="Define the configuration of the proxy via the proxy auth. (default: %(default)s)", ) # Log level @@ -164,7 +176,23 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(LogLevel, s), default=default_cfg.logLevel, choices=list(LogLevel), - help="Level of the logging: debug, info, warning, error.", + help="Level of the logging: debug, info, warning, error. (default: %(default)s)", + ) + + # Host + parser.add_argument( + "--host", + type=str, + default=default_cfg.host, + help="Hostname or IP address to bind the proxy to. (default: %(default)s)", + ) + + # Port + parser.add_argument( + "--port", + type=str, + default=default_cfg.port, + help="Port number to bind the proxy to. (default: %(default)s)", ) if config_str: @@ -194,6 +222,14 @@ def parse_arguments(config_str: str = "") -> Config: # print the default configuration with all nested members # print(default_cfg) # TODO remove + if args.logLevel != LogLevel.INFO: + logging.basicConfig( + level=args.logLevel.value, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + logger = logging.getLogger(__name__) + logger.info(f"Logging level set to: {args.logLevel}") + # Initialize the Config class with parsed arguments config = Config( logLevel=args.logLevel, @@ -206,6 +242,8 @@ def parse_arguments(config_str: str = "") -> Config: clientConfigViaProxyAuth=args.clientConfigViaProxyAuth, disableRemovingRedirects=args.disableRemovingRedirects, timestamp=args.timestamp if hasattr(args, "timestamp") else "", + host=args.host, + port=args.port, ) return config diff --git a/ontologytimemachine/utils/download_archivo_urls.py b/ontologytimemachine/utils/download_archivo_urls.py index 030fff5..ba68c01 100644 --- a/ontologytimemachine/utils/download_archivo_urls.py +++ b/ontologytimemachine/utils/download_archivo_urls.py @@ -1,6 +1,5 @@ import os import hashlib -import logging import requests import schedule import time @@ -8,6 +7,7 @@ from datetime import datetime, timedelta from urllib.parse import urlparse from typing import Set, Tuple +from ontologytimemachine.utils.config import logger ARCHIVO_PARSED_URLS: Set[Tuple[str, str]] = set() @@ -22,12 +22,6 @@ DOWNLOAD_INTERVAL = timedelta(days=1) # 1 day interval for checking the download -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - - def schedule_daily_download(): """Schedule the download to run at 3 AM every day.""" schedule.every().day.at("03:00").do(download_archivo_urls) diff --git a/ontologytimemachine/utils/proxy_logic.py b/ontologytimemachine/utils/proxy_logic.py index 941f8c2..39d0fc2 100644 --- a/ontologytimemachine/utils/proxy_logic.py +++ b/ontologytimemachine/utils/proxy_logic.py @@ -1,4 +1,3 @@ -import logging import requests from ontologytimemachine.utils.config import parse_arguments from ontologytimemachine.proxy_wrapper import AbstractRequestWrapper @@ -26,15 +25,10 @@ OntoVersion, HttpsInterception, ClientConfigViaProxyAuth, + logger, ) -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - - def do_block_CONNECT_request(config: Config) -> bool: if config.httpsInterception == HttpsInterception.BLOCK: logger.info("decided to block CONNECT request due to config enum") diff --git a/ontologytimemachine/utils/utils.py b/ontologytimemachine/utils/utils.py index f51267c..7e511a2 100644 --- a/ontologytimemachine/utils/utils.py +++ b/ontologytimemachine/utils/utils.py @@ -1,13 +1,10 @@ -import logging -import argparse from werkzeug.http import parse_accept_header -from ontologytimemachine.utils.config import OntoVersion, OntoFormat, OntoPrecedence - - -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +from ontologytimemachine.utils.config import ( + OntoVersion, + OntoFormat, + OntoPrecedence, + logger, ) -logger = logging.getLogger(__name__) archivo_api = "https://archivo.dbpedia.org/download" diff --git a/tests/test_integration.py b/tests/test_integration.py index afb903a..26150d0 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -111,14 +111,24 @@ def test_15_linked_web_apis(): def generic_test(iri, content_type): - response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH) + response = requests.get( + iri, + proxies=PROXIES, + verify=CA_CERT_PATH, + auth=HTTPBasicAuth("admin", "archivo"), + ) assert response.status_code == 200 assert iri in response.content.decode("utf-8") def iri_generic_test(iri): try: - response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH) + response = requests.get( + iri, + proxies=PROXIES, + verify=CA_CERT_PATH, + auth=HTTPBasicAuth("admin", "archivo"), + ) assert response.status_code == 200 assert iri in response.content.decode("utf-8") except AssertionError: