diff --git a/ontologytimemachine/custom_proxy.py b/ontologytimemachine/custom_proxy.py index f31f521..d47cca5 100644 --- a/ontologytimemachine/custom_proxy.py +++ b/ontologytimemachine/custom_proxy.py @@ -45,6 +45,7 @@ def before_upstream_connection(self, request: HttpParser) -> HttpParser | None: wrapped_request = HttpRequestWrapper(request) if (self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.REQUIRED or self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.OPTIONAL): + logger.info('Setting up config from auth') config_from_auth = evaluate_configuration(wrapped_request, self.config) if (not config_from_auth and self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.REQUIRED): logger.info( "Client configuration via proxy auth is required btu configuration is not provided, return 500.") @@ -135,12 +136,11 @@ def queue_response(self, response): response.status_code, reason=bytes(responses[response.status_code], "utf-8"), headers={ - b"Content-Type": bytes( - response.headers.get("Content-Type"), "utf-8" - ) + bytes(key, "utf-8"): bytes(value, "utf-8") for key, value in response.headers.items() }, body=response.content, ) + ) diff --git a/ontologytimemachine/proxy_wrapper.py b/ontologytimemachine/proxy_wrapper.py index 8ef2a74..cb3c8e0 100644 --- a/ontologytimemachine/proxy_wrapper.py +++ b/ontologytimemachine/proxy_wrapper.py @@ -135,5 +135,6 @@ def get_authentication_from_request(self) -> str: if auth_type.lower() != "basic": return None decoded_credentials = base64.b64decode(encoded_credentials).decode() + logger.info(f'Decoded credentials: {decoded_credentials}') return decoded_credentials return None diff --git a/ontologytimemachine/utils/config.py b/ontologytimemachine/utils/config.py index 6c9a0cd..680d4ca 100644 --- a/ontologytimemachine/utils/config.py +++ b/ontologytimemachine/utils/config.py @@ -152,7 +152,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoFormat, s), default=default_cfg.ontoFormatConf.format, choices=list(OntoFormat), - help=f"Format of the ontology: turtle, ntriples, rdfxml, htmldocu. {help_suffix_template}", + help=f"Format of the ontology. {help_suffix_template}", ) parser.add_argument( @@ -160,7 +160,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoPrecedence, s), default=default_cfg.ontoFormatConf.precedence, choices=list(OntoPrecedence), - help=f"Precedence of the ontology: default, enforcedPriority, always. {help_suffix_template}", + help=f"Precedence of the ontology. {help_suffix_template}", ) parser.add_argument( @@ -176,7 +176,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoVersion, s), default=default_cfg.ontoVersion, choices=list(OntoVersion), - help=f"Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest. {help_suffix_template}", + help=f"Version of the ontology. {help_suffix_template}", ) # Enable/disable mode to only proxy requests to ontologies @@ -193,7 +193,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(HttpsInterception, s), default=default_cfg.httpsInterception, choices=list(HttpsInterception), - help=f"Enable HTTPS interception for specific domains: none, archivo, all, listfilename. {help_suffix_template}", + help=f"Enable HTTPS interception for specific domains. {help_suffix_template}", ) # Enable/disable inspecting or removing redirects @@ -218,7 +218,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(LogLevel, s), default=default_cfg.logLevelTimeMachine, choices=list(LogLevel), - help=f"Level of the logging: debug, info, warning, error. {help_suffix_template}", + help=f"Level of the logging. {help_suffix_template}", ) # Log level @@ -227,7 +227,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(LogLevel, s), default=default_cfg.logLevelTimeMachine, choices=list(LogLevel), - help=f"Level of the logging: debug, info, warning, error. {help_suffix_template}", + help=f"Level of the logging. {help_suffix_template}", ) # Host diff --git a/ontologytimemachine/utils/proxy_logic.py b/ontologytimemachine/utils/proxy_logic.py index a7db142..3587a69 100644 --- a/ontologytimemachine/utils/proxy_logic.py +++ b/ontologytimemachine/utils/proxy_logic.py @@ -156,7 +156,6 @@ def request_ontology( url=url, headers=headers, allow_redirects=allow_redirects, timeout=5 ) logger.info("Successfully fetched ontology") - print(response.content) return response except Exception as e: logger.error(f"Error fetching original ontology: {e}") @@ -165,7 +164,7 @@ def request_ontology( # change the function definition and pass only the config def proxy_logic(wrapped_request, config): - logger.info("Proxy has to intervene") + logger.info("Proxy starting to analyze request") response = mock_response_500 #default if we somehow forget to set the response set_onto_format_headers(wrapped_request, config) @@ -180,15 +179,19 @@ def proxy_logic(wrapped_request, config): return mock_response_500 if config.ontoVersion == OntoVersion.ORIGINAL: + logger.info('OntoVersion ORIGINAL') ontology, _, _ = wrapped_request.get_request_url_host_path() response = fetch_original(wrapped_request, ontology, headers, config) elif config.ontoVersion == OntoVersion.ORIGINAL_FAILOVER_LIVE_LATEST: + logger.info('OntoVersion ORIGINAL_FAILOVER_LIVE_LATEST') response = fetch_failover( wrapped_request, headers, config.disableRemovingRedirects ) elif config.ontoVersion == OntoVersion.LATEST_ARCHIVED: + logger.info('OntoVersion LATEST_ARCHIVED') response = fetch_latest_archived(wrapped_request, ontology, headers) - elif config.ontoVersion == OntoVersion.LATEST_ARCHIVED: + elif config.ontoVersion == OntoVersion.TIMESTAMP_ARCHIVED: + logger.info('OntoVersion TIMESTAMP_ARCHIVED') response = fetch_timestamp_archived(wrapped_request, headers, config) # Commenting the manifest related part because it is not supported in the current version # elif ontoVersion == 'dependencyManifest': diff --git a/tests/test_proxy_new.py b/tests/test_proxy_auth_header.py similarity index 87% rename from tests/test_proxy_new.py rename to tests/test_proxy_auth_header.py index bb6d74f..a7e37c8 100644 --- a/tests/test_proxy_new.py +++ b/tests/test_proxy_auth_header.py @@ -4,16 +4,17 @@ import csv from typing import List, Tuple from requests.auth import HTTPBasicAuth +from requests.auth import _basic_auth_str from ontologytimemachine.custom_proxy import IP, PORT # Proxy settings -PROXY = f"0.0.0.0:{PORT}" +PROXY = f"0.0.0.0:8894" HTTP_PROXY = f"http://{PROXY}" HTTPS_PROXY = f"http://{PROXY}" PROXIES = {"http": HTTP_PROXY, "https": HTTPS_PROXY} logging.basicConfig( - level=logging.DEBUG, + level=logging.ERROR, format="%(asctime)s - %(levelname)s - %(message)s", ) logger = logging.getLogger(__name__) @@ -35,10 +36,10 @@ def create_fake_response(status_code='error'): def make_request_without_proxy(iri: str) -> Tuple[int, str]: """Make a direct request to the IRI without using the proxy.""" headers = { - "Content-Type": "text/turtle" + "Accept": "text/turtle" } try: - response = requests.get(iri, timeout=10, headers=headers) + response = requests.get(iri, timeout=10, headers=headers, allow_redirects=True) return response except Exception as e: # logger.info(f'Error: {e}') @@ -46,14 +47,16 @@ def make_request_without_proxy(iri: str) -> Tuple[int, str]: return create_fake_response() def make_request_with_proxy(iri: str, mode: str) -> Tuple[int, str]: + logger.info('Run') """Make a request to the IRI using the proxy.""" username = f"--ontoVersion {mode}" password = "my_password" headers = { - "Content-Type": "text/turtle" + "Accept": "text/turtle", + "Proxy-Authorization": _basic_auth_str(username, password) } try: - response = requests.get(iri, proxies=PROXIES, timeout=10, headers=headers, auth=HTTPBasicAuth(username, password)) + response = requests.get(iri, proxies=PROXIES, headers=headers, allow_redirects=True) return response except Exception as e: # logger.info(f'Error: {e}') @@ -72,18 +75,10 @@ def test_proxy_responses(test_case): # Make direct and proxy requests direct_response = make_request_without_proxy(iri) proxy_response = make_request_with_proxy(iri, 'original') + #proxy_response = make_request_with_proxy(iri, 'original') + #proxy_response = make_request_with_proxy(iri, 'laters') + #proxy_response = make_request_with_proxy(iri, 'original') - - try: - direct_response = requests.get(iri) - except Exception as e: - logger.error(f"Error making direct request to {iri}: {e}") - - try: - proxy_response = requests.get(iri, proxies=PROXIES) - except Exception as e: - logger.error(f"Error making proxy request to {iri} using proxy {PROXY}: {e}") - # Evaluation based on error_dimension if error_dimension == 'http-code': logger.info(f"Comparing direct response status code: expected {expected_error}, got {direct_response.status_code}") diff --git a/tests/test_proxypy.py b/tests/test_proxypy.py index bc57573..4240863 100644 --- a/tests/test_proxypy.py +++ b/tests/test_proxypy.py @@ -85,7 +85,7 @@ def wait_for_server(proxy_port: int, wait_for_seconds: float = 10.0) -> None: def make_request_without_proxy(self, iri: str) -> Tuple[int, str]: """Make a direct request to the IRI without using the proxy.""" headers = { - "Content-Type": "text/turtle" + "Accept": "text/turtle" } try: response = requests.get(iri, timeout=10, headers=headers) @@ -105,7 +105,7 @@ def make_request_with_proxy(self, iri: str, proxy_port: int, mode: str) -> Tuple username = f"--ontoVersion {mode}" password = "my_password" headers = { - "Content-Type": "text/turtle" + "Accept": "text/turtle" } try: response = requests.get(iri, proxies=proxies, timeout=10, headers=headers, auth=HTTPBasicAuth(username, password)) diff --git a/tests/test_proxypy_test_proxy_spinup.py b/tests/test_proxypy_test_proxy_spinup.py new file mode 100644 index 0000000..4240863 --- /dev/null +++ b/tests/test_proxypy_test_proxy_spinup.py @@ -0,0 +1,220 @@ +import unittest +import requests +import csv +from proxy.proxy import Proxy +from proxy.common.utils import new_socket_connection +from ontologytimemachine.utils.config import Config, parse_arguments +from ontologytimemachine.custom_proxy import OntologyTimeMachinePlugin +from typing import List, Tuple +from requests.auth import HTTPBasicAuth +import time +import logging + + +logging.basicConfig( + level=logging.DEBUG, # Set the logging level to DEBUG + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +class OntoVersionTestCase(unittest.TestCase): + """Test case for making requests with different OntoVersions.""" + + DEFAULT_PROXY_PY_STARTUP_FLAGS = [ + '--hostname', '0.0.0.0', + '--port', '0', # Automatically bind to an available port + '--num-workers', '1', + '--num-acceptors', '1', + ] + + PROXY: Proxy = None + INPUT_ARGS: List[str] = None + + test_data = [] + plugin_config: Config = None + + @classmethod + def setUpClass(cls): + """Set up class-level resources, including reading the TSV test data.""" + # Load test data from TSV file + cls.load_test_data_from_tsv("tests/archivo_test_IRIs.tsv") + + @classmethod + def load_test_data_from_tsv(cls, filepath: str): + """Load test cases from a TSV file.""" + with open(filepath, mode='r', encoding='utf-8') as file: + reader = csv.DictReader(file, delimiter='\t') + for row in reader: + iri = row['iri'] + error_dimension = row['error_dimension'] + expected_error = row['expected_error'] + iri_type = row['iri_type'] + comment = row['comment'] + cls.test_data.append((iri, error_dimension, expected_error, iri_type, comment)) + + + def setUpProxy(self) -> None: + self.PROXY = Proxy(self.DEFAULT_PROXY_PY_STARTUP_FLAGS) + self.PROXY.flags.plugins[b'HttpProxyBasePlugin'].append( + OntologyTimeMachinePlugin, + ) + self.PROXY.__enter__() + self.wait_for_server(self.PROXY.flags.port) + + def tearDownProxy(self) -> None: + """Tear down the proxy.""" + if self.PROXY: + self.PROXY.__exit__(None, None, None) + self.PROXY = None + + @staticmethod + def wait_for_server(proxy_port: int, wait_for_seconds: float = 10.0) -> None: + """Wait for the proxy to be available.""" + start_time = time.time() + while True: + try: + new_socket_connection(('localhost', proxy_port)).close() + break + except ConnectionRefusedError: + time.sleep(0.1) + + if time.time() - start_time > wait_for_seconds: + raise TimeoutError('Timed out while waiting for proxy to start...') + + def make_request_without_proxy(self, iri: str) -> Tuple[int, str]: + """Make a direct request to the IRI without using the proxy.""" + headers = { + "Accept": "text/turtle" + } + try: + response = requests.get(iri, timeout=10, headers=headers) + return response + except Exception as e: + # logger.info(f'Error: {e}') + # logger.info('Error with the connection') + response.status_code = 'error' + return response + + def make_request_with_proxy(self, iri: str, proxy_port: int, mode: str) -> Tuple[int, str]: + """Make a request to the IRI using the proxy.""" + proxies = { + "http": f"http://localhost:{proxy_port}", + "https": f"https://localhost:{proxy_port}", + } + username = f"--ontoVersion {mode}" + password = "my_password" + headers = { + "Accept": "text/turtle" + } + try: + response = requests.get(iri, proxies=proxies, timeout=10, headers=headers, auth=HTTPBasicAuth(username, password)) + return response + except Exception as e: + # logger.info(f'Error: {e}') + # logger.info('Error with the connection') + return {'status_code': 'error'} + + + + def compare_responses(self, direct_response: Tuple[int, str], proxy_response: Tuple[int, str]): + """Compare the results of the direct and proxy responses.""" + self.assertEqual(direct_response[0], proxy_response[0], "Status codes do not match.") + self.assertEqual(direct_response[1], proxy_response[1], "Content types do not match.") + + def evaluate_results(self, direct_response, proxy_response, error_dimension, expected_error): + error_found = False # Flag to track if any assertion fails + logger.info('Test without proxy results') + try: + if error_dimension == 'http-code': + logger.info(f"Comparing direct response status code: expected {expected_error}, got {direct_response.status_code}") + self.assertEqual(int(expected_error), direct_response.status_code) + elif error_dimension == 'None': + logger.info(f"Comparing direct response status code for 'None' error dimension: expected 200, got {direct_response.status_code}") + self.assertEqual(200, direct_response.status_code) + elif error_dimension == 'content': + logger.info(f"Comparing direct response content length: expected 0, got {len(direct_response.content)}") + self.assertEqual(0, len(direct_response.content)) + else: + logger.info(f"Comparing direct response status code for unknown error dimension: expected 'error', got '{direct_response}'") + self.assertEqual('error', direct_response.status_code) + except AssertionError as e: + logger.error(f"Direct response assertion failed: {e}") + error_found = True # Mark that an error occurred but continue + + # Logs before proxy response assertions + logger.info('Test Proxy original results') + try: + logger.info(error_dimension) + if error_dimension == 'http-code': + logger.info(f"Comparing proxy response status code: expected {expected_error}, got {proxy_response.status_code}") + self.assertEqual(int(expected_error), proxy_response.status_code) + elif error_dimension == 'None': + logger.info(f"Comparing proxy response status code for 'None' error dimension: expected 200, got {proxy_response.status_code}") + self.assertEqual(200, proxy_response.status_code) + elif error_dimension == 'content': + logger.info(f"Comparing proxy response content length: expected 0, got {len(proxy_response.content)}") + self.assertEqual(0, len(proxy_response.content)) + else: + logger.info(f"Comparing proxy response status code for unknown error dimension: expected 'error', got '{proxy_response.status_code}'") + self.assertEqual('error', proxy_response.status_code) + except AssertionError as e: + logger.error(f"Proxy response assertion failed: {e}") + error_found = True # Mark that an error occurred but continue + + # If any assertion failed, mark the test as failed + if error_found: + self.fail("One or more assertions failed. See logs for details.") + + + def test_requests_with_different_onto_versions(self): + """Test requests with different OntoVersions and compare results.""" + # Make request without proxy + mode = 'original' + for iri, error_dimension, expected_error, iri_type, comment in self.test_data: + logger.info(f'IRI: {iri}') + with self.subTest(iri=iri, expected_error=expected_error, mode=mode): + self.setUpProxy() + + try: + # Make requests + direct_response = self.make_request_without_proxy(iri) + proxy_response = self.make_request_with_proxy(iri, self.PROXY.flags.port, mode) + + # Evaluate the results + logger.info('Test without proxy results') + if error_dimension == 'http-code': + logger.info(f"Comparing direct response status code: expected {expected_error}, got {direct_response.status_code}") + self.assertEqual(int(expected_error), direct_response.status_code) + logger.info(f"Comparing proxy response status code: expected {expected_error}, got {proxy_response.status_code}") + self.assertEqual(int(expected_error), proxy_response.status_code) + elif error_dimension == 'None': + logger.info(f"Comparing direct response status code for 'None' error dimension: expected 200, got {direct_response.status_code}") + self.assertEqual(200, direct_response.status_code) + logger.info(f"Comparing proxy response status code for 'None' error dimension: expected 200, got {proxy_response.status_code}") + self.assertEqual(200, proxy_response.status_code) + elif error_dimension == 'content': + logger.info(f"Comparing direct response content length: expected 0, got {len(direct_response.content)}") + self.assertEqual(0, len(direct_response.content)) + logger.info(f"Comparing proxy response content length: expected 0, got {len(proxy_response.content)}") + self.assertEqual(0, len(proxy_response.content)) + else: + logger.info(f"Comparing direct response status code for unknown error dimension: expected 'error', got '{direct_response}'") + self.assertEqual('error', direct_response.status_code) + logger.info(f"Comparing proxy response status code for unknown error dimension: expected 'error', got '{proxy_response.status_code}'") + self.assertEqual('error', proxy_response.status_code) + + + finally: + # Tear down the proxy after each test case + self.tearDownProxy() + + # Set up proxy with another OntoVersion and compare results + # self.setUpProxy("latestArchived") + # proxy_response_latest = self.make_request_with_proxy(iri, self.PROXY.flags.port) + # self.compare_responses(direct_response, proxy_response_latest) + # self.tearDownProxy() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_tsv.py b/tests/test_tsv.py deleted file mode 100644 index c1e003d..0000000 --- a/tests/test_tsv.py +++ /dev/null @@ -1,88 +0,0 @@ -import requests -from urllib.parse import urlparse -from ontologytimemachine.custom_proxy import IP, PORT - -# Mock proxy modes and settings for demonstration purposes -PROXY_MODES = ["live", "failover", "latest", "timestamp"] -INTERCEPTION_MODES = ["all", "archivo", "block"] - - - -PROXY = f"{IP}:{PORT}" -HTTP_PROXY = f"http://{PROXY}" -HTTPS_PROXY = f"http://{PROXY}" -PROXIES = {"http": HTTP_PROXY, "https": HTTPS_PROXY} - -# Load the TSV data -def load_tsv_data(tsv_file): - iri_data = [] - with open(tsv_file, 'r') as f: - for line in f.readlines()[1:]: - parts = line.strip().split('\t') - iri_data.append({ - "iri": parts[0], - "error_dimension": parts[1], - "expected_error": parts[2], - "iri_type": parts[3], - "comment": parts[4] if len(parts) > 4 else "" - }) - return iri_data - -# Basic request function (direct or proxied) -def make_request(iri, proxy=None, mode=None): - try: - response = requests.get(iri, proxies=proxy if proxy else {}) - return response.status_code, response.text - except requests.exceptions.RequestException as e: - return "error", str(e) - -# Test function for direct Archivo IRI requests -def test_DA(iri_data): - for item in iri_data: - if item["iri_type"] == "hash" or item["iri_type"] == "term": - status, content = make_request(item["iri"]) - print(f"DA Request to {item['iri']}: Status {status}, Expected {item['expected_error']}") - -# Test function for direct non-Archivo IRI requests -def test_DN(iri_data): - for item in iri_data: - if item["iri_type"] == "slash": - status, content = make_request(item["iri"]) - print(f"DN Request to {item['iri']}: Status {status}, Expected {item['expected_error']}") - -# Proxied Archivo requests with different modes -def test_PA(iri_data, mode): - for item in iri_data: - if item["iri_type"] == "hash" or item["iri_type"] == "term": - status, content = make_request(item["iri"], proxy=PROXY) - print(f"{mode}-PA Request to {item['iri']}: Status {status}, Expected {item['expected_error']}") - -# Proxied non-Archivo requests with different modes -def test_PN(iri_data, mode): - for item in iri_data: - if item["iri_type"] == "slash": - status, content = make_request(item["iri"], proxy=PROXY) - print(f"{mode}-PN Request to {item['iri']}: Status {status}, Expected {item['expected_error']}") - -# Main function to run the tests based on the proxy mode and interception settings -def run_tests(tsv_file, interception_mode="block"): - iri_data = load_tsv_data(tsv_file) - - print("Running Direct Archivo (DA) Tests...") - test_DA(iri_data) - - print("\nRunning Direct Non-Archivo (DN) Tests...") - test_DN(iri_data) - - if interception_mode != "block": - for mode in PROXY_MODES: - print(f"\nRunning Proxied Archivo ({mode}-PA) Tests...") - test_PA(iri_data, mode) - - print(f"\nRunning Proxied Non-Archivo ({mode}-PN) Tests...") - test_PN(iri_data, mode) - -# Example of running the tests -if __name__ == "__main__": - tsv_file = "tests/archivo_test_IRIs.tsv" - run_tests(tsv_file, interception_mode="archivo")