fixing some comments
Jenifer Tabita Ciuciu-Kiss committed Sep 27, 2024
1 parent f6814ab commit 415fceb
Showing 5 changed files with 76 additions and 58 deletions.
10 changes: 5 additions & 5 deletions ontologytimemachine/custom_proxy.py
@@ -4,8 +4,8 @@
from ontologytimemachine.utils.utils import parse_arguments
from ontologytimemachine.utils.mock_responses import mock_response_403
from ontologytimemachine.proxy_wrapper import HttpRequestWrapper
-from ontologytimemachine.utils.proxy_logic import proxy_logic, is_ontology_request_only_ontology
-from ontologytimemachine.utils.proxy_logic import is_archivo_ontology_request
+from ontologytimemachine.utils.proxy_logic import proxy_logic, is_archivo_ontology_request
+from ontologytimemachine.utils.proxy_logic import do_deny_request_due_non_archivo_ontology_uri
from ontologytimemachine.utils.proxy_logic import if_intercept_host
from http.client import responses
import proxy
@@ -46,7 +46,7 @@ def before_upstream_connection(self, request: HttpParser):
return None

# If only ontology mode, return None in all other cases
-if is_ontology_request_only_ontology(wrapped_request, self.restrictedAccess):
+if do_deny_request_due_non_archivo_ontology_uri(wrapped_request, self.restrictedAccess):
logger.warning('Request denied: not an ontology request and only ontologies mode is enabled')
self.queue_response(mock_response_403)
return None
@@ -56,6 +56,7 @@ def before_upstream_connection(self, request: HttpParser):
response = proxy_logic(wrapped_request, self.ontoFormat, self.ontoVersion, self.disableRemovingRedirects, self.timestamp, self.manifest)
self.queue_response(response)
return None

return request

def handle_client_request(self, request: HttpParser):
@@ -66,8 +67,7 @@ def handle_client_request(self, request: HttpParser):
if wrapped_request.is_connect_request():
return request

-is_ontology_request = is_archivo_ontology_request(wrapped_request)
-if not is_ontology_request:
+if not do_deny_request_due_non_archivo_ontology_uri(wrapped_request):
logger.info('The requested IRI is not part of DBpedia Archivo')
return request

46 changes: 22 additions & 24 deletions ontologytimemachine/proxy_wrapper.py
@@ -1,14 +1,15 @@
from abc import ABC, abstractmethod
from proxy.http.parser import HttpParser
import logging
+from typing import Tuple, Dict, Any


# Configure logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class AbstractRequestWrapper(ABC):
-def __init__(self, request):
+def __init__(self, request: Any) -> None:
self.request = request

@abstractmethod
@@ -28,74 +29,71 @@ def is_https_request(self) -> bool:
pass

@abstractmethod
-def get_request(self):
+def get_request(self) -> Any:
pass

@abstractmethod
-def get_request_headers(self):
+def get_request_headers(self) -> Dict[str, str]:
pass

@abstractmethod
-def get_request_accept_header(self):
+def get_request_accept_header(self) -> str:
pass

@abstractmethod
-def set_request_accept_header(self, mime_type):
+def set_request_accept_header(self, mime_type: str) -> None:
pass

@abstractmethod
-def get_ontology_from_request(self):
+def get_ontology_iri_host_path_from_request(self) -> Tuple[str, str, str]:
pass


class HttpRequestWrapper(AbstractRequestWrapper):
-def __init__(self, request: HttpParser):
+def __init__(self, request: HttpParser) -> None:
super().__init__(request)

def is_get_request(self) -> bool:
return self.request.method == b'GET'

-def is_connect_request(self):
+def is_connect_request(self) -> bool:
return self.request.method == b'CONNECT'

-def is_head_request(self):
+def is_head_request(self) -> bool:
return self.request.method == b'HEAD'

-def is_https_request(self):
+def is_https_request(self) -> bool:
return self.request.method == b'CONNECT' or self.request.headers.get(b'Host', b'').startswith(b'https')

-def get_request(self):
+def get_request(self) -> HttpParser:
return self.request

-def get_request_headers(self):
-headers = {}
+def get_request_headers(self) -> Dict[str, str]:
+headers: Dict[str, str] = {}
for k, v in self.request.headers.items():
headers[v[0].decode('utf-8')] = v[1].decode('utf-8')
return headers

-def get_request_accept_header(self):
+def get_request_accept_header(self) -> str:
logger.info('Wrapper - get_request_accept_header')
return self.request.headers[b'accept'][1].decode('utf-8')

-def set_request_accept_header(self, mime_type):
+def set_request_accept_header(self, mime_type: str) -> None:
self.request.headers[b'accept'] = (b'Accept', mime_type.encode('utf-8'))
logger.info(f'Accept header set to: {self.request.headers[b"accept"][1]}')

-def get_ontology_from_request(self):
+def get_ontology_iri_host_path_from_request(self) -> Tuple[str, str, str]:
logger.info('Get ontology from request')
print(f'Request protocol: {self.request.protocol}')
print(f'Request host: {self.request.host}')
print(f'Request _url: {self.request._url}')
print(f'Request path: {self.request.path}')
-if (self.request.method == b'GET' or self.request.method == b'HEAD') and not self.request.host:
+if (self.request.method in {b'GET', b'HEAD'}) and not self.request.host:
for k, v in self.request.headers.items():
if v[0].decode('utf-8') == 'Host':
host = v[1].decode('utf-8')
path = self.request.path.decode('utf-8')
-ontology = 'https://' + host + path
+ontology = f'https://{host}{path}'
else:
host = self.request.host.decode('utf-8')
path = self.request.path.decode('utf-8')
ontology = str(self.request._url)

logger.info(f'Ontology: {ontology}')
return ontology, host, path
return ontology, host, path
49 changes: 33 additions & 16 deletions ontologytimemachine/utils/proxy_logic.py
@@ -2,42 +2,59 @@
import requests
import rdflib
from urllib.parse import urlparse

from ontologytimemachine.utils.utils import set_onto_format_headers, get_format_from_accept_header
from ontologytimemachine.utils.utils import parse_accept_header_with_priority
from ontologytimemachine.utils.utils import dbpedia_api, passthrough_status_codes
from ontologytimemachine.utils.mock_responses import mock_response_500
from ontologytimemachine.utils.mock_responses import mock_response_404
+from typing import Set, Tuple


logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


+ARCHIVO_PARSED_URLS: Set[Tuple[str, str]] = set()


def if_intercept_host(https_intercept):
if https_intercept in ['all']:
return True
return False


-def is_ontology_request_only_ontology(wrapped_request, only_ontologies):
-is_archivo_ontology = is_archivo_ontology_request(wrapped_request)
-if only_ontologies and not is_archivo_ontology:
-return True
+def do_deny_request_due_non_archivo_ontology_uri (wrapped_request, only_ontologies):
+if only_ontologies:
+is_archivo_ontology = is_archivo_ontology_request(wrapped_request)
+if not is_archivo_ontology:
+return True
return False


-def is_archivo_ontology_request(wrapped_request):
-logger.info('Chekc if the requested ontology is in archivo')
-with open('ontologytimemachine/utils/archivo_ontologies.txt', 'r') as file:
-urls = [line.strip() for line in file]
-parsed_urls = [(urlparse(url).netloc, urlparse(url).path) for url in urls]
+def load_archivo_urls() -> None:
+"""Load the archivo URLs into the global variable if not already loaded."""
+global ARCHIVO_PARSED_URLS
+if not ARCHIVO_PARSED_URLS: # Load only if the set is empty
+logger.info('Loading archivo ontologies from file')
+with open('ontologytimemachine/utils/archivo_ontologies.txt', 'r') as file:
+ARCHIVO_PARSED_URLS = {
+(urlparse(line.strip()).netloc, urlparse(line.strip()).path) for line in file
+}

-_, request_host, request_path = wrapped_request.get_ontology_from_request()
-for host, path in parsed_urls:
-if request_host == host and request_path.startswith(path):
-return True
-return False

+def is_archivo_ontology_request(wrapped_request) -> bool:
+"""Check if the requested ontology is in the archivo."""
+logger.info('Check if the requested ontology is in archivo')

+# Ensure the archivo URLs are loaded
+load_archivo_urls()

+# Extract the request's host and path
+request_host = wrapped_request.get_request().host.decode('utf-8')

Review comment from JJ-Author (Collaborator), Oct 10, 2024:

In the logic you are supposed to use the wrapper, not the internal data structures of proxy.py. To me it looks like you are still operating on the HTTP request object directly? (A sketch of a wrapper-based variant follows this hunk.)

+request_path = wrapped_request.get_request().path.decode('utf-8')

+# Check if the (host, path) tuple exists in ARCHIVO_PARSED_URLS
+return (request_host, request_path) in ARCHIVO_PARSED_URLS
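
Following up on the review comment above: a minimal sketch of how the same check could go through the wrapper API rather than the underlying HttpParser object. It reuses load_archivo_urls, ARCHIVO_PARSED_URLS, and the wrapper's get_ontology_iri_host_path_from_request from this commit; it is an illustration, not the committed implementation.

def is_archivo_ontology_request(wrapped_request) -> bool:
    """Membership check that only talks to the wrapper, not to proxy.py internals."""
    load_archivo_urls()
    # The wrapper already derives IRI, host and path, including the case where
    # only a Host header (and no absolute URL) is present in the request.
    _, request_host, request_path = wrapped_request.get_ontology_iri_host_path_from_request()
    return (request_host, request_path) in ARCHIVO_PARSED_URLS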


def request_ontology(url, headers, disableRemovingRedirects=False, timeout=5):
@@ -57,7 +74,7 @@ def proxy_logic(wrapped_request, ontoFormat, ontoVersion, disableRemovingRedirec
set_onto_format_headers(wrapped_request, ontoFormat, ontoVersion)

headers = wrapped_request.get_request_headers()
-ontology, _, _ = wrapped_request.get_ontology_from_request()
+ontology, _, _ = wrapped_request.get_ontology_iri_host_path_from_request()

# if the requested format is not in Archivo and the ontoVersion is not original
# we can stop because the archivo request will not go through
2 changes: 1 addition & 1 deletion ontologytimemachine/utils/utils.py
Expand Up @@ -41,7 +41,7 @@ def parse_arguments():
help='Enable/disable mode to only proxy requests to ontologies stored in Archivo.')

# Enable HTTPS interception for specific domains
-parser.add_argument('--httpsInterception', type=str, choices=['none', 'all'],
+parser.add_argument('--httpsInterception', type=str, choices=['none', 'all', 'block'],
default='all', help='Enable HTTPS interception for specific domains: none, archivo, all, listfilename.')

# Enable/disable inspecting or removing redirects
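
On the --httpsInterception change above: a small sketch, not part of the commit, of how the accepted values map onto if_intercept_host from proxy_logic.py as it stands in this diff. The new 'block' choice is accepted by argparse but, like 'none', does not enable interception there.

from ontologytimemachine.utils.proxy_logic import if_intercept_host

for value in ['none', 'all', 'block']:
    # if_intercept_host returns True only for 'all' in the code shown above
    print(value, '->', if_intercept_host(value))
# none -> False
# all -> True
# block -> False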
27 changes: 15 additions & 12 deletions poetry.lock

Some generated files are not rendered by default.
