Skip to content

Commit

Permalink
working on the testcases and some bugfixes on the proxy
Browse files Browse the repository at this point in the history
  • Loading branch information
Jenifer Tabita Ciuciu-Kiss committed Oct 26, 2024
1 parent a2f5245 commit 4f0ff50
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 78 deletions.
29 changes: 24 additions & 5 deletions ontologytimemachine/custom_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ def __init__(self, *args, **kwargs):

def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
# self.client.config = QUOTE_NONE
logger.info("Before upstream connection hook")
logger.info("Before upstream connection hook")
logger.info(f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}")
wrapped_request = HttpRequestWrapper(request)

if (self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.REQUIRED or self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.OPTIONAL) and not wrapped_request.is_connect_request():
if (self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.REQUIRED or self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.OPTIONAL):
logger.info('Setting up config from auth')
config_from_auth = evaluate_configuration(wrapped_request, self.config)
if (not config_from_auth and self.config.clientConfigViaProxyAuth == ClientConfigViaProxyAuth.REQUIRED):
Expand All @@ -68,10 +68,13 @@ def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
config = self.client.config
else:
logger.info("Using the proxy configuration")
config = self.config

config = self.config
if wrapped_request.is_connect_request():
logger.info(f"Handling CONNECT request: configured HTTPS interception mode: {config.httpsInterception}")
# Mark if there is a connect request
if not hasattr(self.client, "mark_connect"):
self.client.mark_connect = True

# Check whether to allow CONNECT requests since they can impose a security risk
if not do_block_CONNECT_request(config):
Expand All @@ -82,7 +85,7 @@ def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
return None

response = get_response_from_request(wrapped_request, config)
if response:
if response.status_code:
logger.info(response.status_code)
self.queue_response(response)
return None
Expand All @@ -98,6 +101,7 @@ def do_intercept(self, _request: HttpParser) -> bool:
if hasattr(self.client, "config"):
logger.info("Using the configuration from the Auth")
config = self.client.config
logger.info(f'Config: {config}')
else:
logger.info("Using the proxy configuration")
config = self.config
Expand Down Expand Up @@ -129,6 +133,21 @@ def handle_client_request(self, request: HttpParser) -> HttpParser:
logger.info("Handle client request hook")
logger.info(f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}")

wrapped_request = HttpRequestWrapper(request)
if (wrapped_request.is_head_request() or wrapped_request.is_get_request()) and hasattr(self.client, "mark_connect"):
if self.client.mark_connect:
if hasattr(self.client, "config"):
logger.info("Using the configuration from the Auth")
config = self.client.config
else:
logger.info("Using the proxy configuration")
config = self.config
response = get_response_from_request(wrapped_request, config)
if response.status_code:
logger.info(response.status_code)
self.queue_response(response)
return None

return request

def handle_upstream_chunk(self, chunk: memoryview):
Expand Down
4 changes: 2 additions & 2 deletions ontologytimemachine/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class OntoVersion(EnumValuePrint):
ORIGINAL_FAILOVER_LIVE_LATEST = "originalFailoverLiveLatest"
LATEST_ARCHIVED = "latestArchived"
TIMESTAMP_ARCHIVED = "timestampArchived"
DEPENDENCY_MANIFEST = "dependencyManifest"
#DEPENDENCY_MANIFEST = "dependencyManifest"


class HttpsInterception(EnumValuePrint):
Expand Down Expand Up @@ -91,7 +91,7 @@ class Config:
ontoFormatConf: OntoFormatConfig = field(default_factory=OntoFormatConfig)
ontoVersion: OntoVersion = OntoVersion.ORIGINAL_FAILOVER_LIVE_LATEST
restrictedAccess: bool = False
clientConfigViaProxyAuth: ClientConfigViaProxyAuth = ClientConfigViaProxyAuth.IGNORE
clientConfigViaProxyAuth: ClientConfigViaProxyAuth = ClientConfigViaProxyAuth.REQUIRED
httpsInterception: HttpsInterception = HttpsInterception.ALL
disableRemovingRedirects: bool = False
timestamp: str = ""
Expand Down
53 changes: 28 additions & 25 deletions ontologytimemachine/utils/proxy_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def do_deny_request_due_non_archivo_ontology_uri(wrapped_request, config):


def get_response_from_request(wrapped_request, config):
logger.info('Get response from request')
do_deny = do_deny_request_due_non_archivo_ontology_uri(wrapped_request, config)
if do_deny:
logger.warning(
Expand Down Expand Up @@ -148,16 +149,14 @@ def request_ontology(
):
allow_redirects = not disableRemovingRedirects
try:
logger.info(headers)
logger.info(allow_redirects)
if wrapped_request.is_head_request():
response = requests.head(
url=url, headers=headers, allow_redirects=allow_redirects, timeout=3
)
response = requests.head(url=url, headers=headers, allow_redirects=allow_redirects, timeout=3)
logger.info(response.content)
logger.info(response.status_code)
else:
response = requests.get(
url=url, headers=headers, allow_redirects=allow_redirects, timeout=3
)
response = requests.get(url=url, headers=headers, allow_redirects=allow_redirects, timeout=3)
logger.info(response.content)
logger.info(response.status_code)
logger.info("Successfully fetched ontology")
return response
except Exception as e:
Expand Down Expand Up @@ -192,7 +191,7 @@ def proxy_logic(wrapped_request, config):
)
elif config.ontoVersion == OntoVersion.LATEST_ARCHIVED:
logger.info('OntoVersion LATEST_ARCHIVED')
response = fetch_latest_archived(wrapped_request, ontology, headers)
response = fetch_latest_archived(wrapped_request, headers)
elif config.ontoVersion == OntoVersion.TIMESTAMP_ARCHIVED:
logger.info('OntoVersion TIMESTAMP_ARCHIVED')
response = fetch_timestamp_archived(wrapped_request, headers, config)
Expand All @@ -218,25 +217,29 @@ def fetch_failover(wrapped_request, headers, disableRemovingRedirects):
original_response = request_ontology(
wrapped_request, ontology, headers, disableRemovingRedirects
)
if original_response.status_code in passthrough_status_codes:
requested_mimetypes_with_priority = parse_accept_header_with_priority(
headers["Accept"]
)
requested_mimetypes = [x[0] for x in requested_mimetypes_with_priority]
response_mime_type = original_response.headers.get("Content-Type", ";").split(
";"
)[0]
logger.info(f"Requested mimetypes: {requested_mimetypes}")
logger.info(f"Response mimetype: {response_mime_type}")
if response_mime_type in requested_mimetypes:
return original_response
logger.info(f'Original response: {original_response}')
if original_response:
logger.info('Got an original response')
if original_response.status_code in passthrough_status_codes:
requested_mimetypes_with_priority = parse_accept_header_with_priority(
headers["Accept"]
)
requested_mimetypes = [x[0] for x in requested_mimetypes_with_priority]
response_mime_type = original_response.headers.get("Content-Type", ";").split(
";"
)[0]
logger.info(f"Requested mimetypes: {requested_mimetypes}")
logger.info(f"Response mimetype: {response_mime_type}")
if response_mime_type in requested_mimetypes:
return original_response
else:
logger.info(f"The returned type is not the same as the requested one")
return fetch_latest_archived(wrapped_request, headers)
else:
logger.info(f"The returned type is not the same as the requested one")
logger.info(f"The returned status code is not accepted: {original_response.status_code}")
return fetch_latest_archived(wrapped_request, headers)
else:
logger.info(
f"The returned status code is not accepted: {original_response.status_code}"
)
logger.info("No original response")
return fetch_latest_archived(wrapped_request, headers)


Expand Down
30 changes: 15 additions & 15 deletions tests/archivo_test_IRIs.tsv
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
enable_testcase iri error_dimension expected_error iri_type comment
0 http://buzzword.org.uk/rdf/personal-link-types# content text/html hash weird html instead of text/turtle
0 http://data.finlex.fi/schema/sfl/ content 0-bytes slash 0 bytes content-length
0 http://data.bigdatagrapes.eu/resource/ontology/ dns nxdomain slash
0 http://data.bigdatagrapes.eu/resource/ontology/MeasurementContext dns nxdomain term
0 http://data.ontotext.com/resource/leak/ http-code 502 slash
0 http://data.europa.eu/esco/flow http-code 406 slash
0 http://bdi.si.ehu.es/bdi/ontologies/ExtruOnt/ExtruOnt transport connect-timeout slash
0 http://catalogus-professorum.org/cpm/2/ transport connection-refused slash
0 http://www.w3.org/1999/02/22-rdf-syntax-ns# None hash
0 http://xmlns.com/foaf/0.1/ None slash
0 http://xmlns.com/foaf/0.1/Person None term
0 http://dbpedia.org/ontology/ None term
0 http://dbpedia.org/ontology/Person None term
1 https://bag2.basisregistraties.overheid.nl/bag/def/ http-code 404 slash
1 http://buzzword.org.uk/rdf/personal-link-types# content text/html hash weird html instead of text/turtle
1 http://data.finlex.fi/schema/sfl/ content 0-bytes slash 0 bytes content-length
1 http://data.bigdatagrapes.eu/resource/ontology/ dns nxdomain slash
1 http://data.bigdatagrapes.eu/resource/ontology/MeasurementContext dns nxdomain term
1 http://data.ontotext.com/resource/leak/ http-code 502 slash
1 http://data.europa.eu/esco/flow http-code 406 slash
1 http://bdi.si.ehu.es/bdi/ontologies/ExtruOnt/ExtruOnt transport connect-timeout slash
1 http://catalogus-professorum.org/cpm/2/ transport connection-refused slash
1 http://www.w3.org/1999/02/22-rdf-syntax-ns# None hash
1 http://xmlns.com/foaf/0.1/ None slash
1 http://xmlns.com/foaf/0.1/Person None term
1 http://dbpedia.org/ontology/ None term
1 http://dbpedia.org/ontology/Person None term
0 https://bag2.basisregistraties.overheid.nl/bag/def/ http-code 404 slash
0 https://bag2.basisregistraties.overheid.nl/bag/def/Gebruiksdoel http-code 404 term
0 https://id.parliament.uk/schema http-code 404 slash slash onto without trailing slash /
0 https://id.parliament.uk/schema/Approval http-code 404 term slash onto without trailing slash /
0 https://bmake.th-brandenburg.de/spv# http-code 403 hash
0 https://bmake.th-brandenburg.de/spv http-code 403 hash just test whether Archivo API is used correctly
0 https://w3id.org/ttla/ transport cert-expired hash
0 http://data-gov.tw.rpi.edu/2009/data-gov-twc.rdf transport connection-refused hash
1 http://data-gov.tw.rpi.edu/2009/data-gov-twc.rdf transport connection-refused hash
24 changes: 12 additions & 12 deletions tests/non_archivo_test_IRIs.tsv
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
iri error_dimension expected_error iri_type comment
https://data.ontotext.com/resource/leak/ http-code 401 https is not ID
https://www.w3.org/1999/02/22-rdf-syntax-ns# None https is not ID
http://example.org None
https://example.org None
http://1.1.1.1 None
https://1.1.1.1 None
https://data.globalchange.gov/gcis.owl http-code "403 " https is not ID
https://data.ordnancesurvey.co.uk/ontology/geometry/ http-code 404 https is not ID
https://data.ordnancesurvey.co.uk/ontology/ http-code 301 https is not ID
https://google.com None

enable_testcase iri error_dimension expected_error iri_type comment
0 https://data.ontotext.com/resource/leak/ http-code 401 https is not ID
0 https://www.w3.org/1999/02/22-rdf-syntax-ns# None https is not ID
0 http://example.org None
0 https://example.org None
0 http://1.1.1.1 None
0 https://1.1.1.1 None
0 https://data.globalchange.gov/gcis.owl http-code 403 https is not ID
0 https://data.ordnancesurvey.co.uk/ontology/geometry/ http-code 404 https is not ID
0 https://data.ordnancesurvey.co.uk/ontology/ http-code 301 https is not ID
0 https://google.com None
0
45 changes: 26 additions & 19 deletions tests/test_proxy_auth_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
HTTP_PROXY = f"http://{PROXY}"
HTTPS_PROXY = f"http://{PROXY}"
PROXIES = {"http": HTTP_PROXY, "https": HTTPS_PROXY}
CA_CERT_PATH = "ca-cert.pem"

logging.basicConfig(
level=logging.ERROR,
Expand All @@ -37,7 +38,9 @@ def create_fake_response(status_code='error'):

def make_request_without_proxy(iri: str) -> Tuple[int, str]:
"""Make a direct request to the IRI without using the proxy."""
headers = {}
headers = {
"Accept": "text/turtle"
}
try:
response = requests.get(iri, timeout=10, headers=headers, allow_redirects=True)
return response
Expand Down Expand Up @@ -73,11 +76,13 @@ def make_request_with_proxy(iri: str, mode: str) -> Tuple[int, str]:
username = f"--ontoVersion {mode}"
password = "my_password"
headers = {
"Accept": "text/turtle",
"Accept-Encoding": "identity",
"Proxy-Authorization": _basic_auth_str(username, password)
}
try:
response = requests.get(iri, proxies=PROXIES, headers=headers, timeout=10)
# There is an issue here for https requests
response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH, headers=headers, timeout=10)
return response
except SSLError as e:
mock_response = Mock()
Expand All @@ -103,7 +108,7 @@ def make_request_with_proxy(iri: str, mode: str) -> Tuple[int, str]:
else:
mock_response = Mock()
mock_response.content = ''
mock_response.status_code = 406
mock_response.status_code = 'error'
return mock_response
except Exception as e:
mock_response = Mock()
Expand All @@ -125,48 +130,50 @@ def test_proxy_responses(test_case):
if enabled == '1':
# Make direct and proxy requests
direct_response = make_request_without_proxy(iri)
logger.info(direct_response)
proxy_response = make_request_with_proxy(iri, 'original')
#proxy_response = make_request_with_proxy(iri, 'original')
#proxy_response = make_request_with_proxy(iri, 'laters')
#proxy_response = make_request_with_proxy(iri, 'original')

proxy_original_response = make_request_with_proxy(iri, 'original')
proxy_failover_response = make_request_with_proxy(iri, 'originalFailoverLiveLatest')
proxy_archivo_laest_response = make_request_with_proxy(iri, 'latestArchived')

# Evaluation based on error_dimension
if error_dimension == 'http-code':
assert int(expected_error) == direct_response.status_code
assert int(expected_error) == proxy_response.status_code
assert int(expected_error) == proxy_original_response.status_code


elif error_dimension == 'None':
assert direct_response.status_code == 200
assert proxy_response.status_code == 200
assert proxy_original_response.status_code == 200

elif error_dimension == 'content':
if expected_error == 'text_html':
assert direct_response.headers.get('Content-Type') == 'text/html'
assert proxy_response.headers.get('Content-Type') == 'text/html'
assert proxy_original_response.headers.get('Content-Type') == 'text/html'
elif expected_error == '0-bytes':
assert len(direct_response.content) == 0
assert len(proxy_response.content) == 0
assert len(proxy_original_response.content) == 0

elif error_dimension == 'dns':
if expected_error == 'nxdomain':
assert direct_response.status_code == 'nxdomain-error'
assert proxy_response.status_code == 502
assert proxy_original_response.status_code == 502

elif error_dimension == 'transport':
if expected_error == 'cert-expired':
assert direct_response.status_code == 'ssl-error'
assert proxy_response.status_code == 'ssl-error'
assert proxy_original_response.status_code == 'ssl-error'
elif expected_error == 'connect-timeout':
assert direct_response.status_code == 'timeout-error'
assert proxy_response.status_code == 'timeout-error'
assert proxy_original_response.status_code == 'timeout-error'
elif expected_error == 'connect-refused':
assert direct_response.status_code == 'connection-refused-error'
assert proxy_response.status_code == 'connection-refused-error'
assert proxy_original_response.status_code == 'connection-refused-error'

else:
assert True == True
assert 200 == proxy_failover_response.status_code
assert 200 == proxy_archivo_laest_response.status_code

else:
assert True



if __name__ == "__main__":
Expand Down

0 comments on commit 4f0ff50

Please sign in to comment.