Skip to content

Commit

Permalink
finally connect to trle without needing the cert file
Browse files (browse the repository at this point in the history)
  • Loading branch information
noisecode3 committed Oct 24, 2024
1 parent 0674139 commit f89bcc4
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 34 deletions.
2 changes: 1 addition & 1 deletion database/.gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
data.json
file_info.json
__pycache__
trle_cert.pem

84 changes: 51 additions & 33 deletions database/index_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
from cryptography.hazmat.primitives import serialization

import index_data
import get_leaf_cert

# Set to True by get_response after a request failed with curl error 60
# (SSL certificate error) and the server's leaf certificate was fetched and
# converted to PEM. While True, get_response writes LEAF_CERT to a temporary
# file and passes that path to curl as CAINFO.
MISCONFIGURED_SERVER = False
# Leaf certificate for the server, filled in lazily by get_response: first the
# object returned by get_leaf_cert.run(url), then replaced by its PEM-encoded
# bytes (public_bytes(encoding=serialization.Encoding.PEM)). None until a
# certificate error has been handled.
LEAF_CERT = None

# Set up logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s:%(message)s')
Expand Down Expand Up @@ -64,6 +66,7 @@ def get_response(url, content_type):
curl = None
headers = None
response_buffer = None
temp_cert_path = None

while retries < max_retries:
try:
Expand All @@ -74,42 +77,68 @@ def get_response(url, content_type):
curl.setopt(pycurl.WRITEDATA, response_buffer)
curl.setopt(pycurl.WRITEHEADER, headers_buffer)

# Set the path to the certificate for SSL/TLS verification
curl.setopt(pycurl.CAINFO, 'trle_cert.pem') # Use your certificate file
global MISCONFIGURED_SERVER
if MISCONFIGURED_SERVER:
global LEAF_CERT
if not LEAF_CERT:
sys.exit(1)

# Write the certificate to a temporary file manually
temp_cert_file = tempfile.NamedTemporaryFile(delete=False) # `delete=False` prevents auto-deletion
temp_cert_file.write(LEAF_CERT)
temp_cert_file.flush()
temp_cert_path = temp_cert_file.name
temp_cert_file.close() # Close the file so it can be accessed by pycurl

# Set CAINFO to use the temporary certificate file
curl.setopt(pycurl.CAINFO, temp_cert_path)

headers_list = [
'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept: */*',
'Referer: https://trcustoms.org/' # Change this to the appropriate referrer if needed
]
curl.setopt(pycurl.HTTPHEADER, headers_list)
# Perform the request
curl.perform()

# Get the response code
response_code = curl.getinfo(pycurl.RESPONSE_CODE)

# If the response is not 200 OK, retry
if response_code != 200:
retries += 1
time.sleep(3)
logging.warning(f"Retrying... Response code: {response_code}")
curl.close()
continue

# Get the headers
headers = headers_buffer.getvalue().decode('utf-8')


# Break the loop on success
break

except Exception as e:
except pycurl.error as curl_error:
if curl_error.args[0] == 60: # SSL certificate error
LEAF_CERT = get_leaf_cert.run(url)

if LEAF_CERT:
try:
LEAF_CERT = LEAF_CERT.public_bytes(encoding=serialization.Encoding.PEM)
MISCONFIGURED_SERVER = True
logging.info("Leaf certificate retrieved and applied.")
except Exception as e:
logging.error("Failed to convert leaf certificate to PEM: %s", e)
sys.exit(1)
else:
logging.error("Failed to retrieve leaf certificate. Exiting.")
sys.exit(1)
continue

logging.error("Request failed: %s", curl_error)
retries += 1
logging.error(f"Request failed: {e}")
if retries >= max_retries:
logging.error("Max retries reached. Exiting.")
sys.exit(1)

finally:
if temp_cert_path and os.path.exists(temp_cert_path):
os.remove(temp_cert_path) # Ensure the temp cert file is deleted after the request

if curl is None:
logging.error("No curl instance")
sys.exit(1)
Expand All @@ -130,29 +159,18 @@ def get_response(url, content_type):
break

# Validate and return the response based on content type
if response_content_type == 'text/html' and content_type == 'text/html':
response = response_buffer.getvalue().decode('utf-8') # Plain text
curl.close()
return response
elif response_content_type == 'application/json' and content_type == 'application/json':
response = response_buffer.getvalue().decode('utf-8')
curl.close()
return json.loads(response) # Parse and return JSON
elif response_content_type in ['image/jpeg', 'image/png'] and content_type in ['image/jpeg', 'image/png']:
response = response_buffer.getvalue()
curl.close()
return response # Return raw image data
elif response_content_type == 'application/pkix-cert' and content_type == 'application/pkix-cert':
response = response_buffer.getvalue()
curl.close()
# Add custom validation for certificates here if needed
return response # Return raw certificate data
if response_content_type == content_type:
if content_type == 'text/html':
return response_buffer.getvalue().decode('utf-8')
elif content_type == 'application/json':
return json.loads(response_buffer.getvalue().decode('utf-8'))
elif content_type in ['image/jpeg', 'image/png']:
return response_buffer.getvalue()
elif content_type == 'application/pkix-cert':
return response_buffer.getvalue()
else:
logging.error("Unexpected content type: %s, expected %s",
response_content_type, content_type)
logging.error("Unexpected content type: %s, expected %s", response_content_type, content_type)
sys.exit(1)


def validate_pem(pem):
"""Validate the certificate as a text"""
# Check if the response contains a PEM key
Expand Down

0 comments on commit f89bcc4

Please sign in to comment.