Commit bee0770

Allow multiple files at once
jakep-allenai committed Dec 2, 2024
1 parent 3212a6d commit bee0770
Showing 3 changed files with 179 additions and 105 deletions.
60 changes: 60 additions & 0 deletions scripts/release.sh
@@ -2,8 +2,68 @@

 set -e

+# Function to extract version components from version.py using regex
+get_version_from_file() {
+    VERSION_FILE="tinyhost/version.py"
+
+    if [[ ! -f "$VERSION_FILE" ]]; then
+        echo "Error: $VERSION_FILE does not exist."
+        exit 1
+    fi
+
+    # Extract _MAJOR
+    _MAJOR=$(grep -E '^_MAJOR\s*=\s*"([^"]+)"' "$VERSION_FILE" | sed -E 's/_MAJOR\s*=\s*"([^"]+)"/\1/')
+    if [[ -z "$_MAJOR" ]]; then
+        echo "Error: Could not extract _MAJOR from $VERSION_FILE."
+        exit 1
+    fi
+
+    # Extract _MINOR
+    _MINOR=$(grep -E '^_MINOR\s*=\s*"([^"]+)"' "$VERSION_FILE" | sed -E 's/_MINOR\s*=\s*"([^"]+)"/\1/')
+    if [[ -z "$_MINOR" ]]; then
+        echo "Error: Could not extract _MINOR from $VERSION_FILE."
+        exit 1
+    fi
+
+    # Extract _PATCH
+    _PATCH=$(grep -E '^_PATCH\s*=\s*"([^"]+)"' "$VERSION_FILE" | sed -E 's/_PATCH\s*=\s*"([^"]+)"/\1/')
+    if [[ -z "$_PATCH" ]]; then
+        echo "Error: Could not extract _PATCH from $VERSION_FILE."
+        exit 1
+    fi
+
+    # Extract _SUFFIX (optional)
+    _SUFFIX=$(grep -E '^_SUFFIX\s*=\s*"([^"]*)"' "$VERSION_FILE" | sed -E 's/_SUFFIX\s*=\s*"([^"]*)"/\1/')
+    if [[ -z "$_SUFFIX" ]]; then
+        _SUFFIX=""
+    fi
+
+    # Construct VERSION
+    VERSION_PY="${_MAJOR}.${_MINOR}.${_PATCH}${_SUFFIX}"
+    echo "$VERSION_PY"
+}
+
 TAG=$(python -c 'from tinyhost.version import VERSION; print("v" + VERSION)')

+# Get the VERSION from version.py
+VERSION_PY=$(get_version_from_file)
+
+# Compare the two versions
+if [[ "v$VERSION_PY" != "$TAG" ]]; then
+    echo "Version mismatch detected:"
+    echo "  Python reported version: $TAG"
+    echo "  version.py contains: v$VERSION_PY"
+    echo
+    read -p "The versions do not match. Please run 'pip install -e .' to synchronize versions. Do you want to continue? [Y/n] " prompt
+
+    if [[ ! "$prompt" =~ ^([yY][eE][sS]|[yY])$ ]]; then
+        echo "Release process aborted due to version mismatch."
+        exit 1
+    else
+        echo "Proceeding with the release despite the version mismatch."
+    fi
+fi
+
 read -p "Creating new release for $TAG. Do you want to continue? [Y/n] " prompt

 if [[ $prompt == "y" || $prompt == "Y" || $prompt == "yes" || $prompt == "Yes" ]]; then
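For reference, get_version_from_file expects tinyhost/version.py to keep each component on its own quoted line, roughly like the sketch below. The _MINOR, _PATCH, and _SUFFIX lines are visible in the version.py diff at the bottom of this commit; the _MAJOR value and the final VERSION assembly fall outside the visible hunks, so those two lines are assumptions here:

    _MAJOR = "0"  # assumed; this line is outside the visible diff
    _MINOR = "4"
    _PATCH = "10"
    _SUFFIX = ""  # e.g. ".dev$DATE" for nightly builds

    # Assumed assembly: release.sh rebuilds the same string with grep/sed,
    # so "v$VERSION_PY" can be compared against the TAG reported by the
    # installed package.
    VERSION = f"{_MAJOR}.{_MINOR}.{_PATCH}{_SUFFIX}"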
222 changes: 118 additions & 104 deletions tinyhost/tinyhost.py
@@ -31,134 +31,148 @@
     default=604800,
     help="Length of time in seconds that this resulting link will work for. Default is 1 week. Max is also 1 week.",
 )
-@click.argument("html_file", type=click.Path(exists=True))
-def tinyhost(html_file: str, bucket: str, prefix: str, duration: int, reset: bool):
+@click.argument("html_files", nargs=-1, type=click.Path(exists=True))
+def tinyhost(html_files: list[str], bucket: str, prefix: str, duration: int, reset: bool):
     """
-    Hosts your html_file on an S3 bucket, and gives back a signed URL.
+    Hosts your html_files on an S3 bucket, and gives back signed URLs.
     Assumes that you have AWS credentials in your environment. Run `aws configure` if not.
     If you don't pass in an S3 bucket, the script will prompt you to create one, if it's possible.
     Otherwise, it will use the specified bucket
     """
-    try:
-        if not bucket:
-            bucket = run_new_bucket_flow()
-
-        if not bucket:
-            raise click.ClickException(
-                "Unable to automatically detect/create an S3 bucket, please specify one using --bucket"
-            )
-
-        # Make sure that your file content is a text/html page to begin with
-        file_basename = os.path.splitext(os.path.basename(html_file))[0].lower()
-        file_extension = os.path.splitext(html_file)[-1].lower()
-
-        if file_extension in [".htm", ".html"]:
-            mime = magic.Magic(mime=True)
-            content_type = mime.from_file(html_file)
-
-            if content_type != "text/html":
-                raise click.ClickException("Your file was not detected as text/html.")
-
-            with open(html_file, "r") as f:
-                html_content = f.read()
-
-            soup = BeautifulSoup(html_content, "html.parser")
-
-            head_tag = soup.find("head")
-
-            # Write or update the datastore section
-            if not head_tag:
-                raise click.ClickException("Could not find a <head> tag in your html, you'll need to add one")
+    if not html_files:
+        click.echo(tinyhost.get_help(click.Context(tinyhost)))
+        return
+
+    if not bucket:
+        bucket = run_new_bucket_flow()
+
+    if not bucket:
+        raise click.ClickException(
+            "Unable to automatically detect/create an S3 bucket, please specify one using --bucket"
+        )

-            script_tags = head_tag.find_all("script")
-            found_existing_template = False
+    # Loop over files
+    for html_file in html_files:
+        temp_file_name = None  # Initialize temp_file_name
+        try:
+            # Make sure that your file content is a text/html page to begin with
+            file_basename = os.path.splitext(os.path.basename(html_file))[0].lower()
+            file_extension = os.path.splitext(html_file)[-1].lower()

-            for script_tag in script_tags:
-                if script_tag.string and "BEGIN TINYHOST DATASTORE SECTION" in script_tag.string:
-                    if reset:
-                        datastore_id = generate_new_datastore()
-                    else:
-                        datastore_re = re.search(r"const datastoreId = \"(\w+)\";", script_tag.string)
-                        datastore_id = datastore_re[1] if datastore_re else generate_new_datastore()
+            if file_extension in [".htm", ".html"]:
+                mime = magic.Magic(mime=True)
+                content_type = mime.from_file(html_file)

-                    click.echo("Found existing datastore section, replacing...")
+                if content_type != "text/html":
+                    raise click.ClickException("Your file was not detected as text/html.")

-                    get_url, post_dict = get_datastore_presigned_urls(bucket, prefix, datastore_id, duration)
-                    script_tag.string = get_datastore_section(datastore_id, get_url, post_dict)
-                    found_existing_template = True
-                    break
+                with open(html_file, "r") as f:
+                    html_content = f.read()

-            if not found_existing_template:
-                click.echo("Need to write in new script template")
-                new_script = soup.new_tag("script")
+                soup = BeautifulSoup(html_content, "html.parser")

-                datastore_id = generate_new_datastore()
+                head_tag = soup.find("head")

-                get_url, post_dict = get_datastore_presigned_urls(bucket, prefix, datastore_id, duration)
-                new_script.string = get_datastore_section(datastore_id, get_url, post_dict)
-                head_tag.append(new_script)
-                head_tag.append(soup.new_string("\n"))
+                # Write or update the datastore section
+                if not head_tag:
+                    raise click.ClickException("Could not find a <head> tag in your html, you'll need to add one")

-            html_content = str(soup)
+                script_tags = head_tag.find_all("script")
+                found_existing_template = False

-            # Write the datastore back to the file, to help user debug and test if needed
-            with open(html_file, "w") as f:
-                f.write(html_content)
-        elif file_extension in [".ipynb"]:
-            from nbconvert import HTMLExporter
-            from nbformat import NO_CONVERT, read
+                for script_tag in script_tags:
+                    if script_tag.string and "BEGIN TINYHOST DATASTORE SECTION" in script_tag.string:
+                        if reset:
+                            datastore_id = generate_new_datastore()
+                        else:
+                            datastore_re = re.search(r'const datastoreId = "(\w+)";', script_tag.string)
+                            datastore_id = datastore_re[1] if datastore_re else generate_new_datastore()

-            with open(html_file, "r", encoding="utf-8") as f:
-                notebook_content = read(f, NO_CONVERT)
+                        click.echo("Found existing datastore section, replacing...")

-            # Initialize the HTML exporter and specify the template
-            html_exporter = HTMLExporter(template_name="classic")  # Specify template name here
-            html_exporter.embed_images = True
+                        get_url, post_dict = get_datastore_presigned_urls(bucket, prefix, datastore_id, duration)
+                        script_tag.string = get_datastore_section(datastore_id, get_url, post_dict)
+                        found_existing_template = True
+                        break

-            # Convert the notebook to HTML using the specified template
-            (body, resources) = html_exporter.from_notebook_node(notebook_content)
+                if not found_existing_template:
+                    click.echo("Need to write in new script template")
+                    new_script = soup.new_tag("script")

-            temp_file = tempfile.NamedTemporaryFile("w")
-            temp_file.write(body)
-            temp_file.flush()
+                    datastore_id = generate_new_datastore()

-            html_file = temp_file.name
-        else:
-            raise click.ClickException(
-                "You must use a .htm or .html extension for html pages, or .ipynb for Jupyter notebooks"
-            )
+                    get_url, post_dict = get_datastore_presigned_urls(bucket, prefix, datastore_id, duration)
+                    new_script.string = get_datastore_section(datastore_id, get_url, post_dict)
+                    head_tag.append(new_script)
+                    head_tag.append(soup.new_string("\n"))
+
+                html_content = str(soup)
+
+                # Write the datastore back to the file, to help user debug and test if needed
+                with open(html_file, "w") as f:
+                    f.write(html_content)
+            elif file_extension in [".ipynb"]:
+                from nbconvert import HTMLExporter
+                from nbformat import NO_CONVERT, read
+
+                with open(html_file, "r", encoding="utf-8") as f:
+                    notebook_content = read(f, NO_CONVERT)
+
+                # Initialize the HTML exporter and specify the template
+                html_exporter = HTMLExporter(template_name="classic")  # Specify template name here
+                html_exporter.embed_images = True
+
+                # Convert the notebook to HTML using the specified template
+                (body, resources) = html_exporter.from_notebook_node(notebook_content)
+
+                # Create a temporary file to hold the HTML content
+                with tempfile.NamedTemporaryFile("w", delete=False) as temp_file:
+                    temp_file.write(body)
+                    temp_file.flush()
+                    temp_file_name = temp_file.name
+
+                html_file = temp_file_name
+            else:
+                raise click.ClickException(
+                    "You must use a .htm or .html extension for html pages, or .ipynb for Jupyter notebooks"
+                )

-        sha1_hash = compute_sha1_hash(html_file)
-
-        # Keep the original basename, so you can tell what to expect by looking at the URL
-        new_file_name = f"{file_basename}-{sha1_hash[:12]}{file_extension}"
-
-        s3_key = f"{prefix}/{new_file_name}" if prefix else new_file_name
-
-        # Upload with a long cache, because our content is served via hashed link anyways
-        # This helps when you have large single-page files, ex. with images embedded in them
-        s3_client.upload_file(
-            html_file,
-            bucket,
-            s3_key,
-            ExtraArgs={"ContentType": "text/html", "CacheControl": "max-age=31536000, public"},
-        )
+            sha1_hash = compute_sha1_hash(html_file)
+
+            # Keep the original basename, so you can tell what to expect by looking at the URL
+            new_file_name = f"{file_basename}-{sha1_hash[:12]}{file_extension}"
+
+            s3_key = f"{prefix}/{new_file_name}" if prefix else new_file_name
+
+            # Upload with a long cache, because our content is served via hashed link anyways
+            # This helps when you have large single-page files, ex. with images embedded in them
+            s3_client.upload_file(
+                html_file,
+                bucket,
+                s3_key,
+                ExtraArgs={"ContentType": "text/html", "CacheControl": "max-age=31536000, public"},
+            )

-        signed_url = s3_client.generate_presigned_url(
-            "get_object", Params={"Bucket": bucket, "Key": s3_key}, ExpiresIn=duration
-        )
-
-        if signed_url:
-            click.echo(
-                f"Your file has been uploaded successfully!\nAccess it via the following signed URL:\n\n{signed_url}"
-            )
-        else:
-            click.echo("Failed to generate a signed URL.")
-
-    except NoCredentialsError:
-        click.echo("AWS credentials not found. Please configure them.")
+            signed_url = s3_client.generate_presigned_url(
+                "get_object", Params={"Bucket": bucket, "Key": s3_key}, ExpiresIn=duration
+            )
+
+            if signed_url:
+                click.echo(
+                    f"Your file '{html_file}' has been uploaded successfully!\nAccess it via the following signed URL:\n\n{signed_url}\n"
+                )
+            else:
+                click.echo(f"Failed to generate a signed URL for '{html_file}'.")
+
+        except NoCredentialsError:
+            click.echo("AWS credentials not found. Please configure them.")
+        except Exception as e:
+            click.echo(f"An error occurred while processing '{html_file}': {e}")
+        finally:
+            if temp_file_name:
+                os.unlink(temp_file_name)


 def generate_new_datastore() -> str:
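One behavioral detail in the notebook branch above: the old code held a NamedTemporaryFile("w") object open and passed its name along, relying on the object staying alive, while the new code opens the file with delete=False and removes it itself in the finally block, once per loop iteration. A minimal sketch of that pattern, with a made-up payload:

    import os
    import tempfile

    temp_file_name = None
    try:
        # delete=False keeps the file on disk after the `with` block closes it,
        # so a later step (here, an upload) can still read it by name.
        with tempfile.NamedTemporaryFile("w", delete=False) as temp_file:
            temp_file.write("<html><head></head><body>hello</body></html>")
            temp_file_name = temp_file.name
        print(f"would upload {temp_file_name} here")
    finally:
        # Mirrors the new finally block: clean up even if the upload step fails.
        if temp_file_name:
            os.unlink(temp_file_name)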
@@ -235,7 +249,7 @@ def run_new_bucket_flow() -> str:
     username = arn.split("/")[-1]
     bucket = f"{username}-tinyhost"

-    # Search if bucket exists and is writeable
+    # Search if bucket exists and is writable
     try:
         s3_client.head_bucket(Bucket=bucket)
         return bucket
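With nargs=-1 on the argument, the command now accepts any number of paths, and the per-file try/except means one bad input no longer aborts the rest of the batch. A hypothetical invocation (bucket and file names invented for illustration):

    # Upload two pages and a notebook in one call; a signed URL is printed per file.
    tinyhost --bucket my-tinyhost-bucket report.html demo.html analysis.ipynb

    # With no files at all, the command now just prints its help text and exits.
    tinyhost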
2 changes: 1 addition & 1 deletion tinyhost/version.py
@@ -2,7 +2,7 @@
 _MINOR = "4"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "9"
+_PATCH = "10"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
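Assuming _MAJOR is "0", this bump takes the package from 0.4.9 to 0.4.10, so after a fresh pip install -e . the confirmation prompt in release.sh should read:

    Creating new release for v0.4.10. Do you want to continue? [Y/n]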
