Commit
Remove duplicated code for dev and prod
aranega committed Oct 12, 2024
1 parent 4eff509 · commit edff50f
Showing 1 changed file with 0 additions and 102 deletions.
applications/visualizer/backend/visualizer/settings/development.py (0 additions, 102 deletions)
@@ -61,105 +61,3 @@ def get_metadata_files(self, dataset_id):

RAW_DB_DATA_DOWNLOADER = DbDataDownloader
METADATA_DOWNLOADER = DbDataDownloader


GCS_BUCKET = "celegans"
GCS_BUCKET_URL = f"https://storage.googleapis.com/{GCS_BUCKET}"
DB_RAW_DATA_FOLDER = "db-raw-data"


class DbDataDownloader:
def __init__(self):
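        # multiplexed=True issues requests concurrently over HTTP/2; the
        # responses returned by .get() stay lazy until gather() resolves them.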
self.session = Session(multiplexed=True)

def get_summary(self):
summary_content = self.session.get(
f"{GCS_BUCKET_URL}/{DB_RAW_DATA_FOLDER}/summary.txt", allow_redirects=True
)
if summary_content.status_code != 200:
raise Exception(
f"Error while pulling 'summary.txt' from the bucket: {summary_content}"
)
assert summary_content.text, "The summary.txt looks empty"
return summary_content.text

def pull_files(self):
summary = self.get_summary()
files = {}
print("Pulling DB data files from the bucket (multiplexed)...")
for bucket_file_path in summary.split():
destination = BASE_DIR / bucket_file_path
print(f" . pulling gs://{GCS_BUCKET}/{bucket_file_path} to {destination}")
files[destination] = self.session.get(
f"{GCS_BUCKET_URL}/{bucket_file_path}", allow_redirects=True
)
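        # Resolve every queued response before reading the bodies below.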
self.session.gather()
print("Writing the files...")
for file_path, result in files.items():
file_path.parent.mkdir(parents=True, exist_ok=True)
file_path.write_text(result.text)

# We pull the segmentation metadata and the EM viewer metadata
self._pull_metadata()

return BASE_DIR / DB_RAW_DATA_FOLDER

def _pull_metadata(self):
db_data_folder = BASE_DIR / DB_RAW_DATA_FOLDER
datasets = json.loads((db_data_folder / "datasets.json").read_text())
files = {}
print(
"Pulling EM viewer and segmentation config data files from the bucket (multiplexed)..."
)
for dataset in datasets:
dataset_id = dataset["id"]
em_metadata = db_data_folder / dataset_id / "em_metadata.json"
segmentation_metadata = (
db_data_folder / dataset_id / "segmentation_metadata.json"
)
files[segmentation_metadata] = self._pull_segmentation_metadata(dataset_id)
files[em_metadata] = self._pull_em_metadata(dataset_id)

for file_path, result in files.items():
if result.status_code != 200 or not result.text:
print(f" [ ] no {file_path.name} data for {file_path.parent.name}")
continue
print(
f" [x] configuration found for {file_path.parent.name}, writing in {file_path}"
)
file_path.parent.mkdir(parents=True, exist_ok=True)
file_path.write_text(result.text)

def _pull_segmentation_metadata(self, dataset_id):
url = f"{GCS_BUCKET_URL}/{dataset_id}/segmentations/metadata.json"
print(f" . pulling gs://{url}")
return self.session.get(url)

def _pull_em_metadata(self, dataset_id):
url = f"{GCS_BUCKET_URL}/{dataset_id}/em/metadata.json"
print(f" . pulling gs://{url}")
return self.session.get(url)

@classmethod
def get_segmentation_metadata(cls, dataset_id):
file = BASE_DIR / DB_RAW_DATA_FOLDER / dataset_id / "segmentation_metadata.json"
if not file.exists():
return {}
return json.loads(file.read_text())

@classmethod
def get_em_metadata(cls, dataset_id):
file = BASE_DIR / DB_RAW_DATA_FOLDER / dataset_id / "em_metadata.json"
if not file.exists():
return {}
return json.loads(file.read_text())

def get_metadata_files(self, dataset_id):
return (
self.get_em_metadata(dataset_id),
self.get_segmentation_metadata(dataset_id),
)


RAW_DB_DATA_DOWNLOADER = DbDataDownloader
METADATA_DOWNLOADER = DbDataDownloader
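
Note on the removed implementation: the Session(multiplexed=True) / session.gather() pair matches the niquests library (a requests-compatible HTTP client with HTTP/2 multiplexing); the import sits outside this hunk, so treat that as an assumption. A minimal, self-contained sketch of the same lazy-fetch-then-gather pattern:

from pathlib import Path

from niquests import Session  # assumption: the removed code imports Session from niquests

GCS_BUCKET_URL = "https://storage.googleapis.com/celegans"  # bucket value taken from the removed code

session = Session(multiplexed=True)

# In multiplexed mode, .get() returns a lazy response immediately;
# the requests are sent concurrently over a single HTTP/2 connection.
pending = {
    Path("db-raw-data/summary.txt"): session.get(
        f"{GCS_BUCKET_URL}/db-raw-data/summary.txt", allow_redirects=True
    ),
}

# gather() blocks until every in-flight response has been fully received.
session.gather()

for path, response in pending.items():
    if response.status_code == 200 and response.text:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(response.text)

Accessing any attribute of a lazy response would also force it to resolve; queuing all the requests first and calling gather() once is what lets the downloads overlap.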
