Skip to content

Commit

Permalink
Updates to BatchAggregation & tests
Browse files Browse the repository at this point in the history
  • Loading branch information
zwolf committed May 22, 2024
1 parent 1f945fb commit c6e8ba9
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 31 deletions.
74 changes: 61 additions & 13 deletions panoptes_aggregation/batch_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import pandas as pd
import os
import urllib3
from os import getenv
from shutil import make_archive
import uuid

from azure.storage.blob import BlobServiceClient

from panoptes_client import Panoptes, Project, Workflow
from panoptes_aggregation.workflow_config import workflow_extractor_config, workflow_reducer_config
from panoptes_aggregation.workflow_config import workflow_extractor_config
from panoptes_aggregation.scripts import batch_utils
from panoptes_client.panoptes import PanoptesAPIException

import logging
# Quiet the panoptes_client library: only ERROR and above are emitted.
# Logger.setLevel() returns None, so the logger must be bound before the
# level is set; chaining the two calls would leave this name set to None.
panoptes_client_logger = logging.getLogger('panoptes_client')
panoptes_client_logger.setLevel(logging.ERROR)
Expand All @@ -20,15 +22,34 @@
@celery.task(name="run_aggregation")
def run_aggregation(project_id, workflow_id, user_id):
    """
    Celery task: run a full batch aggregation for one workflow.

    Steps, in order:
      1. download the classification and workflow exports,
      2. load both exports through the BatchAggregator,
      3. run the extractors over the classifications,
      4. write one CSV per extractor type and append every standard
         reduction for that type to a single reductions CSV,
      5. upload the results via ``BatchAggregator.upload_files``.

    Parameters
    ----------
    project_id, workflow_id, user_id
        Passed straight through to ``BatchAggregator``.
    """
    ba = BatchAggregator(project_id, workflow_id, user_id)
    ba.save_exports()

    # The returned frame is not needed here. The call is kept because
    # `ba.tasks` is read below -- presumably populated by
    # process_wf_export; confirm against its full definition.
    ba.process_wf_export(ba.wf_csv)
    cls_df = ba.process_cls_export(ba.cls_csv)

    extractor_config = workflow_extractor_config(ba.tasks)
    extracted_data = batch_utils.batch_extract(cls_df, extractor_config)

    batch_standard_reducers = {
        'question_extractor': ['question_reducer', 'question_consensus_reducer'],
        'survey_extractor': ['survey_reducer']
    }
    # Every reduction is appended to this one file, so build the path once.
    reductions_file = f'{ba.output_path}/{ba.workflow_id}_reductions.csv'

    for task_type, extract_df in extracted_data.items():
        extract_df.to_csv(f'{ba.output_path}/{ba.workflow_id}_{task_type}.csv')

        reducer_list = batch_standard_reducers.get(task_type)
        if reducer_list is None:
            # An extractor type without a standard reducer must not abort
            # the whole run: its extracts are already on disk, so warn and
            # carry on with the remaining types.
            logging.getLogger(__name__).warning(
                'No standard reducers configured for %s; skipping reduction',
                task_type
            )
            continue

        for reducer in reducer_list:
            # This is an override. The workflow_reducer_config method returns a config object
            # that is incompatible with the batch_utils batch_reduce method
            reducer_config = {'reducer_config': {reducer: {}}}
            reduced = batch_utils.batch_reduce(extract_df, reducer_config)
            reduced.to_csv(reductions_file, mode='a')

    ba.upload_files()

    # TODO: notify Panoptes that the aggregation run has finished

class BatchAggregator:
"""
Expand All @@ -39,20 +60,26 @@ def __init__(self, project_id, workflow_id, user_id):
self.project_id = project_id
self.workflow_id = workflow_id
self.user_id = user_id
self._generate_uuid()
self._connect_api_client()

def save_exports(self):
    """
    Download the classification and workflow exports for this run.

    Creates the scratch directory ``tmp/<workflow_id>``, stores it on
    ``self.output_path``, downloads both exports into it, and records
    their locations on ``self.cls_csv`` and ``self.wf_csv``.

    Returns
    -------
    dict
        ``{'classifications': <path>, 'workflows': <path>}``

    Raises
    ------
    FileExistsError
        If the scratch directory for this workflow already exists.
    """
    self.output_path = f'tmp/{self.workflow_id}'
    # makedirs (not mkdir) so a missing parent `tmp` directory is created
    # too. exist_ok is deliberately left off: an existing workflow
    # directory still raises, as it did with mkdir.
    os.makedirs(self.output_path)

    cls_export = Workflow(self.workflow_id).describe_export('classifications')
    full_cls_url = cls_export['media'][0]['src']
    cls_file = f'{self.output_path}/{self.workflow_id}_cls_export.csv'
    self._download_export(full_cls_url, cls_file)

    wf_export = Project(self.project_id).describe_export('workflows')
    full_wf_url = wf_export['media'][0]['src']
    wf_file = f'{self.output_path}/{self.workflow_id}_workflow_export.csv'
    self._download_export(full_wf_url, wf_file)

    self.cls_csv = cls_file
    self.wf_csv = wf_file
    return {'classifications': cls_file, 'workflows': wf_file}

def process_wf_export(self, wf_csv):
self.wf_df = pd.read_csv(wf_csv)
Expand All @@ -68,6 +95,27 @@ def process_cls_export(self, cls_csv):
self.cls_df = cls_df.query(f'workflow_version == {self.workflow_version}')
return self.cls_df

def connect_blob_storage(self):
    """
    Open the Azure blob service client and create this run's container.

    The connection string is read from the
    ``AZURE_STORAGE_CONNECTION_STRING`` environment variable. The client is
    kept on ``self.blob_service_client`` and a container named after
    ``self.id`` is created through it.
    """
    service = BlobServiceClient.from_connection_string(
        os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    )
    self.blob_service_client = service
    service.create_container(name=self.id)

def upload_file_to_storage(self, container_name, filepath):
    """
    Upload one local file to blob storage, replacing any existing blob.

    Parameters
    ----------
    container_name
        Name of the target container.
    filepath
        Path of the local file; its final path component becomes the
        blob name.
    """
    # basename() gives the same result as splitting on '/' for POSIX paths
    # and also copes with the OS-native separator, which matters for paths
    # produced by shutil.make_archive.
    blob = os.path.basename(filepath)
    blob_client = self.blob_service_client.get_blob_client(container=container_name, blob=blob)
    with open(file=filepath, mode="rb") as data:
        blob_client.upload_blob(data, overwrite=True)

def upload_files(self):
    """
    Publish this run's results to blob storage.

    Connects to storage, uploads the reductions CSV from
    ``self.output_path``, then zips the whole output directory to
    ``tmp/<self.id>.zip`` and uploads the archive. Both blobs go to the
    container named after ``self.id``.
    """
    self.connect_blob_storage()
    container = self.id

    reductions_csv = f'{self.output_path}/{self.workflow_id}_reductions.csv'
    self.upload_file_to_storage(container, reductions_csv)

    # make_archive appends the '.zip' suffix itself and returns the
    # resulting archive path.
    archive_path = make_archive(f'tmp/{self.id}', 'zip', self.output_path)
    self.upload_file_to_storage(container, archive_path)

def _generate_uuid(self):
    """Store a fresh random UUID, as 32 lowercase hex characters, on ``self.id``."""
    fresh = uuid.uuid4()
    self.id = fresh.hex

def _download_export(self, url, filepath):
http = urllib3.PoolManager()
r = http.request('GET', url, preload_content=False)
Expand All @@ -82,7 +130,7 @@ def _download_export(self, url, filepath):
def _connect_api_client(self):
    """
    Authenticate the shared Panoptes client from environment variables.

    Reads ``PANOPTES_URL`` (falling back to the production endpoint),
    ``PANOPTES_CLIENT_ID`` and ``PANOPTES_CLIENT_SECRET``.
    """
    # connect to the API only once for this function request
    Panoptes.connect(
        endpoint=os.getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
        client_id=os.getenv('PANOPTES_CLIENT_ID'),
        client_secret=os.getenv('PANOPTES_CLIENT_SECRET')
    )
8 changes: 8 additions & 0 deletions panoptes_aggregation/tests/batch_aggregation/cls_export.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
classification_id,user_name,user_id,user_ip,workflow_id,workflow_name,workflow_version,created_at,gold_standard,expert,metadata,annotations,subject_data,subject_ids
543695319,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:17:42 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:16:35.085Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:17:42.334Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":150,""clientHeight"":150,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""value"":""Yes""},{""task"":""T1"",""task_label"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""value"":""Yes""}]","{""96588114"":{""retired"":{""id"":125510348,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:44:28.745Z"",""updated_at"":""2024-02-23T16:17:42.488Z"",""retired_at"":""2024-02-23T16:17:42.479Z"",""subject_id"":96588114,""retirement_reason"":""classification_count""},""ramean"":123.7681641625,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23abjrdem"",""decmean"":57.182124325000004,""objectId"":""ZTF23abjrdem""}}",96588114
543695340,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:17:48 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:17:42.351Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:17:48.539Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588105"":{""retired"":{""id"":125510263,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:24.074Z"",""updated_at"":""2024-02-23T16:17:48.696Z"",""retired_at"":""2024-02-23T16:17:48.686Z"",""subject_id"":96588105,""retirement_reason"":""classification_count""},""ramean"":157.50762885625,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF22abycniv"",""decmean"":8.21724599375,""objectId"":""ZTF22abycniv""}}",96588105
543695374,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:02 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:17:48.559Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:02.264Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""value"":""Yes""},{""task"":""T1"",""task_label"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""value"":""No""}]","{""96588126"":{""retired"":{""id"":125510270,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:25.834Z"",""updated_at"":""2024-02-23T16:18:02.396Z"",""retired_at"":""2024-02-23T16:18:02.389Z"",""subject_id"":96588126,""retirement_reason"":""classification_count""},""ramean"":98.49884808888889,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF22abfnkve"",""decmean"":58.67660070000001,""objectId"":""ZTF22abfnkve""}}",96588126
543695390,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:09 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:02.283Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:09.532Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588128"":{""retired"":{""id"":125510290,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:36.461Z"",""updated_at"":""2024-02-23T16:18:09.674Z"",""retired_at"":""2024-02-23T16:18:09.667Z"",""subject_id"":96588128,""retirement_reason"":""classification_count""},""ramean"":41.573462775,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23aavvcjd"",""decmean"":-5.001660237499999,""objectId"":""ZTF23aavvcjd""}}",96588128
543695425,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:24 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:09.551Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:24.225Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""value"":""Yes""},{""task"":""T1"",""task_label"":""Is the cross-hair in the image close to a faint, fuzzy galaxy?"",""value"":""No""}]","{""96588109"":{""retired"":{""id"":125510335,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:44:14.501Z"",""updated_at"":""2024-02-23T16:18:24.390Z"",""retired_at"":""2024-02-23T16:18:24.378Z"",""subject_id"":96588109,""retirement_reason"":""classification_count""},""ramean"":11.719328585714285,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23aatzhso"",""decmean"":42.02810038571429,""objectId"":""ZTF23aatzhso""}}",96588109
543695436,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:27 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:24.243Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:27.892Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588106"":{""retired"":{""id"":125510296,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:44.966Z"",""updated_at"":""2024-02-23T17:54:11.466Z"",""retired_at"":""2024-02-23T17:54:11.458Z"",""subject_id"":96588106,""retirement_reason"":""classification_count""},""ramean"":47.78652812,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23aauyuay"",""decmean"":73.76492526000001,""objectId"":""ZTF23aauyuay""}}",96588106
543695453,not-logged-in-b644753d0e3948f81dc2,,b644753d0e3948f81dc2,10,Superluminous Supernovae,16.55,2024-02-23 16:18:35 UTC,,,"{""source"":""api"",""session"":""7a1f4a17d190291faa1824be3b3febf1d8b77a4f2d25dd6f191f76ef335684bf"",""viewport"":{""width"":1710,""height"":948},""started_at"":""2024-02-23T16:18:27.902Z"",""user_agent"":""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"",""utc_offset"":""18000"",""finished_at"":""2024-02-23T16:18:35.478Z"",""live_project"":true,""interventions"":{""opt_in"":false,""messageShown"":false},""user_language"":""en"",""subject_dimensions"":[{""clientWidth"":558,""clientHeight"":419,""naturalWidth"":1200,""naturalHeight"":900},{""clientWidth"":300,""clientHeight"":300,""naturalWidth"":300,""naturalHeight"":300}],""subject_selection_state"":{""retired"":false,""selected_at"":""2024-02-23T16:16:35.003Z"",""already_seen"":false,""selection_state"":""normal"",""finished_workflow"":false,""user_has_finished_workflow"":false},""workflow_translation_id"":""28176""}","[{""task"":""T0"",""task_label"":""**Has the lightcurve been rising for more than 20 days?**\n\nNOTE: The y-axis shows the magnitude (brightness) of the supernova. In astronomy smaller magnitudes are brighter!"",""value"":""No""}]","{""96588131"":{""retired"":{""id"":125510305,""workflow_id"":10,""classifications_count"":10,""created_at"":""2024-02-21T09:43:48.500Z"",""updated_at"":""2024-02-23T18:31:31.686Z"",""retired_at"":""2024-02-23T18:31:31.677Z"",""subject_id"":96588131,""retirement_reason"":""classification_count""},""ramean"":158.6458489125,""ZTF_URL"":""https://lasair-ztf.lsst.ac.uk/objects/ZTF23absjgik"",""decmean"":-27.650916137499998,""objectId"":""ZTF23absjgik""}}",96588131
Loading

0 comments on commit c6e8ba9

Please sign in to comment.