Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GRT Update (Coordinates, Contacts & More Metadata) #2768

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 110 additions & 37 deletions scripts/grt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
"""Script for uploading Galaxy statistics to the Galactic radio telescope.

See doc/source/admin/grt.rst for more detailed usage information.

TODO:
- toolbox
- job runners
- check if GIEs are enabled
"""
from __future__ import print_function

Expand All @@ -13,6 +18,9 @@
import sqlalchemy as sa
import yaml
import re
import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(name="grt")

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'lib')))

Expand All @@ -25,16 +33,61 @@
default_config = os.path.abspath(os.path.join(os.path.dirname(__file__), 'grt.yml'))


def _init(config):
if config.startswith('/'):
config = os.path.abspath(config)
def resolve_location(config):
"""
resolve_location takes in a dict with autodetect (bool) and hardcoded
latitude and longitude values (floats). The function calls a number of
external websites in order to resolve the host's IP address, and their
geographic location.
"""
if config['autodetect']:
# Get public IP
log.info("Locating server")
try:
ip_address = urllib2.urlopen('https://icanhazip.com').read()
except (urllib2.HTTPError, urllib2.URLError) as err:
log.error("Could not contact IP detection service. %s", err)
return None

geolocation_api = 'http://ip-api.com/json/{0}'.format(ip_address)

try:
response = urllib2.urlopen(geolocation_api).read()
except (urllib2.HTTPError, urllib2.URLError) as err:
log.error("Could not contact location detection service. %s", err)
return None

# Construct or get the Location
json_geoloc = json.loads(response)
log.info("Server Located (%s, %s)", json_geoloc['lat'], json_geoloc['lon'])
return {
'lat': json_geoloc['lat'],
'lon': json_geoloc['lon'],
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this? CloudLaunch? :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, I remembered your PR to cloudlaunch, saved me some time since you already had examples of using the service :)

else:
config = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, config))
if str(config['latitude']) == '0.0' and str(config['longitude']) == '0.0':
return None
else:
return {
'lat': config['latitude'],
'lon': config['longitude'],
}


properties = load_app_properties(ini_file=config)
def _init(config_path):
if config_path.startswith('/'):
config_path = os.path.abspath(config_path)
else:
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, config_path))

properties = load_app_properties(ini_file=config_path)
config = galaxy.config.Configuration(**properties)
object_store = build_object_store_from_config(config)

if config.database_connection is False:
log.error("Database connection not configured in %s. You will need to uncomment the database URL. Additionally you are using the default sqlite database, but GRT is most appropriate for production Galaxies.", config_path)
exit(1)

return (
mapping.init(
config.file_path,
Expand All @@ -43,7 +96,8 @@ def _init(config):
object_store=object_store
),
object_store,
config.database_connection.split(':')[0]
config.database_connection.split(':')[0],
config.version_major
)


Expand Down Expand Up @@ -89,7 +143,7 @@ def _sanitize_value(unsanitized_value):
elif type(unsanitized_value) is list:
sanitized_value = _sanitize_list(unsanitized_value)
else:
if fp_regex.match(str(unsanitized_value)):
if fp_regex.match(unicode(unsanitized_value)):
sanitized_value = None
else:
sanitized_value = unsanitized_value
Expand All @@ -100,15 +154,15 @@ def _sanitize_value(unsanitized_value):
def main(argv):
"""Entry point for GRT statistics collection."""
parser = argparse.ArgumentParser()
parser.add_argument('instance_id', help='Galactic Radio Telescope Instance ID')
parser.add_argument('api_key', help='Galactic Radio Telescope API Key')
parser.add_argument('--instance_id', help='Galactic Radio Telescope Instance ID')
parser.add_argument('--api_key', help='Galactic Radio Telescope API Key')

parser.add_argument('-c', '--config', dest='config', help='Path to GRT config file (scripts/grt.ini)', default=default_config)
parser.add_argument('--dry-run', dest='dryrun', help='Dry run (show data to be sent, but do not send)', action='store_true', default=False)
parser.add_argument('--grt-url', dest='grt_url', help='GRT Server (You can run your own!)')
args = parser.parse_args(argv[1:])

print('Loading GRT ini...')
log.info('Loading GRT ini...')
try:
with open(args.config) as f:
config_dict = yaml.load(f)
Expand All @@ -121,23 +175,25 @@ def main(argv):
config_dict['last_job_id_sent'] = 0

if args.instance_id:
config_dict['instance_id'] = args.instance_id
config_dict['grt_server']['instance_id'] = args.instance_id
if args.api_key:
config_dict['api_key'] = args.api_key
config_dict['grt_server']['api_key'] = args.api_key
if args.grt_url:
config_dict['grt_url'] = args.grt_url
config_dict['grt_server']['grt_url'] = args.grt_url

if config_dict['grt_server']['instance_id'] == '':
print("No Instance ID was provdied. One is required and may be obtained at https://telescope.galaxyproject.org")
exit(1)

if config_dict['grt_server']['api_key'] == '':
print("No API Key was provdied. One is required and may be obtained at https://telescope.galaxyproject.org")
exit(1)

print('Loading Galaxy...')
model, object_store, engine = _init(config_dict['galaxy_config'])
log.info('Loading Galaxy...')
model, object_store, engine, gx_version = _init(config_dict['galaxy_config'])
sa_session = model.context.current

# Fetch jobs COMPLETED with status OK that have not yet been sent.
jobs = sa_session.query(model.Job)\
.filter(sa.and_(
model.Job.table.c.state == "ok",
model.Job.table.c.id > config_dict['last_job_id_sent']
))\
.all()

# Set up our arrays
active_users = []
Expand All @@ -151,10 +207,19 @@ def kw_metrics(job):
}

# For every job
for job in jobs:
job_count = 0
last_job_id = None
for job in sa_session.query(model.Job)\
.filter(sa.and_(
model.Job.table.c.state == "ok",
model.Job.table.c.id > config_dict['last_job_id_sent']
))\
.all():
if job.tool_id in config_dict['tool_blacklist']:
continue

job_count += 1
last_job_id = job.id
# Append an active user, we'll reduce at the end
active_users.append(job.user_id)

Expand Down Expand Up @@ -186,18 +251,25 @@ def kw_metrics(job):
}
grt_jobs_data.append(job_data)

if len(jobs) > 0:
config_dict['last_job_id_sent'] = jobs[-1].id
if job_count > 0:
config_dict['last_job_id_sent'] = last_job_id

grt_report_data = {
'meta': {
'version': 1,
'instance_uuid': config_dict['instance_id'],
'instance_api_key': config_dict['api_key'],
'galaxy_version': gx_version,
'uuid': config_dict['grt_server']['instance_id'],
'api_key': config_dict['grt_server']['api_key'],
'name': config_dict['instance']['name'],
'description': config_dict['instance']['description'],
'tags': config_dict['instance']['tags'],
'location': resolve_location(config_dict['location']),
'latest_job': config_dict.get('last_job_id_sent', 0),
# We do not record ANYTHING about your users other than count.
'active_users': len(set(active_users)),
'total_users': sa_session.query(model.User).count(),
'recent_jobs': len(jobs),
'recent_jobs': job_count,
'url': config_dict['instance']['url'],
},
'tools': [
{
Expand All @@ -211,16 +283,17 @@ def kw_metrics(job):

if args.dryrun:
print(json.dumps(grt_report_data, indent=2))
else:
try:
urllib2.urlopen(config_dict['grt_url'], data=json.dumps(grt_report_data))
except urllib2.HTTPError as htpe:
print(htpe.read())
exit(1)

# Update grt.ini with last id of job (prevent duplicates from being sent)
with open(args.config, 'w') as f:
yaml.dump(config_dict, f, default_flow_style=False)
exit(0)

try:
urllib2.urlopen(config_dict['grt_server']['grt_url'], data=json.dumps(grt_report_data))
except urllib2.HTTPError as htpe:
print(htpe.read())
exit(1)

# Update grt.ini with last id of job (prevent duplicates from being sent)
with open(args.config, 'w') as f:
yaml.dump(config_dict, f, default_flow_style=False)

if __name__ == '__main__':
main(sys.argv)
48 changes: 45 additions & 3 deletions scripts/grt.yml.sample
Original file line number Diff line number Diff line change
@@ -1,7 +1,49 @@
---
galaxy_config: config/galaxy.ini
#instance_id: blah
#api_key: blah
grt_url: https://radio-telescope.galaxyproject.org/api/v1/upload

grt_server:
## URL to your GRT server
#grt_url: https://telescope.galaxyproject.org/api/v1/upload
grt_url: http://localhost:8000/grt/api/v1/upload
## An instance ID uniquely identifies your Galaxy instance
instance_id: "80b800b4-5507-4f19-8222-c81bbb630563"
## An API key is required to submit data to GRT
api_key: "4f3c15e9-c054-4548-a135-7acc2fc2ea92"

## The public name and description of your Galaxy instance
instance:
## The publicly accessible URL of your galaxy instance. E.g. https://fqdn/galaxy
url: ""
name: "Testing Galaxy instance"
description: |
Galaxy server providing tools for private use
tags:
#- public
- private
## If this is a cloudlaunch server, you could indicate that here
#- cloudlaunch
## If you wish to share what infrastructure you're running on, there are
#tags for this:
#- infra/aws
#- infra/gcp
#- infra/azure
#- infra/other_cloud
#- infra/private
## Additionally if you wish to share a bit about your job runners we
#would be interested to hear who is using which runners.
#- cluster/condor
#- cluster/slurm
location:
# If autodetect is set to true, then an attempt will be made to
# automatically detect your location using a third party service
# (http://ip-api.com, https://icanhazip.com).
autodetect: false
# If you still wish to share the location of your Galaxy server
# (great for helping people find local resources for Galaxy)
latitude: 51
longitude: 4


tool_blacklist:
- __SET_METADATA__
- upload1