Skip to content

Commit

Permalink
Merge pull request #807 from isb-cgc/isb-cgc-prod-sp
Browse files Browse the repository at this point in the history
Sprint 31
  • Loading branch information
s-paquette authored Jan 17, 2019
2 parents a9faa63 + dbdbab4 commit c0d6ea2
Show file tree
Hide file tree
Showing 10 changed files with 328 additions and 13 deletions.
2 changes: 0 additions & 2 deletions accounts/dcf_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
unlink_at_dcf, refresh_at_dcf, decode_token_chunk, calc_expiration_time

from requests_oauthlib.oauth2_session import OAuth2Session
from jwt.contrib.algorithms.pycrypto import RSAAlgorithm
from json import loads as json_loads

# Shut this up unless we need to do debug of HTTP request contents
Expand Down Expand Up @@ -220,7 +219,6 @@ def oauth2_callback(request):
#

my_jwt = jwt.PyJWT()
my_jwt.register_algorithm('RS256', RSAAlgorithm(RSAAlgorithm.SHA256))

#
# DCF's key endpoint provides a list of keys they use. Right now, only one, but to future-proof, we want
Expand Down
96 changes: 96 additions & 0 deletions accounts/sa_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,102 @@ def _user_on_project_or_drop(gcp_id, user_email, st_logger, user_gcp):
return True, None


def get_project_deleters(gcp_id, user_email, st_logger, log_name):
    """
    Survey the members of a GCP project to decide whether the current user may unregister it.

    User says they want to unregister a project. The problem is we need to ensure that if the project has service
    accounts (SAs) registered at DCF, we need to get those unregistered too. But the SAs do not need to be active, so
    there is no requirement that everybody in the project be DCF registered to have an SA sitting there. In fact, if
    an SA has been registered by Dr. X, who has since left the lab after adding Dr. Y to the project, and Dr. X has
    been dropped, there does not actually have to be ANYBODY on the project with DCF connections to have an SA. But
    that is beyond our control. However, if the current user doing the operation has EVER had an NIH linkage, we
    need to tell them to register at DCF first. If the current user has NEVER had a NIH linkage, we check to see if
    anybody else has such a linkage. If yes, we say that the linked person needs to do the job. If nobody on the
    project has ever been near DCF, we let the deletion continue, since this implies the project was added just to
    use CGC features.

    Parameters:
        gcp_id:     project identifier handed to getIamPolicy as the 'resource'
        user_email: email of the user asking to unregister the project (compared case-insensitively)
        st_logger:  object providing write_struct_log_entry(log_name, payload)
        log_name:   log name passed through to st_logger

    Returns:
        On any failure (API error, unexpected exception, or the user not being in the IAM policy) a dict
        holding only a 'message' key with text suitable for showing to the user. Otherwise a dict with:
            'roles':                         {email: {'registered_user': bool, 'roles': [role, ...],
                                                      'nih_registered': bool}} for every 'user:' member
            'some_user_registered':          True if any member has at least one NIH_User record
            'this_user_registered':          True if the requesting user has at least one NIH_User record
            'all_users_in_our_db':           True if every member has a User record
            'all_users_nih_linkage_history': True if every member has at least one NIH_User record
    """
    try:
        crm_service = get_special_crm_resource()

        # 1) Get all the project members, record if they have registered with us:
        all_users_in_our_db = True
        iam_policy = crm_service.projects().getIamPolicy(resource=gcp_id, body={}).execute()
        roles = {}
        # User rows found in this pass, keyed by lowercased email, so that step 3 does not have to
        # look each user up a second time (the old second lookup used get(), which raises if two
        # User rows share an email):
        users_by_email = {}
        # A policy without a 'bindings' entry is treated as having no members; the requesting user
        # then fails the membership check in step 2 instead of triggering the generic error path.
        for binding in iam_policy.get('bindings', []):
            role = binding['role']
            for member in binding['members']:
                # Only human users matter here; other member kinds (anything not 'user:...') are ignored.
                if not member.startswith('user:'):
                    continue
                email = member.split(':', 1)[1].lower()
                if email not in roles:
                    django_user = User.objects.filter(email=email).first()
                    users_by_email[email] = django_user
                    registered_user = bool(django_user)
                    if not registered_user:
                        all_users_in_our_db = False
                    roles[email] = {'registered_user': registered_user, 'roles': []}
                roles[email]['roles'].append(role)

        # 2) Verify that the current user is on the GCP project. Somebody can only get
        # here by hacking a custom POST command:
        requester_email = user_email.lower()
        if requester_email not in roles:
            log_msg = '[STATUS] While unregistering GCP {0}: User email {1} is not in the GCP IAM policy.'.format(gcp_id, user_email)
            logger.info(log_msg)
            st_logger.write_struct_log_entry(log_name, {
                'message': log_msg
            })

            return {
                'message': 'Your user email ({}) was not found in GCP {}. You must be a member of the project in order to unregister it.'.format(user_email, gcp_id),
            }

        # 3) Verify which users have ever registered with NIH:
        some_user_registered = False
        this_user_registered = False
        all_users_nih_linkage_history = True

        for email, member in roles.items():
            if member['registered_user']:
                # Since we are not checking "linked" state, there may be more than one NIH_User
                # row for a user; we only care whether at least one exists.
                member['nih_registered'] = NIH_User.objects.filter(user_id=users_by_email[email].id).exists()
            else:
                member['nih_registered'] = False

            if member['nih_registered']:
                some_user_registered = True
                if email == requester_email:
                    this_user_registered = True
            else:
                all_users_nih_linkage_history = False

    except HttpError as e:
        logger.error("[STATUS] While surveying GCP deleter status {}: ".format(gcp_id))
        logger.exception(e)
        return {'message': 'There was an error accessing your project. Please verify that you have set the permissions correctly.'}
    except Exception as e:
        logger.error("[STATUS] While surveying GCP deleter status {}: ".format(gcp_id))
        logger.exception(e)
        # NOTE(review): the contact address in the message below looks like it was obfuscated in this
        # copy of the source - confirm the intended address before shipping.
        return {'message': "There was an error accessing a GCP project. Please contact [email protected]."}

    return_obj = {'roles': roles,
                  'some_user_registered': some_user_registered,
                  'this_user_registered': this_user_registered,
                  'all_users_in_our_db': all_users_in_our_db,
                  'all_users_nih_linkage_history': all_users_nih_linkage_history}
    return return_obj


def _get_project_users(gcp_id, service_account, user_email, st_logger, log_name, is_refresh):
"""
While we can no longer show the user with a listing of what datasets each project user has access to (DCF will not
Expand Down
45 changes: 36 additions & 9 deletions accounts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from models import *
from projects.models import User_Data_Tables
from django.utils.html import escape
from sa_utils import verify_service_account, register_service_account, \
from sa_utils import verify_service_account, register_service_account, get_project_deleters, \
unregister_all_gcp_sa, unregister_sa, service_account_dict, \
do_nih_unlink, deactivate_nih_add_to_open, controlled_auth_datasets, have_linked_user

Expand Down Expand Up @@ -508,16 +508,43 @@ def user_gcp_delete(request, user_id, gcp_id):
if request.POST:
user = User.objects.get(id=user_id)
logger.info("[STATUS] User {} is unregistering GCP {}".format(user.email,gcp_id))

#
# In the new DCF-centric world, the user has to be logged into DCF if they are trying to
# delete a project with a service account on it. But users who have never been anywhere near
# DCF can register projects just to use webapp services.
# So, if user HAS EVER linked to DCF, they gotta be logged in to do this. If not, then if someone
# else on the project HAS EVER linked to DCF, they gotta be logged in. If nobody fits that bill,
# we let them delete the project.
# Note we also catch the case where a user not on a project is trying to delete it (requires custom
# crafted POST):
#

gcp = GoogleProject.objects.get(id=gcp_id, active=1)
success, msgs = unregister_all_gcp_sa(user_id, gcp_id, gcp.project_id)
# If we encounter problems deleting SAs, stop the process:
if not success:
messages.error(request, "Unregistering service accounts from Data Commons Framework was not successful.")
logger.info("[STATUS] SA Unregistration was unsuccessful {}".format(user.email, gcp_id))
for msg in msgs:
messages.error(request, msg)
deleter_analysis = get_project_deleters(gcp.project_id, user.email, logger, SERVICE_ACCOUNT_LOG_NAME)
if 'message' in deleter_analysis:
messages.error(request, deleter_analysis['message'])
return redirect('user_gcp_list', user_id=request.user.id)
logger.info("[STATUS] User {} is unregistering GCP {}: SAs dropped".format(user.email, gcp_id))

do_sa_unregister = True
if not deleter_analysis['this_user_registered']:
if deleter_analysis['some_user_registered']:
messages.error(request, "Only a project member who has registered with the Data Commons Framework can unregister this project")
logger.info("[STATUS] User {} with no DCF status tried to unregister {}".format(user.email, gcp_id))
return redirect('user_gcp_list', user_id=request.user.id)
else: # Nobody on the project has ever done an NIH Linking. Skip the SA step...
do_sa_unregister = False

if do_sa_unregister:
success, msgs = unregister_all_gcp_sa(user_id, gcp_id, gcp.project_id)
# If we encounter problems deleting SAs, stop the process:
if not success:
messages.error(request, "Unregistering service accounts from Data Commons Framework was not successful.")
logger.info("[STATUS] SA Unregistration was unsuccessful {}".format(user.email, gcp_id))
for msg in msgs:
messages.error(request, msg)
return redirect('user_gcp_list', user_id=request.user.id)
logger.info("[STATUS] User {} is unregistering GCP {}: SAs dropped".format(user.email, gcp_id))
gcp.user.clear()
gcp.active=False
gcp.save()
Expand Down
30 changes: 30 additions & 0 deletions cohorts/metadata_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,36 @@
'TARGET': None,
}

# Metadata-data table locations (presumably BigQuery, going by the BQ_ prefix - confirm).
# Layout: program -> genome build -> {'table': ..., 'dataset': ...}.
# A program mapped to None has no such table configured.
BQ_METADATA_DATA_TABLES = {
    'TCGA': {
        'HG19': {'table': 'tcga_metadata_data_hg19_250718', 'dataset': 'TCGA_hg19_data_v0'},
        'HG38': {'table': 'tcga_metadata_data_hg38_250718', 'dataset': 'TCGA_hg38_data_v0'},
    },
    'CCLE': None,
    'TARGET': None,
}

# Bioclin table locations (presumably BigQuery, going by the BQ_ prefix - confirm).
# Layout: program -> {'table': ..., 'dataset': ...}; every listed program has an entry.
BQ_BIOCLIN_DATA_TABLES = {
    'TCGA': {'table': 'Clinical', 'dataset': 'TCGA_bioclin_v0'},
    'CCLE': {'table': 'clinical_v0', 'dataset': 'CCLE_bioclin_v0'},
    'TARGET': {'table': 'Clinical', 'dataset': 'TARGET_bioclin_v0'},
}


# Get a set of random characters of 'length'
def make_id(length):
Expand Down
17 changes: 15 additions & 2 deletions google_helpers/bigquery/bq_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,13 +366,26 @@ def fetch_job_results(self, job_ref):
# Look up a job through the service's jobs().get() call. job_ref is expanded into the keyword
# arguments of get(), and the request is executed with num_retries=5.
def fetch_job_resource(self, job_ref):
    job_request = self.bq_service.jobs().get(**job_ref)
    return job_request.execute(num_retries=5)

# Execute a query to be saved on a temp table (shorthand to instance method above), optionally parameterized
# and fetch its results
# Insert rows into the table identified by project.dataset.table.
# This is a class method: it builds its own instance and hands the rows straight to
# _streaming_insert, so build_row is never called and the rows must therefore arrive already
# formatted for insertion. (build_row is implemented in derived classes only.)
@classmethod
def add_rows_to_table(cls, rows, project, dataset, table):
    return cls(project, dataset, table)._streaming_insert(rows)

# Run a query, optionally parameterized, and return its results.
# The helper instance is created with no project, dataset or table (all three are None).
@classmethod
def execute_query_and_fetch_results(cls, query, parameters=None):
    return cls(None, None, None).execute_query(query, parameters)

# Execute a query, optionally parameterized, to be saved on a temp table.
# The query runs through execute_query on an instance built for project.dataset.table.
@classmethod
def execute_query_to_table(cls, query, project, dataset, table, parameters=None):
    table_support = cls(project, dataset, table)
    return table_support.execute_query(query, parameters)

# Insert a BQ job for a query to be saved on a temp table (shorthand to instance method above), optionally
# parameterized, and return the job reference
@classmethod
Expand Down
41 changes: 41 additions & 0 deletions google_helpers/bigquery/metrics_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Copyright 2018, Institute for Systems Biology
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import logging
from django.conf import settings
from bq_support import BigQuerySupport

# Module logger, obtained under the name 'main_logger'.
logger = logging.getLogger('main_logger')

# Read once at import time from settings.MAX_BQ_INSERT.
# NOTE(review): not referenced in the visible part of this module; presumably a cap on rows per
# insert - confirm where (and whether) it is used.
MAX_INSERT = settings.MAX_BQ_INSERT


# BigQuerySupport specialization for metrics tables: the project and dataset always come from
# Django settings (BIGQUERY_PROJECT_NAME and METRICS_BQ_DATASET), so callers only name the table.
class BigQueryMetricsSupport(BigQuerySupport):

    def __init__(self, metrics_table):
        project = settings.BIGQUERY_PROJECT_NAME
        dataset = settings.METRICS_BQ_DATASET
        super(BigQueryMetricsSupport, self).__init__(project, dataset, metrics_table)

    # Insert rows into the given metrics table.
    # As a class method this builds its own instance and passes the rows directly to
    # _streaming_insert; build_row is not called, so rows must already be formatted for insertion.
    # NOTE(review): this takes (rows, table) only, unlike a (rows, project, dataset, table) variant
    # on the base class - confirm callers never reach this class with the four-argument form.
    @classmethod
    def add_rows_to_table(cls, rows, table):
        return cls(table)._streaming_insert(rows)


1 change: 1 addition & 0 deletions google_helpers/sheets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__author__ = '[email protected]'
31 changes: 31 additions & 0 deletions google_helpers/sheets/abstract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Copyright 2019, Institute for Systems Biology
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from abc import ABCMeta, abstractmethod


# Abstract base class for the helpers in this package that interact with Google Sheets.
# It currently declares nothing beyond an abstract constructor, so a concrete subclass must
# provide its own __init__.
class SheetsABC(object):
    # Python 2 way of selecting the metaclass; Python 3 ignores this attribute, in which case the
    # @abstractmethod marker below is not enforced at instantiation time.
    __metaclass__ = ABCMeta

    @abstractmethod
    def __init__(self):
        pass



36 changes: 36 additions & 0 deletions google_helpers/sheets/sheets_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Copyright 2015-2019, Institute for Systems Biology
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from oauth2client.client import GoogleCredentials
from django.conf import settings
import httplib2
from .utils import build_with_retries


def get_sheet_service():
    """
    Build and return an authorized client for version 'v4' of the 'sheets' API.

    Credentials are loaded with GoogleCredentials.from_stream from settings.GOOGLE_APPLICATION_CREDENTIALS,
    restricted to the spreadsheets scope, and used to authorize a fresh httplib2.Http object.
    """
    scopes = ['https://www.googleapis.com/auth/spreadsheets']

    stream_credentials = GoogleCredentials.from_stream(settings.GOOGLE_APPLICATION_CREDENTIALS)
    scoped_credentials = stream_credentials.create_scoped(scopes)
    authorized_http = scoped_credentials.authorize(httplib2.Http())

    # The positional None and 2 are passed through unchanged; their meaning is defined by
    # build_with_retries, which is not visible here.
    return build_with_retries('sheets', 'v4', None, 2, http=authorized_http)
Loading

0 comments on commit c0d6ea2

Please sign in to comment.