diff --git a/accounts/dcf_views.py b/accounts/dcf_views.py index 1d2e1e8a..07e3857d 100755 --- a/accounts/dcf_views.py +++ b/accounts/dcf_views.py @@ -44,7 +44,6 @@ unlink_at_dcf, refresh_at_dcf, decode_token_chunk, calc_expiration_time from requests_oauthlib.oauth2_session import OAuth2Session -from jwt.contrib.algorithms.pycrypto import RSAAlgorithm from json import loads as json_loads # Shut this up unless we need to do debug of HTTP request contents @@ -220,7 +219,6 @@ def oauth2_callback(request): # my_jwt = jwt.PyJWT() - my_jwt.register_algorithm('RS256', RSAAlgorithm(RSAAlgorithm.SHA256)) # # DCF's key endpoint provides a list of keys they use. Right now, only one, but to future-proof, we want diff --git a/accounts/sa_utils.py b/accounts/sa_utils.py index 99c66b3e..fa16dfda 100644 --- a/accounts/sa_utils.py +++ b/accounts/sa_utils.py @@ -364,6 +364,102 @@ def _user_on_project_or_drop(gcp_id, user_email, st_logger, user_gcp): return True, None +def get_project_deleters(gcp_id, user_email, st_logger, log_name): + """ + User says they want to unregister a project. The problem is we need to ensure that if the project has service + accounts (SAs) registered at DCF, we need to get those unregistered too. But the SAs do not need to be active, so + there is no requirement that everybody in the project be DCF registered to have an SA sitting there. In fact, if + an SA has been registered by Dr. X, who has since left the lab after adding Dr. Y to the project, and Dr. X has + been dropped, there does not actually have to be ANYBODY on the project with DCF connections to have an SA. But + that is beyond our control. However, if the current user doing the operation has EVER had an NIH linkage, we + need to tell them to register at DCF first. If the current user has NEVER had an NIH linkage, we check to see if + anybody else has such a linkage. If yes, we say that the linked person needs to do the job. 
If nobody on the + project has ever been near DCF, we let the deletion continue, since this implies the project was added just to + use CGC features. + """ + try: + crm_service = get_special_crm_resource() + + # 1) Get all the project members, record if they have registered with us: + all_users_in_our_db = True + iam_policy = crm_service.projects().getIamPolicy(resource=gcp_id, body={}).execute() + bindings = iam_policy['bindings'] + roles = {} + for val in bindings: + role = val['role'] + members = val['members'] + for member in members: + if member.startswith('user:'): + email = member.split(':')[1].lower() + if email not in roles: + roles[email] = {} + registered_user = bool(User.objects.filter(email=email).first()) + roles[email]['registered_user'] = registered_user + if not registered_user: + all_users_in_our_db = False + roles[email]['roles'] = [] + roles[email]['roles'].append(role) + + # 2) Verify that the current user is on the GCP project. Somebody can only get + # here by hacking a custom POST command: + if not user_email.lower() in roles: + log_msg = '[STATUS] While unregistering GCP {0}: User email {1} is not in the GCP IAM policy.'.format(gcp_id, user_email) + logger.info(log_msg) + st_logger.write_struct_log_entry(log_name, { + 'message': log_msg + }) + + return { + 'message': 'Your user email ({}) was not found in GCP {}. 
You must be a member of the project in order to unregister it.'.format(user_email, gcp_id), + } + + # 3) Verify which users have ever registered with NIH: + some_user_registered = False + this_user_registered = False + all_users_nih_linkage_history = True + + for email in roles: + member = roles[email] + + member_is_this_user = (user_email.lower() == email) + + # IF USER IS REGISTERED + if member['registered_user']: + user = User.objects.get(email=email) + nih_user = None + # FIND NIH_USER FOR USER + # Since we are not checking "linked" state, we may have more than one: + nih_users = NIH_User.objects.filter(user_id=user.id) + member['nih_registered'] = len(nih_users) > 0 + + if member['nih_registered']: + some_user_registered = True + if member_is_this_user: + this_user_registered = True + else: + all_users_nih_linkage_history = False + + else: + member['nih_registered'] = False + all_users_nih_linkage_history = False + + except HttpError as e: + logger.error("[STATUS] While surveying GCP deleter status {}: ".format(gcp_id)) + logger.exception(e) + return {'message': 'There was an error accessing your project. Please verify that you have set the permissions correctly.'} + except Exception as e: + logger.error("[STATUS] While surveying GCP deleter status {}: ".format(gcp_id)) + logger.exception(e) + return {'message': "There was an error accessing a GCP project. 
Please contact feedback@isb-cgc.org."} + + return_obj = {'roles': roles, + 'some_user_registered': some_user_registered, + 'this_user_registered': this_user_registered, + 'all_users_in_our_db': all_users_in_our_db, + 'all_users_nih_linkage_history': all_users_nih_linkage_history} + return return_obj + + def _get_project_users(gcp_id, service_account, user_email, st_logger, log_name, is_refresh): """ While we can no longer show the user with a listing of what datasets each project user has access to (DCF will not diff --git a/accounts/views.py b/accounts/views.py index dbed317f..0cdd9b23 100644 --- a/accounts/views.py +++ b/accounts/views.py @@ -37,7 +37,7 @@ from models import * from projects.models import User_Data_Tables from django.utils.html import escape -from sa_utils import verify_service_account, register_service_account, \ +from sa_utils import verify_service_account, register_service_account, get_project_deleters, \ unregister_all_gcp_sa, unregister_sa, service_account_dict, \ do_nih_unlink, deactivate_nih_add_to_open, controlled_auth_datasets, have_linked_user @@ -508,16 +508,43 @@ def user_gcp_delete(request, user_id, gcp_id): if request.POST: user = User.objects.get(id=user_id) logger.info("[STATUS] User {} is unregistering GCP {}".format(user.email,gcp_id)) + + # + # In the new DCF-centric world, the user has to be logged into DCF if they are trying to + # delete a project with a service account on it. But users who have never been anywhere near + # DCF can register projects just to use webapp services. + # So, if user HAS EVER linked to DCF, they gotta be logged in to do this. If not, then if someone + # else on the project HAS EVER linked to DCF, they gotta be logged in. If nobody fits that bill, + # we let them delete the project. 
+ # Note we also catch the case where a user not on a project is trying to delete it (requires custom + # crafted POST): + # + gcp = GoogleProject.objects.get(id=gcp_id, active=1) - success, msgs = unregister_all_gcp_sa(user_id, gcp_id, gcp.project_id) - # If we encounter problems deleting SAs, stop the process: - if not success: - messages.error(request, "Unregistering service accounts from Data Commons Framework was not successful.") - logger.info("[STATUS] SA Unregistration was unsuccessful {}".format(user.email, gcp_id)) - for msg in msgs: - messages.error(request, msg) + deleter_analysis = get_project_deleters(gcp.project_id, user.email, logger, SERVICE_ACCOUNT_LOG_NAME) + if 'message' in deleter_analysis: + messages.error(request, deleter_analysis['message']) return redirect('user_gcp_list', user_id=request.user.id) - logger.info("[STATUS] User {} is unregistering GCP {}: SAs dropped".format(user.email, gcp_id)) + + do_sa_unregister = True + if not deleter_analysis['this_user_registered']: + if deleter_analysis['some_user_registered']: + messages.error(request, "Only a project member who has registered with the Data Commons Framework can unregister this project") + logger.info("[STATUS] User {} with no DCF status tried to unregister {}".format(user.email, gcp_id)) + return redirect('user_gcp_list', user_id=request.user.id) + else: # Nobody on the project has ever done an NIH Linking. Skip the SA step... 
+ do_sa_unregister = False + + if do_sa_unregister: + success, msgs = unregister_all_gcp_sa(user_id, gcp_id, gcp.project_id) + # If we encounter problems deleting SAs, stop the process: + if not success: + messages.error(request, "Unregistering service accounts from Data Commons Framework was not successful.") + logger.info("[STATUS] SA Unregistration was unsuccessful {}".format(user.email, gcp_id)) + for msg in msgs: + messages.error(request, msg) + return redirect('user_gcp_list', user_id=request.user.id) + logger.info("[STATUS] User {} is unregistering GCP {}: SAs dropped".format(user.email, gcp_id)) gcp.user.clear() gcp.active=False gcp.save() diff --git a/cohorts/metadata_helpers.py b/cohorts/metadata_helpers.py index 1b46e1a2..604119cb 100644 --- a/cohorts/metadata_helpers.py +++ b/cohorts/metadata_helpers.py @@ -187,6 +187,36 @@ 'TARGET': None, } +BQ_METADATA_DATA_TABLES = { + 'TCGA': { + 'HG19': { + 'table': 'tcga_metadata_data_hg19_250718', + 'dataset': 'TCGA_hg19_data_v0', + }, + 'HG38': { + 'table': 'tcga_metadata_data_hg38_250718', + 'dataset': 'TCGA_hg38_data_v0', + }, + }, + 'CCLE': None, + 'TARGET': None, +} + +BQ_BIOCLIN_DATA_TABLES = { + 'TCGA': { + 'table': 'Clinical', + 'dataset': 'TCGA_bioclin_v0', + }, + 'CCLE': { + 'table': 'clinical_v0', + 'dataset': 'CCLE_bioclin_v0', + }, + 'TARGET': { + 'table': 'Clinical', + 'dataset': 'TARGET_bioclin_v0', + }, +} + # Get a set of random characters of 'length' def make_id(length): diff --git a/google_helpers/bigquery/bq_support.py b/google_helpers/bigquery/bq_support.py index 0117aefd..f2b66923 100644 --- a/google_helpers/bigquery/bq_support.py +++ b/google_helpers/bigquery/bq_support.py @@ -366,13 +366,26 @@ def fetch_job_results(self, job_ref): def fetch_job_resource(self, job_ref): return self.bq_service.jobs().get(**job_ref).execute(num_retries=5) - # Execute a query to be saved on a temp table (shorthand to instance method above), optionally parameterized - # and fetch its results + # Add rows to the 
table specified by project.dataset.table + # Note that this is a class method therefore the rows must be supplied formatted ready + # for insertion, build_row will not be called! (build_row is implemented in derived classes only) + @classmethod + def add_rows_to_table(cls, rows, project, dataset, table): + bqs = cls(project, dataset, table) + return bqs._streaming_insert(rows) + + # Execute a query, optionally parameterized, and fetch its results @classmethod def execute_query_and_fetch_results(cls, query, parameters=None): bqs = cls(None, None, None) return bqs.execute_query(query, parameters) + @classmethod + # Execute a query, optionally parameterized, to be saved on a temp table + def execute_query_to_table(cls, query, project, dataset, table, parameters=None): + bqs = cls(project, dataset, table) + return bqs.execute_query(query, parameters) + # Insert a BQ job for a query to be saved on a temp table (shorthand to instance method above), optionally # parameterized, and return the job reference @classmethod diff --git a/google_helpers/bigquery/metrics_support.py b/google_helpers/bigquery/metrics_support.py new file mode 100644 index 00000000..f7016ff6 --- /dev/null +++ b/google_helpers/bigquery/metrics_support.py @@ -0,0 +1,41 @@ +""" + +Copyright 2018, Institute for Systems Biology + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +""" + +import logging +from django.conf import settings +from bq_support import BigQuerySupport + +logger = logging.getLogger('main_logger') + +MAX_INSERT = settings.MAX_BQ_INSERT + + +class BigQueryMetricsSupport(BigQuerySupport): + + def __init__(self, metrics_table): + super(BigQueryMetricsSupport, self).__init__(settings.BIGQUERY_PROJECT_NAME, settings.METRICS_BQ_DATASET, metrics_table) + + # Add rows to the metrics table specified by table + # Note that this is a class method therefore the rows must be supplied formatted ready + # for insertion, build_row will not be called! + @classmethod + def add_rows_to_table(cls, rows, table): + bqs = cls(table) + return bqs._streaming_insert(rows) + + diff --git a/google_helpers/sheets/__init__.py b/google_helpers/sheets/__init__.py new file mode 100644 index 00000000..001a7485 --- /dev/null +++ b/google_helpers/sheets/__init__.py @@ -0,0 +1 @@ +__author__ = 'spaquett@systemsbiology.org' diff --git a/google_helpers/sheets/abstract.py b/google_helpers/sheets/abstract.py new file mode 100644 index 00000000..04b0675d --- /dev/null +++ b/google_helpers/sheets/abstract.py @@ -0,0 +1,31 @@ +""" + +Copyright 2019, Institute for Systems Biology + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +""" + +from abc import ABCMeta, abstractmethod + + +# Base Abstract class which defines the shared methods and properties for interaction with Google Sheets +class SheetsABC: + __metaclass__ = ABCMeta + + @abstractmethod + def __init__(self): + pass + + + diff --git a/google_helpers/sheets/sheets_service.py b/google_helpers/sheets/sheets_service.py new file mode 100644 index 00000000..ddc51e72 --- /dev/null +++ b/google_helpers/sheets/sheets_service.py @@ -0,0 +1,36 @@ +""" + +Copyright 2015-2019, Institute for Systems Biology + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +""" + +from oauth2client.client import GoogleCredentials +from django.conf import settings +import httplib2 +from .utils import build_with_retries + + +def get_sheet_service(): + + SHEETS_SCOPES = [ + 'https://www.googleapis.com/auth/spreadsheets' + ] + + credentials = GoogleCredentials.from_stream( + settings.GOOGLE_APPLICATION_CREDENTIALS).create_scoped(SHEETS_SCOPES) + http = httplib2.Http() + http = credentials.authorize(http) + service = build_with_retries('sheets', 'v4', None, 2, http=http) + return service diff --git a/google_helpers/sheets/sheets_support.py b/google_helpers/sheets/sheets_support.py new file mode 100644 index 00000000..56b7aa88 --- /dev/null +++ b/google_helpers/sheets/sheets_support.py @@ -0,0 +1,42 @@ +""" + +Copyright 2018, Institute for Systems Biology + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +""" + +import logging +import re +from time import sleep +from uuid import uuid4 +import copy +from django.conf import settings +from abstract import SheetsABC +from sheets_service import get_sheet_service + +logger = logging.getLogger('main_logger') + + +class SheetsSupport(SheetsABC): + def __init__(self, project_id, executing_project=None): + # Project which will execute any jobs run by this class + self.executing_project = executing_project or settings.BIGQUERY_PROJECT_NAME + # Destination project + self.project_id = project_id + + self.sheet_service = get_sheet_service() + + + +