diff --git a/apps/about/src/about/templates/admin_wizard.mako b/apps/about/src/about/templates/admin_wizard.mako index a29ee9b742d..bd50b908875 100644 --- a/apps/about/src/about/templates/admin_wizard.mako +++ b/apps/about/src/about/templates/admin_wizard.mako @@ -23,7 +23,6 @@ from metadata.conf import OPTIMIZER, has_optimizer from desktop.auth.backend import is_admin from desktop.conf import has_connectors -from desktop.lib.i18n import smart_unicode from desktop.views import commonheader, commonfooter if sys.version_info[0] > 2: diff --git a/apps/about/src/about/urls.py b/apps/about/src/about/urls.py index 15f91375749..6352602519f 100644 --- a/apps/about/src/about/urls.py +++ b/apps/about/src/about/urls.py @@ -15,15 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys +from django.urls import re_path from about import views as about_views -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path - urlpatterns = [ re_path(r'^$', about_views.admin_wizard, name='index'), re_path(r'^admin_wizard$', about_views.admin_wizard, name='admin_wizard'), diff --git a/apps/about/src/about/views.py b/apps/about/src/about/views.py index d38140f4106..2fdc83aeb58 100644 --- a/apps/about/src/about/views.py +++ b/apps/about/src/about/views.py @@ -15,9 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import str -import logging -import sys +from django.utils.translation import gettext as _ from desktop import appmanager from desktop.auth.backend import is_hue_admin @@ -26,11 +24,6 @@ from desktop.models import Settings, hue_version from desktop.views import collect_usage -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - def admin_wizard(request): if is_hue_admin(request.user): diff --git a/apps/beeswax/src/beeswax/api.py b/apps/beeswax/src/beeswax/api.py index f437600f83a..915afd6c688 100644 --- a/apps/beeswax/src/beeswax/api.py +++ b/apps/beeswax/src/beeswax/api.py @@ -16,7 +16,6 @@ # limitations under the License. import re -import sys import json import logging from builtins import zip diff --git a/apps/beeswax/src/beeswax/api_tests.py b/apps/beeswax/src/beeswax/api_tests.py index 66b61102072..0bc348975e6 100644 --- a/apps/beeswax/src/beeswax/api_tests.py +++ b/apps/beeswax/src/beeswax/api_tests.py @@ -16,27 +16,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import json -import logging -import pytest import sys +import logging +from unittest.mock import Mock, patch +import pytest from django.test import TestCase from requests.exceptions import ReadTimeout +from beeswax.api import _autocomplete, get_functions from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.test_utils import add_to_group, grant_access from useradmin.models import User -from beeswax.api import _autocomplete, get_functions - - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - - LOG = logging.getLogger() @@ -47,9 +39,8 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") - def test_autocomplete_time_out(self): - get_tables_meta=Mock( + get_tables_meta = Mock( side_effect=ReadTimeout("HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)") ) db = Mock( @@ -65,7 +56,6 @@ def test_autocomplete_time_out(self): 'error': "HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)" }) - def test_get_functions(self): db = Mock( get_functions=Mock( @@ -83,7 +73,6 @@ def test_get_functions(self): resp == [{'name': 'f1'}, {'name': 'f2'}]) - def test_get_functions(self): with patch('beeswax.api._get_functions') as _get_functions: db = Mock() @@ -97,12 +86,11 @@ def test_get_functions(self): resp['functions'] == [{'name': 'f1'}, {'name': 'f2'}, {'name': 'f3'}]) - def test_get_function(self): db = Mock() - db.client = Mock(query_server = {'dialect': 'hive'}) + db.client = Mock(query_server={'dialect': 'hive'}) db.get_function = Mock( - return_value = [ + return_value=[ ['floor_month(param) - Returns the timestamp at a month granularity'], ['param needs to be a timestamp value'], ['Example:'], @@ -123,8 +111,7 @@ def test_get_function(self): '> SELECT floor_month(CAST(\'yyyy-MM-dd HH:mm:ss\' AS TIMESTAMP)) FROM src;\nyyyy-MM-01 00:00:00' }) - - db.client = Mock(query_server = {'dialect': 'impala'}) + db.client = Mock(query_server={'dialect': 'impala'}) data = _autocomplete(db, operation='function') assert data['function'] == {} diff --git a/apps/beeswax/src/beeswax/conf.py b/apps/beeswax/src/beeswax/conf.py index 032ea79010a..07eecf4500f 100644 --- a/apps/beeswax/src/beeswax/conf.py +++ b/apps/beeswax/src/beeswax/conf.py @@ -15,13 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import division - -import sys import math import logging import os.path -from builtins import str from django.utils.translation import gettext as _, gettext_lazy as _t diff --git a/apps/beeswax/src/beeswax/create_table.py b/apps/beeswax/src/beeswax/create_table.py index d6e7c1700a3..ea523302f62 100644 --- a/apps/beeswax/src/beeswax/create_table.py +++ b/apps/beeswax/src/beeswax/create_table.py @@ -15,46 +15,39 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- -from __future__ import division -from builtins import str -from builtins import range -from builtins import object +import re import csv import gzip import json -import logging import math -import re -import sys +import logging -from django.urls import reverse from django.http import QueryDict +from django.urls import reverse +from django.utils.translation import gettext as _ from aws.s3.s3fs import S3FileSystemException +from beeswax.common import TERMINATORS +from beeswax.design import hql_query +from beeswax.forms import ( + TERMINATOR_CHOICES, + ColumnTypeFormSet, + CreateByImportDelimForm, + CreateByImportFileForm, + CreateTableForm, + PartitionTypeFormSet, +) +from beeswax.server import dbms +from beeswax.server.dbms import QueryServerException +from beeswax.views import execute_directly from desktop.context_processors import get_app_name from desktop.lib import django_mako, i18n +from desktop.lib.django_forms import MultiForm from desktop.lib.django_util import render from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.django_forms import MultiForm from desktop.models import _get_apps from hadoop.fs import hadoopfs -from beeswax.common import TERMINATORS -from beeswax.design import hql_query -from beeswax.forms import CreateTableForm, ColumnTypeFormSet,\ - PartitionTypeFormSet, CreateByImportFileForm, CreateByImportDelimForm,\ - TERMINATOR_CHOICES -from beeswax.server import dbms -from beeswax.server.dbms import QueryServerException -from beeswax.views import execute_directly - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -62,7 +55,7 @@ def create_table(request, database='default'): """Create a table by specifying its attributes manually""" db = dbms.get(request.user) dbs = db.get_databases() - databases = [{'name':db, 'url':reverse('beeswax:create_table', kwargs={'database': db})} for db in dbs] + databases = [{'name': db, 'url': reverse('beeswax:create_table', kwargs={'database': db})} for db in dbs] form = MultiForm( table=CreateTableForm, @@ -77,8 +70,8 @@ def create_table(request, database='default'): if request.POST.get('create'): if form.is_valid(): - columns = [ f.cleaned_data for f in form.columns.forms ] - partition_columns = [ f.cleaned_data for f in form.partitions.forms ] + columns = [f.cleaned_data for f in form.columns.forms] + partition_columns = [f.cleaned_data for f in form.partitions.forms] proposed_query = django_mako.render_to_string("create_table_statement.mako", { 'databases': databases, 'database': database, @@ -109,15 +102,16 @@ def create_table(request, database='default'): IMPORT_PEEK_SIZE = 5 * 1024**2 IMPORT_PEEK_NLINES = 10 -DELIMITERS = [ hive_val for hive_val, desc, ascii in TERMINATORS ] -DELIMITER_READABLE = {'\\001' : _('ctrl-As'), - '\\002' : _('ctrl-Bs'), - '\\003' : _('ctrl-Cs'), - '\\t' : _('tabs'), - ',' : _('commas'), - ' ' : _('spaces')} +DELIMITERS = [hive_val for hive_val, desc, ascii in TERMINATORS] +DELIMITER_READABLE = {'\\001': _('ctrl-As'), + '\\002': _('ctrl-Bs'), + '\\003': _('ctrl-Cs'), + '\\t': _('tabs'), + ',': _('commas'), + ' ': _('spaces')} FILE_READERS = [] + def import_wizard(request, database='default'): """ Help users define table and based on a file they want to import to Hive. 
@@ -133,7 +127,7 @@ def import_wizard(request, database='default'): db = dbms.get(request.user) dbs = db.get_databases() - databases = [{'name':db, 'url':reverse('beeswax:import_wizard', kwargs={'database': db})} for db in dbs] + databases = [{'name': db, 'url': reverse('beeswax:import_wizard', kwargs={'database': db})} for db in dbs] if request.method == 'POST': # @@ -164,7 +158,7 @@ def import_wizard(request, database='default'): cancel_s3_column_def = request.POST.get('cancel_create') # Step 3 -> 2 # Exactly one of these should be True - if len([_f for _f in (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create, cancel_s2_user_delim, cancel_s3_column_def) if _f]) != 1: + if len([_f for _f in (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create, cancel_s2_user_delim, cancel_s3_column_def) if _f]) != 1: # noqa: E501 raise PopupException(_('Invalid form submission')) if not do_s2_auto_delim: @@ -198,7 +192,8 @@ def import_wizard(request, database='default'): raise PopupException(_('Path location "%s" is invalid: %s') % (path, e)) delim_is_auto = True - fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS) + fields_list, n_cols, s2_delim_form = _delim_preview( + request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS) if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid(): # Delimit based on input @@ -236,7 +231,7 @@ def import_wizard(request, database='default'): try: fields_list_for_json = list(fields_list) if fields_list_for_json: - fields_list_for_json[0] = [re.sub('[^\w]', '', a) for a in fields_list_for_json[0]] # Cleaning headers + fields_list_for_json[0] = [re.sub(r'[^\w]', '', a) for a in fields_list_for_json[0]] # Cleaning headers apps_list = _get_apps(request.user, '') return render('import_wizard_define_columns.mako', request, { 'apps': apps_list, @@ -251,7 +246,8 @@ def import_wizard(request, database='default'): 'databases': databases }) except Exception as e: - raise PopupException(_("The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."), detail=e) + raise PopupException(_( + "The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."), detail=e) # # Final: Execute @@ -271,7 +267,7 @@ def import_wizard(request, database='default'): 'path': path, 'skip_header': request.GET.get('removeHeader', 'off').lower() == 'on' }, - 'columns': [ f.cleaned_data for f in s3_col_formset.forms ], + 'columns': [f.cleaned_data for f in s3_col_formset.forms], 'partition_columns': [], 'database': database, 'databases': databases @@ -337,7 +333,7 @@ def _delim_preview(fs, file_form, encoding, file_types, delimiters): LOG.exception(msg) raise PopupException(msg) - n_cols = max([ len(row) for row in fields_list ]) + n_cols = max([len(row) for row in fields_list]) # ``delimiter`` is a MultiValueField. delimiter_0 and delimiter_1 are the sub-fields. 
delimiter_0 = delim delimiter_1 = '' @@ -409,13 +405,12 @@ def score_delim(fields_list): avg_n_fields = math.floor(sum(len_list) / n_lines) sq_of_exp = avg_n_fields * avg_n_fields - len_list_sq = [l * l for l in len_list] + len_list_sq = [len * len for len in len_list] exp_of_sq = math.floor(sum(len_list_sq) / n_lines) var = exp_of_sq - sq_of_exp # Favour more fields return (1000.0 / (var + 1)) + avg_n_fields - max_score = -1 res = (None, None) @@ -424,7 +419,7 @@ def score_delim(fields_list): delimiter = delim.decode('string_escape') try: fields_list = _get_rows(lines, delimiter) - except: + except Exception: LOG.exception('failed to get rows') fields_list = [line.split(delimiter) for line in lines if line] @@ -472,6 +467,7 @@ def readlines(fileobj, encoding): except UnicodeError: return None + FILE_READERS.append(GzipFileReader) @@ -488,6 +484,7 @@ def readlines(fileobj, encoding): except UnicodeError: return None + FILE_READERS.append(TextFileReader) diff --git a/apps/beeswax/src/beeswax/data_export.py b/apps/beeswax/src/beeswax/data_export.py index 348b26d1591..a3ab4594460 100644 --- a/apps/beeswax/src/beeswax/data_export.py +++ b/apps/beeswax/src/beeswax/data_export.py @@ -15,28 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import str -from builtins import object import json -import logging import math -import sys -import types - -from desktop.lib import export_csvxls -from beeswax import common, conf +import logging -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ +from beeswax import common, conf +from desktop.lib import export_csvxls LOG = logging.getLogger() FETCH_SIZE = 1000 -DOWNLOAD_COOKIE_AGE = 1800 # 30 minutes +DOWNLOAD_COOKIE_AGE = 1800 # 30 minutes def download(handle, format, db, id=None, file_name='query_result', user_agent=None): @@ -115,22 +107,22 @@ def __iter__(self): # Avoid serialization to string where possible def _getsizeofascii(self, row): size = 0 - size += max(len(row) - 1, 0) # CSV commas between columns - size += 2 # CSV \r\n at the end of row + size += max(len(row) - 1, 0) # CSV commas between columns + size += 2 # CSV \r\n at the end of row for col in row: col_type = type(col) - if col_type == int: + if col_type is int: if col == 0: size += 1 elif col < 0: size += int(math.log10(-1 * col)) + 2 else: size += int(math.log10(col)) + 1 - elif col_type == bytes: + elif col_type is bytes: size += len(col) - elif col_type == float: + elif col_type is float: size += len(str(col)) - elif col_type == bool: + elif col_type is bool: size += 4 elif col_type == type(None): size += 4 diff --git a/apps/beeswax/src/beeswax/design.py b/apps/beeswax/src/beeswax/design.py index b7028ffba8d..2536e68a2f6 100644 --- a/apps/beeswax/src/beeswax/design.py +++ b/apps/beeswax/src/beeswax/design.py @@ -18,30 +18,18 @@ """ The HQLdesign class can (de)serialize a design to/from a QueryDict. 
""" - -from future import standard_library -standard_library.install_aliases() -from builtins import object import json import logging -import os -import re -import sys import urllib.parse import django.http from django import forms from django.forms import ValidationError +from django.utils.translation import gettext as _ -from notebook.sql_utils import split_statements, strip_trailing_semicolon from desktop.lib.django_forms import BaseSimpleFormSet, MultiForm from hadoop.cluster import get_hdfs - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.sql_utils import split_statements, strip_trailing_semicolon LOG = logging.getLogger() @@ -296,6 +284,3 @@ def denormalize_formset_dict(data_dict_list, formset, attr_list): res[str(formset.management_form.add_prefix('next_form_id'))] = str(len(data_dict_list)) return res - - def __str__(self): - return '%s: %s' % (self.__class__, self.query) diff --git a/apps/beeswax/src/beeswax/forms.py b/apps/beeswax/src/beeswax/forms.py index b5e5f210c02..c07dcd4f0e0 100644 --- a/apps/beeswax/src/beeswax/forms.py +++ b/apps/beeswax/src/beeswax/forms.py @@ -17,22 +17,16 @@ import sys -from builtins import chr from django import forms -from django.core.validators import MinValueValidator, MaxValueValidator +from django.core.validators import MaxValueValidator, MinValueValidator from django.forms import NumberInput +from django.utils.translation import gettext as _, gettext_lazy as _t from aws.s3 import S3_ROOT, S3A_ROOT -from desktop.lib.django_forms import simple_formset_factory, DependencyAwareForm, ChoiceOrOtherField, MultiForm, SubmitButton -from filebrowser.forms import PathField - from beeswax import common from beeswax.models import SavedQuery - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t +from desktop.lib.django_forms import ChoiceOrOtherField, DependencyAwareForm, MultiForm, SubmitButton, simple_formset_factory +from filebrowser.forms import PathField class QueryForm(MultiForm): @@ -129,7 +123,7 @@ class SaveResultsTableForm(forms.Form): label=_t("Table Name"), required=True, help_text=_t("Name of the new table") - ) # Can also contain a DB prefixed table name, e.g. DB_NAME.TABLE_NAME + ) # Can also contain a DB prefixed table name, e.g. DB_NAME.TABLE_NAME def __init__(self, *args, **kwargs): self.db = kwargs.pop('db', None) @@ -186,6 +180,7 @@ class FunctionForm(forms.Form): name = forms.CharField(required=True) class_name = forms.CharField(required=True) + FunctionFormSet = simple_formset_factory(FunctionForm) @@ -212,11 +207,13 @@ class SettingForm(forms.Form): key = forms.CharField() value = forms.CharField() + SettingFormSet = simple_formset_factory(SettingForm) # In theory, there are only 256 of these... -TERMINATOR_CHOICES = [ (hive_val, desc) for hive_val, desc, ascii in common.TERMINATORS ] +TERMINATOR_CHOICES = [(hive_val, desc) for hive_val, desc, ascii in common.TERMINATORS] + class CreateTableForm(DependencyAwareForm): """ @@ -230,7 +227,7 @@ class CreateTableForm(DependencyAwareForm): # Row Formatting row_format = forms.ChoiceField(required=True, - choices=common.to_choices([ "Delimited", "SerDe" ]), + choices=common.to_choices(["Delimited", "SerDe"]), initial="Delimited") # Delimited Row @@ -370,13 +367,14 @@ def clean(self): # Note, struct is not currently supported. 
(Because it's recursive, for example.) HIVE_TYPES = \ - ( "string", "tinyint", "smallint", "int", "bigint", "boolean", + ("string", "tinyint", "smallint", "int", "bigint", "boolean", "float", "double", "array", "map", "timestamp", "date", "char", "varchar") HIVE_PRIMITIVE_TYPES = \ ("string", "tinyint", "smallint", "int", "bigint", "boolean", "float", "double", "timestamp", "date", "char", "varchar") + class PartitionTypeForm(forms.Form): dependencies = [ ("column_type", "char", "char_length"), @@ -393,6 +391,7 @@ class PartitionTypeForm(forms.Form): validators=[MinValueValidator(1), MaxValueValidator(65355)], help_text=_t("Specify if column_is varchar")) + class ColumnTypeForm(DependencyAwareForm): """ Form used to specify a column during table creation @@ -432,7 +431,7 @@ class ColumnTypeForm(DependencyAwareForm): def _clean_databasename(name): try: - if name in db.get_databases(): # Will always fail + if name in db.get_databases(): # Will always fail raise forms.ValidationError(_('Database "%(name)s" already exists.') % {'name': name}) except Exception: return name diff --git a/apps/beeswax/src/beeswax/hive_site.py b/apps/beeswax/src/beeswax/hive_site.py index 555a7346863..49f356cee35 100644 --- a/apps/beeswax/src/beeswax/hive_site.py +++ b/apps/beeswax/src/beeswax/hive_site.py @@ -19,25 +19,17 @@ Helper for reading hive-site.xml """ -from builtins import str +import re import errno +import socket import logging import os.path -import re -import socket -import sys +import beeswax.conf from desktop.lib import security_util from hadoop import confparse from hadoop.ssl_client_site import get_trustore_location, get_trustore_password -import beeswax.conf - -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file - LOG = logging.getLogger() _HIVE_SITE_PATH = None # Path to hive-site.xml @@ -72,13 +64,14 @@ # Host is whatever up to the colon. Allow and ignore a trailing slash. 
-_THRIFT_URI_RE = re.compile("^thrift://([^:]+):(\d+)[/]?$") +_THRIFT_URI_RE = re.compile(r"^thrift://([^:]+):(\d+)[/]?$") class MalformedHiveSiteException(Exception): """Parsing error class used internally""" pass + def reset(): """Reset the cached conf""" global _HIVE_SITE_DICT @@ -105,7 +98,7 @@ def get_metastore(): if not is_local: use_sasl = str(get_conf().get(_CNF_METASTORE_SASL, 'false')).lower() == 'true' - thrift_uri = thrift_uris.split(",")[0] # First URI + thrift_uri = thrift_uris.split(",")[0] # First URI host = socket.getfqdn() match = _THRIFT_URI_RE.match(thrift_uri) if not match: @@ -138,18 +131,23 @@ def get_hiveserver2_kerberos_principal(hostname_or_ip): else: return None + def get_metastore_warehouse_dir(): return get_conf().get(_CNF_METASTORE_WAREHOUSE_DIR, '/user/hive/warehouse') + def get_hiveserver2_authentication(): - return get_conf().get(_CNF_HIVESERVER2_AUTHENTICATION, 'NONE').upper() # NONE == PLAIN SASL + return get_conf().get(_CNF_HIVESERVER2_AUTHENTICATION, 'NONE').upper() # NONE == PLAIN SASL + def get_hiveserver2_thrift_sasl_qop(): return get_conf().get(_CNF_HIVESERVER2_THRIFT_SASL_QOP, 'NONE').lower() + def hiveserver2_impersonation_enabled(): return get_conf().get(_CNF_HIVESERVER2_IMPERSONATION, 'TRUE').upper() == 'TRUE' + def hiveserver2_jdbc_url(): is_transport_mode_http = hiveserver2_transport_mode() == 'HTTP' urlbase = 'jdbc:hive2://%s:%s/default' % ( @@ -180,15 +178,19 @@ def hiveserver2_jdbc_url(): def hiveserver2_use_ssl(): return get_conf().get(_CNF_HIVESERVER2_USE_SSL, 'FALSE').upper() == 'TRUE' + def hiveserver2_transport_mode(): return get_conf().get(_CNF_HIVESERVER2_TRANSPORT_MODE, 'TCP').upper() + def hiveserver2_thrift_binary_port(): return get_conf().get(_CNF_HIVESERVER2_THRIFT_BINARY_PORT) + def hiveserver2_thrift_http_port(): return get_conf().get(_CNF_HIVESERVER2_THRIFT_HTTP_PORT) + def hiveserver2_thrift_http_path(): return get_conf().get(_CNF_HIVESERVER2_THRIFT_HTTP_PATH, 'cliservice') @@ -202,15 +204,19 @@ def has_concurrency_support(): ''''Possibly use set -v in future to obtain properties hive.create.as.acid=true & hive.create.as.insert.only=true''' return get_conf().get(_CNF_HIVE_SUPPORT_CONCURRENCY, 'TRUE').upper() == 'TRUE' + def get_hive_hook_proto_base_directory(): return get_conf().get(_CNF_HIVE_HOOK_PROTO_BASE_DIR) + def get_hive_execution_mode(): return get_conf().get(_CNF_HIVE_EXECUTION_MODE) + def get_hive_execution_engine(): return get_conf().get(_CNF_HIVE_EXECUTION_ENGINE) + def _parse_hive_site(): """ Parse hive-site.xml and store in _HIVE_SITE_DICT @@ -220,7 +226,7 @@ def _parse_hive_site(): _HIVE_SITE_PATH = os.path.join(beeswax.conf.HIVE_CONF_DIR.get(), 'hive-site.xml') try: - data = open_file(_HIVE_SITE_PATH, 'r').read() + data = open(_HIVE_SITE_PATH, 'r').read() except IOError as err: if err.errno != errno.ENOENT: LOG.error('Cannot read from "%s": %s' % (_HIVE_SITE_PATH, err)) @@ -230,9 +236,10 @@ def _parse_hive_site(): _HIVE_SITE_DICT = confparse.ConfParse(data) + def get_hive_site_content(): hive_site_path = os.path.join(beeswax.conf.HIVE_CONF_DIR.get(), 'hive-site.xml') if not os.path.exists(hive_site_path): return '' else: - return open_file(hive_site_path, 'r').read() + return open(hive_site_path, 'r').read() diff --git a/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples.py b/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples.py index e4fd7218149..990e63d7db9 100644 --- a/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples.py +++ 
b/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples.py @@ -15,33 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import csv -import logging -import json import os -import sys +import csv import pwd +import json +import logging from django.core.management.base import BaseCommand +from django.utils.translation import gettext as _ -from desktop.lib.exceptions_renderable import PopupException -from desktop.conf import USE_NEW_EDITOR -from desktop.models import Directory, Document2, Document2Permission -from hadoop import cluster -from notebook.models import import_saved_beeswax_query, make_notebook, MockRequest, _get_example_directory -from useradmin.models import get_default_user_group, install_sample_user, User - -from beeswax.design import hql_query from beeswax.conf import LOCAL_EXAMPLES_DATA_DIR from beeswax.hive_site import has_concurrency_support -from beeswax.models import SavedQuery, HQL, IMPALA, RDBMS -from beeswax.server import dbms - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from beeswax.models import HQL, IMPALA, RDBMS, SavedQuery +from desktop.conf import USE_NEW_EDITOR +from desktop.lib.exceptions_renderable import PopupException +from desktop.models import Document2, Document2Permission +from hadoop import cluster +from notebook.models import MockRequest, _get_example_directory, import_saved_beeswax_query, make_notebook +from useradmin.models import User, get_default_user_group, install_sample_user LOG = logging.getLogger() @@ -104,7 +95,6 @@ def handle(self, *args, **options): return self.successes, self.errors - def install_tables(self, django_user, dialect, db_name, tables, interpreter=None, request=None): data_dir = LOCAL_EXAMPLES_DATA_DIR.get() table_file = open(os.path.join(data_dir, tables)) @@ -129,7 +119,6 @@ def install_tables(self, django_user, dialect, db_name, tables, interpreter=None LOG.error(msg) self.errors.append(_('Could not install table %s: %s') % (full_name, msg)) - def install_queries(self, django_user, dialect, interpreter=None): design_file = open(os.path.join(LOCAL_EXAMPLES_DATA_DIR.get(), 'queries.json')) design_list = json.load(design_file) @@ -196,7 +185,6 @@ def __init__(self, data_dict, dialect, db_name='default', interpreter=None, requ self._contents_file = os.path.join(self._data_dir, self.filename) self._check_file_contents(self._contents_file) - def install(self, django_user): if self.dialect in ('hive', 'impala'): if has_concurrency_support() and not self.is_transactional: @@ -215,7 +203,6 @@ def install(self, django_user): return True - def create(self, django_user): """ Create SQL sample table. 
@@ -242,7 +229,6 @@ def create(self, django_user): else: raise ex - def load(self, django_user): inserts = [] @@ -279,7 +265,6 @@ def load(self, django_user): for insert in inserts: self._load_data_to_table(django_user, insert) - def load_partition(self, django_user, partition_spec, filepath, columns): if (self.dialect not in ('hive', 'impala') or has_concurrency_support()) and self.is_transactional: with open(filepath) as f: @@ -309,14 +294,12 @@ def load_partition(self, django_user, partition_spec, filepath, columns): self._load_data_to_table(django_user, hql) - def _check_file_contents(self, filepath): if not os.path.isfile(filepath): msg = _('Cannot find table data in "%(file)s".') % {'file': filepath} LOG.error(msg) raise ValueError(msg) - def _get_partition_dir(self, partition_spec): parts = partition_spec.split(',') last_part = parts[-1] @@ -324,7 +307,6 @@ def _get_partition_dir(self, partition_spec): part_dir = part_value.strip("'").replace('-', '_') return part_dir - def _get_hdfs_root_destination(self, django_user, subdir=None): fs = cluster.get_hdfs() hdfs_root_destination = None @@ -349,7 +331,6 @@ def _get_hdfs_root_destination(self, django_user, subdir=None): return hdfs_root_destination - def _upload_to_hdfs(self, django_user, local_filepath, hdfs_root_destination, filename=None): fs = cluster.get_hdfs() @@ -362,7 +343,6 @@ def _upload_to_hdfs(self, django_user, local_filepath, hdfs_root_destination, fi return hdfs_destination - def _load_data_to_table(self, django_user, hql): LOG.info('Loading data into table "%s"' % (self.name,)) @@ -378,7 +358,6 @@ def _load_data_to_table(self, django_user, hql): ) job.execute_and_wait(self.request) - def _get_sql_insert_values(self, f, columns=None): data = f.read() dialect = csv.Sniffer().sniff(data) @@ -409,7 +388,6 @@ def __init__(self, data_dict): self.type = int(data_dict['type']) self.data = data_dict['data'] - def install(self, django_user, interpreter=None): """ Install queries. Raise InstallException on failure. @@ -468,7 +446,6 @@ def install(self, django_user, interpreter=None): examples_dir.share(django_user, Document2Permission.READ_PERM, groups=[get_default_user_group()]) LOG.info('Successfully installed sample query: %s' % doc2) - def _document_type(self, type, interpreter=None): if type == HQL: return 'query-hive' diff --git a/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples_tests.py b/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples_tests.py index 8a236e5a56f..6b9dd4ec618 100644 --- a/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples_tests.py +++ b/apps/beeswax/src/beeswax/management/commands/beeswax_install_examples_tests.py @@ -16,22 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import logging +from unittest.mock import patch + import pytest -import sys +from beeswax.management.commands.beeswax_install_examples import Command, SampleQuery, SampleTable from desktop.auth.backend import rewrite_user from desktop.lib.django_test_util import make_logged_in_client from desktop.models import Document2 -from useradmin.models import User, install_sample_user - -from beeswax.management.commands.beeswax_install_examples import SampleTable, Command, SampleQuery - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - +from useradmin.models import User LOG = logging.getLogger() @@ -43,7 +38,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") - def test_install_queries_mysql(self): design_dict = { "name": "TestStandardTables Query", @@ -115,7 +109,6 @@ def test_install_queries(self): query = Document2.objects.filter(name='TestBeswaxHiveTables Query').get() assert 'query-hive' == query.type - def test_create_table_load_data_but_no_fs(self): table_data = { "data_file": "sample_07.csv", @@ -134,7 +127,6 @@ def test_create_table_load_data_but_no_fs(self): make_notebook.assert_not_called() - @pytest.mark.django_db class TestTransactionalTables(): @@ -142,7 +134,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = rewrite_user(User.objects.get(username="test")) - def test_load_sample_07_with_concurrency_support(self): table_data = { "data_file": "sample_07.csv", @@ -161,7 +152,6 @@ def test_load_sample_07_with_concurrency_support(self): make_notebook.assert_called() - def test_load_web_logs_with_concurrency_support(self): table_data = { "partition_files": { @@ -204,7 +194,6 @@ def test_load_web_logs_with_concurrency_support(self): make_notebook.assert_called() - def test_create_phoenix_table(self): table_data = { "data_file": "./tables/us_population.csv", diff --git a/apps/beeswax/src/beeswax/management/commands/create_table_query_data.py b/apps/beeswax/src/beeswax/management/commands/create_table_query_data.py index c50adf2ed34..8d87e9e4edf 100644 --- a/apps/beeswax/src/beeswax/management/commands/create_table_query_data.py +++ b/apps/beeswax/src/beeswax/management/commands/create_table_query_data.py @@ -15,27 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import str - import logging -import sys from django.core.management.base import BaseCommand +from django.utils.translation import gettext as _ -from desktop.lib import django_mako +from beeswax import hive_site +from beeswax.design import hql_query from beeswax.server import dbms from beeswax.server.dbms import get_query_server_config - -from beeswax.design import hql_query -from beeswax import hive_site +from desktop.lib import django_mako from useradmin.models import install_sample_user -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -46,7 +37,6 @@ class Command(BaseCommand): args = '' help = 'Create table sys.query_data over hive.hook.proto.base-directory' - def handle(self, *args, **options): create_table() diff --git a/apps/beeswax/src/beeswax/models.py b/apps/beeswax/src/beeswax/models.py index ff014256186..85f9e361d9b 100644 --- a/apps/beeswax/src/beeswax/models.py +++ b/apps/beeswax/src/beeswax/models.py @@ -15,36 +15,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import range -from builtins import object import ast -import base64 -import datetime import json +import base64 import logging -import sys +import datetime +from enum import Enum -from django.db import models from django.contrib.contenttypes.fields import GenericRelation +from django.db import models from django.urls import reverse +from django.utils.translation import gettext as _, gettext_lazy as _t +from TCLIService.ttypes import THandleIdentifier, TOperationHandle, TOperationState, TOperationType, TSessionHandle -from enum import Enum -from TCLIService.ttypes import TSessionHandle, THandleIdentifier, TOperationState, TOperationHandle, TOperationType - +from beeswax.design import HQLdesign from desktop.lib.exceptions_renderable import PopupException from desktop.models import Document, Document2 from desktop.redaction import global_redaction_engine from librdbms.server import dbms as librdbms_dbms from useradmin.models import User, UserProfile -from beeswax.design import HQLdesign - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t - - LOG = logging.getLogger() QUERY_SUBMISSION_TIMEOUT = datetime.timedelta(0, 60 * 60) # 1 hour @@ -54,6 +44,7 @@ HIVE_SERVER2 = 'hiveserver2' QUERY_TYPES = (HQL, IMPALA, RDBMS, SPARK, HPLSQL) = list(range(5)) + class QueryHistory(models.Model): """ Holds metadata about all queries that have been executed. 
@@ -136,7 +127,6 @@ def get_query_server_config(self): return query_server - def get_current_statement(self): if self.design is not None: design = self.design.get_design() @@ -156,7 +146,7 @@ def is_finished(self): if self.design is not None: design = self.design.get_design() - return is_statement_finished and self.statement_number + 1 == design.statement_count # Last statement + return is_statement_finished and self.statement_number + 1 == design.statement_count # Last statement else: return is_statement_finished @@ -363,7 +353,7 @@ def __str__(self): def get_query_context(self): try: return make_query_context('design', self.id) - except: + except Exception: LOG.exception('failed to make query context') return "" @@ -455,7 +445,7 @@ def get_tez_session(self, user, application, n_sessions): if available_sessions: session = available_sessions[0] else: - session = None # No available session found + session = None # No available session found return session @@ -485,7 +475,7 @@ def get_handle(self): def get_adjusted_guid_secret(self): secret = self.secret guid = self.guid - if sys.version_info[0] > 2 and not isinstance(self.secret, bytes) and not isinstance(self.guid, bytes): + if not isinstance(self.secret, bytes) and not isinstance(self.guid, bytes): # only for py3, after bytes saved, bytes wrapped in a string object try: secret = ast.literal_eval(secret) @@ -521,7 +511,6 @@ def __str__(self): return '%s %s' % (self.secret, self.guid) - class HiveServerQueryHandle(QueryHandle): """ QueryHandle for Hive Server 2. @@ -556,16 +545,10 @@ def get_rpc_handle(self): @classmethod def get_decoded(cls, secret, guid): - if sys.version_info[0] > 2: - return base64.b64decode(secret), base64.b64decode(guid) - else: - return base64.decodestring(secret), base64.decodestring(guid) + return base64.b64decode(secret), base64.b64decode(guid) def get_encoded(self): - if sys.version_info[0] > 2: - return base64.b64encode(self.secret), base64.b64encode(self.guid) - else: - return base64.encodestring(self.secret), base64.encodestring(self.guid) + return base64.b64encode(self.secret), base64.b64encode(self.guid) # Deprecated. Could be removed. @@ -608,6 +591,7 @@ def get(): except MetaInstall.DoesNotExist: return MetaInstall(id=1) + class Namespace(models.Model): name = models.CharField(default='', max_length=255) description = models.TextField(default='') @@ -652,6 +636,7 @@ def to_dict(self): 'external_id': self.external_id } + class Compute(models.Model): """ Instance of a compute type pointing to a Hive or Impala compute resources. diff --git a/apps/beeswax/src/beeswax/query_history.py b/apps/beeswax/src/beeswax/query_history.py index 43508d0ff6f..2622bff2dc0 100644 --- a/apps/beeswax/src/beeswax/query_history.py +++ b/apps/beeswax/src/beeswax/query_history.py @@ -15,32 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import filter -from builtins import str - -import collections -import logging import json -import sys -import threading import uuid +import logging +import threading +import collections + +from django.utils.translation import gettext as _ from beeswax.design import hql_query +from beeswax.management.commands import create_table_query_data from beeswax.server import dbms from beeswax.server.dbms import get_query_server_config -from beeswax.management.commands import create_table_query_data - -from desktop.lib.exceptions_renderable import raise_popup_exception, PopupException from desktop.lib import django_mako - +from desktop.lib.exceptions_renderable import PopupException, raise_popup_exception from useradmin.models import install_sample_user -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() QUERY_HISTORY_CACHE_MAX_USER_COUNT = 10 @@ -50,8 +40,8 @@ class QueryHistory(object): def __init__(self, max_user=10, max_history_per_user=25): - self.max_user=max_user - self.max_history_per_user=max_history_per_user + self.max_user = max_user + self.max_history_per_user = max_history_per_user self.by_user = collections.OrderedDict() self.no_user_key = str(uuid.uuid4()) self.lock = threading.Lock() @@ -131,16 +121,19 @@ def get_queries(self, request_user, filters): self.lock.acquire() by_user = self.by_user.get(request_user) if by_user and by_user['filters'] == filters: - del self.by_user[request_user] # Moving request_user to head of queue + del self.by_user[request_user] # Moving request_user to head of queue self.by_user[request_user] = by_user return by_user return None finally: self.lock.release() + QUERY_HISTORY = QueryHistory(max_user=QUERY_HISTORY_CACHE_MAX_USER_COUNT, max_history_per_user=QUERY_HISTORY_CACHE_MAX_LENGTH_PER_USER) -# If fresh user get from _get_query_history_latest else get _get_query_history_from. if results set from _get_query_history_from less than limit merge results with cache else call _get_query_history_latest + +# If fresh user get from _get_query_history_latest else get _get_query_history_from. 
+# if results set from _get_query_history_from less than limit merge results with cache else call _get_query_history_latest def get_query_history(request_user=None, start_date=None, start_time=None, query_id=None, status=None, limit=None): _init_table() @@ -151,7 +144,7 @@ def get_query_history(request_user=None, start_date=None, start_time=None, query last = history['max'] data = _get_query_history_from(request_user=request_user, start_date=last['date'], - start_time=last['time']+1, + start_time=last['time'] + 1, query_id=query_id, status=status, limit=limit) @@ -161,10 +154,19 @@ def get_query_history(request_user=None, start_date=None, start_time=None, query cached = _n_filter(filter_list, cached)[:limit] return {'data': cached} - data = _get_query_history_latest(request_user=request_user, start_date=start_date, start_time=start_time, query_id=query_id, status=status, limit=limit, force_refresh=True) + data = _get_query_history_latest( + request_user=request_user, + start_date=start_date, + start_time=start_time, + query_id=query_id, + status=status, + limit=limit, + force_refresh=True + ) QUERY_HISTORY.set(request_user, data['data'], filters) return data + # If id in cache return cache else _get_query_history_from def get_query_by_id(request_user=None, query_id=None): _init_table() @@ -173,10 +175,11 @@ def get_query_by_id(request_user=None, query_id=None): if datum: return {'data': [datum]} else: - data = _get_query_history_from(request_user=request_user, query_id=query_id) # force_refresh? + data = _get_query_history_from(request_user=request_user, query_id=query_id) # force_refresh? cached = _groupby({'by_id': {}}, data['data']) return {'data': cached} + def _init_table(): global HAS_CREATED_TABLE if not HAS_CREATED_TABLE: @@ -185,8 +188,21 @@ def _init_table(): if not HAS_CREATED_TABLE: raise PopupException(_('Could not initialize query history table.')) -def _get_query_history_latest(request_user=None, query_id=None, start_date=None, start_time=None, status=None, limit=25, force_refresh=False): - proposed_query = django_mako.render_to_string("select_table_query_data_latest.mako", {'table': {'name': 'query_data', 'request_user': request_user, 'query_id': query_id, 'start_date': start_date, 'start_time': start_time, 'status': status, 'limit': limit, 'force_refresh': force_refresh}}) + +def _get_query_history_latest( + request_user=None, query_id=None, start_date=None, start_time=None, status=None, limit=25, force_refresh=False): + proposed_query = django_mako.render_to_string( + "select_table_query_data_latest.mako", + {'table': { + 'name': 'query_data', + 'request_user': request_user, + 'query_id': query_id, + 'start_date': start_date, + 'start_time': start_time, + 'status': status, + 'limit': limit, + 'force_refresh': force_refresh + }}) data = _execute_query(proposed_query, limit) for row in data['data']: if row[1]: @@ -197,6 +213,7 @@ def _get_query_history_latest(request_user=None, query_id=None, start_date=None, row[8] = json.loads(row[8]) return data + def _get_query_history_from(request_user=None, start_date=None, start_time=None, status=None, query_id=None, limit=25): proposed_query = django_mako.render_to_string("select_table_query_data_from.mako", {'table': @@ -217,6 +234,7 @@ def _get_query_history_from(request_user=None, start_date=None, start_time=None, row[8] = [row[8]] return data + def _execute_query(proposed_query, limit): user = install_sample_user() query_server = get_query_server_config('beeswax') @@ -243,6 +261,7 @@ def _execute_query(proposed_query, limit): 
except Exception as ex: raise_popup_exception(_('Error fetching query history.')) + def _get_filter_list(filters): filter_list = [] if filters.get("states"): @@ -250,14 +269,17 @@ def _get_filter_list(filters): return filter_list + def _get_status(row): return 'completed' if len(row[1]) >= 2 else 'running' + def _n_filter(filters, tuples): for f in filters: tuples = list(filter(f, tuples)) return tuples + def _groupby(by_user, data): results = [] for row in data: @@ -270,7 +292,7 @@ def _groupby(by_user, data): results.append(row) else: item = by_user['by_id'][row[0]] - if row[8][0] in item[8]: # we have dup + if row[8][0] in item[8]: # we have dup continue if row[1]: item[1] += row[1] diff --git a/apps/beeswax/src/beeswax/server/dbms.py b/apps/beeswax/src/beeswax/server/dbms.py index a5f77f9f5fd..6defed37743 100644 --- a/apps/beeswax/src/beeswax/server/dbms.py +++ b/apps/beeswax/src/beeswax/server/dbms.py @@ -16,7 +16,6 @@ # limitations under the License. import re -import sys import json import time import logging @@ -72,7 +71,6 @@ from desktop.lib.exceptions_renderable import PopupException from desktop.lib.parameterization import substitute_variables from desktop.lib.view_util import location_to_url -from desktop.models import Cluster from desktop.settings import CACHES_HIVE_DISCOVERY_KEY from indexer.file_format import HiveFormat from libzookeeper import conf as libzookeeper_conf diff --git a/apps/beeswax/src/beeswax/server/dbms_tests.py b/apps/beeswax/src/beeswax/server/dbms_tests.py index 973adfd7e41..263568f946d 100644 --- a/apps/beeswax/src/beeswax/server/dbms_tests.py +++ b/apps/beeswax/src/beeswax/server/dbms_tests.py @@ -16,7 +16,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import logging from unittest.mock import Mock, patch diff --git a/apps/beeswax/src/beeswax/server/hive_metastore_server.py b/apps/beeswax/src/beeswax/server/hive_metastore_server.py index 36d62c5edae..965afaa7894 100644 --- a/apps/beeswax/src/beeswax/server/hive_metastore_server.py +++ b/apps/beeswax/src/beeswax/server/hive_metastore_server.py @@ -15,32 +15,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object import logging -import re -import sys -import thrift -from django.utils.encoding import smart_str, force_unicode - -import hadoop.cluster - -from desktop.lib import thrift_util -from desktop.conf import KERBEROS +from django.utils.encoding import force_unicode, smart_str +from django.utils.translation import gettext as _ from hive_metastore import ThriftHiveMetastore from TCLIService.ttypes import TOperationState +import hadoop.cluster from beeswax import hive_site from beeswax.conf import SERVER_CONN_TIMEOUT -from beeswax.server.hive_server2_lib import ResultCompatible from beeswax.models import HiveServerQueryHandle, QueryHistory -from beeswax.server.dbms import Table, DataTable - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from beeswax.server.dbms import DataTable, Table +from beeswax.server.hive_server2_lib import ResultCompatible +from desktop.conf import KERBEROS +from desktop.lib import thrift_util LOG = logging.getLogger() @@ -105,7 +94,6 @@ def parse_result_row(row): yield parse_result_row(row) - class HiveMetastoreClient(object): def __init__(self, query_server, user): @@ -113,15 +101,12 @@ def __init__(self, query_server, user): self.query_server = query_server self.meta_client = self.meta_client() - def get_databases(self, *args, **kwargs): return self.meta_client.get_all_databases() - def get_tables(self, *args, **kwargs): return self.meta_client.get_tables(*args, **kwargs) - def get_tables_meta(self, *args, **kwargs): meta_tables = self.meta_client.get_table_meta(*args, **kwargs) return [ @@ -140,41 +125,32 @@ def get_table(self, *args, **kwargs): return table - def get_partitions(self, db_name, tbl_name, max_parts): if max_parts is None: max_parts = -1 return self.meta_client.get_partitions(db_name, tbl_name, max_parts) - def use(self, query): pass - def query(self, query, statement=0): return HiveServerQueryHandle(secret='mock', guid='mock') - def get_state(self, handle): return QueryHistory.STATE.available - def close(self, handle): pass - def get_operation_status(self, handle): return MockFinishedOperation() - def get_default_configuration(self, *args, **kwargs): return [] - def fetch(self, handle, start_over=False, max_rows=None): return EmptyResultCompatible() - @classmethod def get_security(cls, query_server=None): cluster_conf = hadoop.cluster.get_cluster_conf_for_job_submission() @@ -193,7 +169,6 @@ def get_security(cls, query_server=None): return use_sasl, kerberos_principal_short_name - def meta_client(self): """Get the Thrift client to talk to the metastore""" @@ -281,7 +256,7 @@ def alter_partition(self, db_name, tbl_name, new_part): self._encode_partition(new_part) return self._client.alter_partition(db_name, tbl_name, new_part) - use_sasl, kerberos_principal_short_name = HiveMetastoreClient.get_security() # TODO Reuse from HiveServer2 lib + use_sasl, kerberos_principal_short_name = HiveMetastoreClient.get_security() # TODO Reuse from HiveServer2 lib client = thrift_util.get_client( ThriftHiveMetastore.Client, diff --git a/apps/beeswax/src/beeswax/server/hive_server2_lib.py b/apps/beeswax/src/beeswax/server/hive_server2_lib.py index 1e88838f807..d4ac2c41c0b 100644 --- a/apps/beeswax/src/beeswax/server/hive_server2_lib.py +++ b/apps/beeswax/src/beeswax/server/hive_server2_lib.py @@ -16,12 +16,11 @@ # limitations under the License. 
import re -import sys import json import logging -from builtins import filter, map, next, object from operator import itemgetter +from django.utils.translation import gettext as _ from TCLIService import TCLIService from TCLIService.ttypes import ( TCancelOperationReq, @@ -55,12 +54,6 @@ from desktop.lib import python_util, thrift_util from notebook.connectors.base import get_interpreter -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() IMPALA_RESULTSET_CACHE_SIZE = 'impala.resultset.cache.size' DEFAULT_USER = DEFAULT_USER.get() @@ -358,7 +351,7 @@ def _get_val(cls, column): @classmethod def mark_nulls(cls, values, bytestring): - if sys.version_info[0] < 3 or isinstance(bytestring, bytes): + if isinstance(bytestring, bytes): mask = bytearray(bytestring) else: bitstring = python_util.from_string_to_bits(bytestring) @@ -379,7 +372,7 @@ def mark_nulls(cls, values, bytestring): def set_nulls(cls, values, nulls): can_decode = True bytestring = nulls - if sys.version_info[0] == 3 and isinstance(bytestring, bytes): + if isinstance(bytestring, bytes): try: bytestring = bytestring.decode('utf-8') except Exception: @@ -421,10 +414,7 @@ def rows(self): try: yield row.fields() except StopIteration as e: - if sys.version_info[0] > 2: - return # pep-0479: expected Py3.8 generator raised StopIteration - else: - raise e + return # pep-0479: expected Py3.8 generator raised StopIteration class HiveServerTTableSchema(object): @@ -1041,9 +1031,6 @@ def execute_statement(self, statement, max_rows=1000, configuration=None, orient if self.query_server.get('dialect') == 'impala' and self.query_server['QUERY_TIMEOUT_S'] > 0: configuration['QUERY_TIMEOUT_S'] = str(self.query_server['QUERY_TIMEOUT_S']) - if sys.version_info[0] == 2: - statement = statement.encode('utf-8') - req = TExecuteStatementReq(statement=statement, confOverlay=configuration) (res, session) = self.call(self._client.ExecuteStatement, req, session=session) @@ -1061,9 +1048,6 @@ def execute_async_statement(self, statement=None, thrift_function=None, thrift_r if self.query_server.get('dialect') == 'impala' and self.query_server['QUERY_TIMEOUT_S'] > 0: conf_overlay['QUERY_TIMEOUT_S'] = str(self.query_server['QUERY_TIMEOUT_S']) - if sys.version_info[0] == 2: - statement = statement.encode('utf-8') - (res, session) = self.call_return_result_and_session(thrift_function, thrift_request, session=session) return HiveServerQueryHandle( diff --git a/apps/beeswax/src/beeswax/server/hive_server2_lib_tests.py b/apps/beeswax/src/beeswax/server/hive_server2_lib_tests.py index 5467110834c..9b338d5c521 100644 --- a/apps/beeswax/src/beeswax/server/hive_server2_lib_tests.py +++ b/apps/beeswax/src/beeswax/server/hive_server2_lib_tests.py @@ -16,26 +16,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import logging +from unittest.mock import MagicMock, Mock, patch + import pytest -import sys from TCLIService.ttypes import TStatusCode +from beeswax.conf import CLOSE_SESSIONS, MAX_NUMBER_OF_SESSIONS +from beeswax.models import HiveServerQueryHandle, Session +from beeswax.server.dbms import QueryServerException, get_query_server_config +from beeswax.server.hive_server2_lib import HiveServerClient, HiveServerClientCompatible, HiveServerTable from desktop.auth.backend import rewrite_user from desktop.lib.django_test_util import make_logged_in_client from useradmin.models import User -from beeswax.conf import MAX_NUMBER_OF_SESSIONS, CLOSE_SESSIONS -from beeswax.models import HiveServerQueryHandle, Session -from beeswax.server.dbms import get_query_server_config, QueryServerException -from beeswax.server.hive_server2_lib import HiveServerTable, HiveServerClient, HiveServerClientCompatible - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - - LOG = logging.getLogger() @@ -112,7 +107,6 @@ def test_open_session(self): original_guid == handle.sessionId.guid) - def test_get_configuration(self): with patch('beeswax.server.hive_server2_lib.HiveServerClient.execute_query_statement') as execute_query_statement: @@ -229,16 +223,13 @@ def test_get_databases_impala_specific(self): client.get_databases(query) assert ( - None != - client.call.call_args[0][1].schemaName), client.call.call_args.args + None is not client.call.call_args[0][1].schemaName), client.call.call_args.args with patch.dict(self.query_server, {'dialect': 'impala'}, clear=True): client.get_databases(query) assert ( - None == # Should be empty and not '*' with Impala - client.call.call_args[0][1].schemaName), client.call.call_args.args - + None is client.call.call_args[0][1].schemaName), client.call.call_args.args # Should be empty and not '*' with Impala def test_get_table_with_error(self): query = Mock( @@ -309,18 +300,15 @@ def test_get_table_with_error(self): try: client.get_table(database='database', table_name='table_name') except QueryServerException as e: - if sys.version_info[0] > 2: - req_string = ("TGetTablesReq(sessionHandle=TSessionHandle(sessionId=THandleIdentifier(guid=%s, secret=%s)), " - "catalogName=None, schemaName='database', tableName='table_name', tableTypes=None)")\ - % (str(original_guid), str(original_secret)) - else: - req_string = ("TGetTablesReq(schemaName='database', sessionHandle=TSessionHandle(sessionId=THandleIdentifier" - "(secret='%s', guid='%s')), tableName='table_name', tableTypes=None, catalogName=None)")\ - % ('s\\xb6\\x0ePP\\xbdL\\x17\\xa3\\x0f\\\\\\xf7K\\xe8Y\\x1d', - '\\xd9\\xe0hT\\xd6wO\\xe1\\xa3S\\xfb\\x04\\xca\\x93V\\x01') # manually adding '\' + req_string = ("TGetTablesReq(sessionHandle=TSessionHandle(sessionId=THandleIdentifier(guid=%s, secret=%s)), " + "catalogName=None, schemaName='database', tableName='table_name', tableTypes=None)")\ + % (str(original_guid), str(original_secret)) + assert ( "Bad status for request %s:\n%s" % (req_string, get_tables_res) == - str(e)) + str(e) + ) + class TestHiveServerTable(): @@ -353,7 +341,7 @@ def test_cols_impala(self): Mock(stringVal=Mock( values=[ 'comment', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', - 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":\"true\",\"salary\":\"true\",'\ + 'NULL', 'NULL', 
'{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":\"true\",\"salary\":\"true\",' '\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'true', 'insert_only', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', ], @@ -381,7 +369,6 @@ def test_cols_impala(self): assert table.cols[2] == {'col_name': 'total_emp', 'data_type': 'int', 'comment': 'NULL'} assert table.cols[3] == {'col_name': 'salary', 'data_type': 'int', 'comment': 'NULL'} - def test_cols_hive_tez(self): table_results = Mock() @@ -413,7 +400,7 @@ def test_cols_hive_tez(self): nulls='')), Mock(stringVal=Mock( values=[ - '', '', '', '', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":'\ + '', '', '', '', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":' '\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":\"true\",\"salary\":\"true\",\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'TRUE', 'insert_only ', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', @@ -442,7 +429,6 @@ def test_cols_hive_tez(self): assert table.cols[2] == {'col_name': 'total_emp', 'data_type': 'int', 'comment': ''} assert table.cols[3] == {'col_name': 'salary', 'data_type': 'int', 'comment': ''} - def test_cols_hive_llap_upstream(self): table_results = Mock() @@ -471,7 +457,7 @@ def test_cols_hive_llap_upstream(self): Mock(stringVal=Mock( values=[ 'comment', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', - 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":\"true\",\"salary\":\"true\",'\ + 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":\"true\",\"salary\":\"true\",' '\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'true', 'insert_only', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', ], @@ -499,7 +485,6 @@ def test_cols_hive_llap_upstream(self): assert table.cols[2] == {'col_name': 'total_emp', 'data_type': 'int', 'comment': 'NULL'} assert table.cols[3] == {'col_name': 'salary', 'data_type': 'int', 'comment': 'NULL'} - def test_partition_keys_impala(self): table_results = Mock() @@ -530,7 +515,7 @@ def test_partition_keys_impala(self): nulls='')), Mock(stringVal=Mock( values=['comment', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'comment', 'NULL', 'NULL', - 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":'\ + 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":' '{\"code\":\"true\",\"description\":\"true\",\"salary\":\"true\",\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'true', 'insert_only', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', ], @@ -557,7 +542,6 @@ def test_partition_keys_impala(self): assert table.partition_keys[0].type == 'string' assert table.partition_keys[0].comment == 'NULL' - def test_partition_keys_hive(self): table_results = Mock() @@ -590,7 +574,7 @@ def test_partition_keys_hive(self): Mock(stringVal=Mock( values=[ 'comment', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'comment', '', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', - 
'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":'\ + 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":' '\"true\",\"salary\":\"true\",\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'true', 'insert_only', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', ], @@ -617,7 +601,6 @@ def test_partition_keys_hive(self): assert table.partition_keys[0].type == 'string' assert table.partition_keys[0].comment == '' - def test_single_primary_key_hive(self): table_results = Mock() @@ -650,7 +633,7 @@ def test_single_primary_key_hive(self): Mock(stringVal=Mock( values=[ 'comment', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'comment', '', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', - 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":'\ + 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":' '\"true\",\"salary\":\"true\",\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'true', 'insert_only', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL' @@ -678,7 +661,6 @@ def test_single_primary_key_hive(self): assert table.primary_keys[0].type == 'NULL' assert table.primary_keys[0].comment == 'NULL' - def test_multi_primary_keys_hive(self): table_results = Mock() @@ -712,7 +694,7 @@ def test_multi_primary_keys_hive(self): Mock(stringVal=Mock( values=[ 'comment', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'comment', '', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', - 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":'\ + 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",\"description\":' '\"true\",\"salary\":\"true\",\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'true', 'insert_only', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL' @@ -744,7 +726,6 @@ def test_multi_primary_keys_hive(self): assert table.primary_keys[1].type == 'NULL' assert table.primary_keys[1].comment == 'NULL' - def test_foreign_keys_hive(self): table_results = Mock() @@ -787,7 +768,7 @@ def test_foreign_keys_hive(self): stringVal=Mock( values=[ 'comment', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'comment', '', 'NULL', 'NULL', 'NULL', 'NULL', - 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",'\ + 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"code\":\"true\",' '\"description\":\"true\",\"salary\":\"true\",\"total_emp\":\"true\"}}', '2', '1', '822', '3288', '48445', 'true', 'insert_only', '1572882268', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', '1', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL', 'Key Sequence:1', @@ -849,13 +830,13 @@ def test_call_session_single(self): # Reuse session from argument (res, session2) = client.call(fn, req, status=None, session=session1) - 
open_session.assert_called_once() # open_session should not be called again, because we're reusing session + open_session.assert_called_once() # open_session should not be called again, because we're reusing session assert session1 == session2 # Reuse session from get_session get_session.return_value = session1 (res, session3) = client.call(fn, req, status=None) - open_session.assert_called_once() # open_session should not be called again, because we're reusing session + open_session.assert_called_once() # open_session should not be called again, because we're reusing session assert session1 == session3 finally: for f in finish: @@ -884,13 +865,13 @@ def test_call_session_pool(self): # Reuse session from argument (res, session2) = client.call(fn, req, status=None, session=session1) - open_session.assert_called_once() # open_session should not be called again, because we're reusing session + open_session.assert_called_once() # open_session should not be called again, because we're reusing session assert session1 == session2 # Reuse session from get_session get_session.return_value = session1 (res, session3) = client.call(fn, req, status=None) - open_session.assert_called_once() # open_session should not be called again, because we're reusing session + open_session.assert_called_once() # open_session should not be called again, because we're reusing session assert session1 == session3 finally: for f in finish: @@ -941,7 +922,7 @@ def test_call_session_close_idle(self): # Reuse session from argument (res, session2) = client.call(fn, req, status=None, session=session1) - open_session.assert_called_once() # open_session should not be called again, because we're reusing session + open_session.assert_called_once() # open_session should not be called again, because we're reusing session assert session1 == session2 # Create new session @@ -999,7 +980,7 @@ def test_call_session_close_idle_managed_queries(self): assert open_session.call_count == 6 assert close_session.call_count == 6 - res = client.get_partitions(MagicMock(), MagicMock()) # get_partitions does 2 requests with 1 session each + res = client.get_partitions(MagicMock(), MagicMock()) # get_partitions does 2 requests with 1 session each assert open_session.call_count == 8 assert close_session.call_count == 8 finally: @@ -1033,8 +1014,8 @@ def test_call_session_close_idle_limit(self): for f in finish: f() -class TestHiveServerClientCompatible(): +class TestHiveServerClientCompatible(): def test_get_tables_meta(self): client = Mock( diff --git a/apps/beeswax/src/beeswax/tests.py b/apps/beeswax/src/beeswax/tests.py index 9c93b721709..875dbb5cc3b 100644 --- a/apps/beeswax/src/beeswax/tests.py +++ b/apps/beeswax/src/beeswax/tests.py @@ -16,79 +16,83 @@ # See the License for the specific language governing permissions and # limitations under the License. 
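# --- Illustrative sketch, not part of the patch: the assert_called_once() checks above follow
# --- this general unittest.mock pattern. CachingClient and its backend are hypothetical names
# --- used only to show the technique, not Hue classes.
from unittest.mock import Mock


class CachingClient(object):
  """Opens a backend session on first use and reuses it for later calls."""

  def __init__(self, backend):
    self.backend = backend
    self._session = None

  def call(self, request):
    if self._session is None:
      self._session = self.backend.open_session()
    return self.backend.execute(self._session, request)


def test_session_is_reused():
  backend = Mock()
  client = CachingClient(backend)
  client.call('req 1')
  client.call('req 2')
  backend.open_session.assert_called_once()  # the second call reused the cached session
  assert backend.execute.call_count == 2


test_session_is_reused()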
-from future import standard_library -from builtins import next, map, str, chr, range, object +import os +import re +import sys import gzip import json -import logging -import os import random -import re import shutil import socket import string -import sys +import logging import tempfile import threading -import pytest -import hadoop +from io import BytesIO as string_io +from unittest.mock import patch +import pytest +from django.db import transaction from django.test import TestCase +from django.urls import reverse from django.utils.encoding import smart_str from django.utils.html import escape -from django.urls import reverse -from django.db import transaction - -from desktop.lib.exceptions_renderable import PopupException -from desktop.conf import AUTH_USERNAME as DEFAULT_AUTH_USERNAME, AUTH_PASSWORD as DEFAULT_AUTH_PASSWORD, \ - AUTH_PASSWORD_SCRIPT as DEFAULT_AUTH_PASSWORD_SCRIPT, LDAP_USERNAME, LDAP_PASSWORD, USE_NEW_EDITOR -from desktop import redaction -from desktop.redaction import logfilter -from desktop.redaction.engine import RedactionPolicy, RedactionRule -from desktop.lib.django_test_util import make_logged_in_client, assert_equal_mod_whitespace -from desktop.lib.parameterization import substitute_variables -from desktop.lib.python_util import from_string_to_bits, get_bytes_from_bits -from desktop.lib.test_utils import grant_access, add_to_group -from desktop.lib.security_util import get_localhost_name -from desktop.lib.export_csvxls_tests import _read_xls_sheet_data -from hadoop.fs.hadoopfs import Hdfs -from useradmin.models import User - -from hadoop import ssl_client_site -from hadoop.pseudo_hdfs4 import is_live_cluster +import hadoop import desktop.conf as desktop_conf - -import beeswax.create_table -import beeswax.hive_site -import beeswax.models import beeswax.views - +import beeswax.models +import beeswax.hive_site +import beeswax.create_table from beeswax import conf, hive_site from beeswax.common import apply_natural_sort -from beeswax.conf import HIVE_SERVER_HOST, AUTH_USERNAME, AUTH_PASSWORD, AUTH_PASSWORD_SCRIPT -from beeswax.views import collapse_whitespace, _save_design, parse_out_jobs, parse_out_queries -from beeswax.test_base import make_query, wait_for_query_to_finish, verify_history, get_query_server_config, fetch_query_result_data +from beeswax.conf import AUTH_PASSWORD, AUTH_PASSWORD_SCRIPT, AUTH_USERNAME, HIVE_SERVER_HOST +from beeswax.data_export import download, upload from beeswax.design import hql_query -from beeswax.data_export import upload, download -from beeswax.models import SavedQuery, QueryHistory, HQL, HIVE_SERVER2 +from beeswax.hive_site import get_metastore, hiveserver2_jdbc_url +from beeswax.models import HIVE_SERVER2, HQL, QueryHistory, SavedQuery from beeswax.server import dbms from beeswax.server.dbms import QueryServerException -from beeswax.server.hive_server2_lib import HiveServerClient, PartitionKeyCompatible, PartitionValueCompatible, HiveServerTable, \ - HiveServerTColumnValue2 -from beeswax.test_base import BeeswaxSampleProvider, is_hive_on_spark, get_available_execution_engines -from beeswax.hive_site import get_metastore, hiveserver2_jdbc_url - -standard_library.install_aliases() - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock - from io import BytesIO as string_io - open_file = open -else: - from mock import patch, Mock - from cStringIO import StringIO as string_io - open_file = file +from beeswax.server.hive_server2_lib import ( + HiveServerClient, + HiveServerTable, + HiveServerTColumnValue2, + 
PartitionKeyCompatible, + PartitionValueCompatible, +) +from beeswax.test_base import ( + BeeswaxSampleProvider, + fetch_query_result_data, + get_available_execution_engines, + get_query_server_config, + is_hive_on_spark, + make_query, + verify_history, + wait_for_query_to_finish, +) +from beeswax.views import _save_design, collapse_whitespace, parse_out_jobs, parse_out_queries +from desktop import redaction +from desktop.conf import ( + AUTH_PASSWORD as DEFAULT_AUTH_PASSWORD, + AUTH_PASSWORD_SCRIPT as DEFAULT_AUTH_PASSWORD_SCRIPT, + AUTH_USERNAME as DEFAULT_AUTH_USERNAME, + LDAP_PASSWORD, + LDAP_USERNAME, + USE_NEW_EDITOR, +) +from desktop.lib.django_test_util import assert_equal_mod_whitespace, make_logged_in_client +from desktop.lib.exceptions_renderable import PopupException +from desktop.lib.export_csvxls_tests import _read_xls_sheet_data +from desktop.lib.parameterization import substitute_variables +from desktop.lib.python_util import from_string_to_bits, get_bytes_from_bits +from desktop.lib.security_util import get_localhost_name +from desktop.lib.test_utils import add_to_group, grant_access +from desktop.redaction import logfilter +from desktop.redaction.engine import RedactionPolicy, RedactionRule +from hadoop import ssl_client_site +from hadoop.fs.hadoopfs import Hdfs +from hadoop.pseudo_hdfs4 import is_live_cluster +from useradmin.models import User LOG = logging.getLogger() @@ -433,7 +437,7 @@ def test_query_with_remote_udf(self): # BeeswaxTest.jar is gone pytest.skip("Skipping Test") - src = open_file(os.path.join(os.path.dirname(__file__), "..", "..", "java-lib", "BeeswaxTest.jar")) + src = open(os.path.join(os.path.dirname(__file__), "..", "..", "java-lib", "BeeswaxTest.jar")) udf = self.cluster.fs_prefix + "hive1157.jar" dest = self.cluster.fs.open(udf, "w") shutil.copyfileobj(src, dest) @@ -2102,10 +2106,7 @@ def test_hs2_log_verbose(self): def test_import_gzip_reader(): """Test the gzip reader in create table""" # Make gzipped data - if sys.version_info[0] > 2: - data = open(__file__, encoding='utf-8').read() - else: - data = file(__file__).read() + data = open(__file__, encoding='utf-8').read() data_gz_sio = string_io() gz = gzip.GzipFile(fileobj=data_gz_sio, mode='wb') gz_data = data @@ -2247,7 +2248,7 @@ def get(self): return tmpdir xml = hive_site_xml(is_local=True, use_sasl=False) - open_file(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) beeswax.hive_site.reset() saved = beeswax.conf.HIVE_CONF_DIR @@ -2278,7 +2279,7 @@ def get(self): is_local=False, use_sasl=False, thrift_uris=thrift_uris, kerberos_principal='test/_HOST@TEST.COM', hs2_kerberos_principal='test/_HOST@TEST.COM' ) - open_file(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) beeswax.hive_site.reset() saved = beeswax.conf.HIVE_CONF_DIR @@ -2309,7 +2310,7 @@ def get(self): return tmpdir xml = hive_site_xml(is_local=True, use_sasl=False, hs2_kerberos_principal=None) - open_file(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) beeswax.hive_site.reset() saved = beeswax.conf.HIVE_CONF_DIR @@ -2876,12 +2877,8 @@ def test_save_design(self): def test_get_history_xss(self): sql = 'SELECT count(sample_07.salary) FROM sample_07;">' - if sys.version_info[0] < 3: - sql_escaped = b'SELECT count(sample_07.salary) FROM sample_07;"><iFrAME>'\ - b'src="javascript:alert('Hue has an xss');"></iFraME>' - else: - 
sql_escaped = b'SELECT count(sample_07.salary) FROM sample_07;"><iFrAME>'\ - b'src="javascript:alert('Hue has an xss');"></iFraME>' + sql_escaped = b'SELECT count(sample_07.salary) FROM sample_07;"><iFrAME>'\ + b'src="javascript:alert('Hue has an xss');"></iFraME>' response = _make_query(self.client, sql, submission_type='Save', name='My Name 1', desc='My Description') content = json.loads(response.content) @@ -3157,7 +3154,7 @@ def get(self): return tmpdir xml = hive_site_xml(is_local=False, use_sasl=True, kerberos_principal='hive/_HOST@test.com') - open_file(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'hive-site.xml'), 'w').write(xml) beeswax.hive_site.reset() saved = beeswax.conf.HIVE_CONF_DIR diff --git a/apps/beeswax/src/beeswax/urls.py b/apps/beeswax/src/beeswax/urls.py index f58786e9832..c6867b1a251 100644 --- a/apps/beeswax/src/beeswax/urls.py +++ b/apps/beeswax/src/beeswax/urls.py @@ -15,17 +15,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - -from beeswax import views as beeswax_views -from beeswax import create_database as beeswax_create_database -from beeswax import create_table as beeswax_create_table -from beeswax import api as beeswax_api - -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from django.urls import re_path + +from beeswax import ( + api as beeswax_api, + create_database as beeswax_create_database, + create_table as beeswax_create_table, + views as beeswax_views, +) urlpatterns = [ re_path(r'^$', beeswax_views.index, name='index'), diff --git a/apps/beeswax/src/beeswax/views.py b/apps/beeswax/src/beeswax/views.py index c645a3c6a77..c70476a9f6f 100644 --- a/apps/beeswax/src/beeswax/views.py +++ b/apps/beeswax/src/beeswax/views.py @@ -15,70 +15,66 @@ # See the License for the specific language governing permissions and # limitations under the License. 
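# --- Illustrative sketch, not part of the patch: the gzip import-reader test earlier in this
# --- patch builds gzip data in memory; on Python 3 that is done with io.BytesIO plus the gzip
# --- module, as sketched here with made-up sample data.
import gzip
from io import BytesIO

buf = BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
  gz.write(b'some sample rows\nto be imported\n')

buf.seek(0)
with gzip.GzipFile(fileobj=buf, mode='rb') as gz:
  assert gz.read() == b'some sample rows\nto be imported\n'

# gzip.decompress() is the whole-buffer shortcut used elsewhere in this patch:
assert gzip.decompress(buf.getvalue()) == b'some sample rows\nto be imported\n'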
-from builtins import next, str -import json -import logging import re import sys +import json import time +import logging from django import forms -from django.core.paginator import Paginator, EmptyPage, InvalidPage from django.contrib import messages +from django.core.paginator import EmptyPage, Paginator from django.db.models import Q from django.http import HttpResponse, QueryDict from django.shortcuts import redirect -from django.utils.html import escape from django.urls import reverse +from django.utils.html import escape +from django.utils.translation import gettext as _ +import beeswax.forms +import beeswax.design +from beeswax import common, data_export, models +from beeswax.management.commands import beeswax_install_examples +from beeswax.models import QueryHistory, SavedQuery, Session +from beeswax.server import dbms +from beeswax.server.dbms import QueryServerException, expand_exception, get_query_server_config from desktop.appmanager import get_apps_dict +from desktop.auth.backend import is_admin from desktop.conf import ENABLE_DOWNLOAD, REDIRECT_WHITELIST from desktop.context_processors import get_app_name - -from desktop.lib.django_util import JsonResponse -from desktop.lib.django_util import copy_query_dict, format_preserving_redirect, render -from desktop.lib.django_util import login_notrequired, get_desktop_uri_prefix +from desktop.lib.django_util import ( + JsonResponse, + copy_query_dict, + format_preserving_redirect, + get_desktop_uri_prefix, + login_notrequired, + render, +) from desktop.lib.exceptions_renderable import PopupException -from desktop.models import Document, _get_apps from desktop.lib.parameterization import find_variables +from desktop.models import Document, _get_apps from desktop.views import serve_403_error from notebook.models import escape_rows from useradmin.models import User -import beeswax.forms -import beeswax.design - -from beeswax import common, data_export, models -from beeswax.management.commands import beeswax_install_examples -from beeswax.models import QueryHistory, SavedQuery, Session -from beeswax.server import dbms -from beeswax.server.dbms import expand_exception, get_query_server_config, QueryServerException - -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() # For scraping Job IDs from logs HADOOP_JOBS_RE = re.compile("Starting Job = ([a-z0-9_]+?),") -SPARK_APPLICATION_RE = re.compile("Running with YARN Application = (?Papplication_\d+_\d+)") -TEZ_APPLICATION_RE = re.compile("Executing on YARN cluster with App id ([a-z0-9_]+?)\)") -TEZ_QUERY_RE = re.compile("\(queryId=([a-z0-9_-]+?)\)") - +SPARK_APPLICATION_RE = re.compile(r"Running with YARN Application = (?Papplication_\d+_\d+)") +TEZ_APPLICATION_RE = re.compile(r"Executing on YARN cluster with App id ([a-z0-9_]+?)\)") +TEZ_QUERY_RE = re.compile(r"\(queryId=([a-z0-9_-]+?)\)") def index(request): return execute_query(request) + """ Design views """ + def save_design(request, form, type_, design, explicit_save): """ save_design(request, form, type_, design, explicit_save) -> SavedQuery @@ -95,7 +91,7 @@ def save_design(request, form, type_, design, explicit_save): """ authorized_get_design(request, design.id) assert form.saveform.is_valid() - sub_design_form = form # Beeswax/Impala case + sub_design_form = form # Beeswax/Impala case if type_ == models.HQL: design_cls = beeswax.design.HQLdesign @@ -371,7 +367,6 @@ def 
list_query_history(request): } return JsonResponse(resp) - return render('list_history.mako', request, { 'request': request, 'page': page, @@ -414,10 +409,12 @@ def download(request, id, format, user_agent=None): message = e.message raise PopupException(message, detail='') + """ Queries Views """ + def execute_query(request, design_id=None, query_history_id=None): """ View function for executing an arbitrary query. @@ -462,7 +459,7 @@ def execute_query(request, design_id=None, query_history_id=None): context = { 'design': design, 'apps': apps_list, - 'query': query_history, # Backward + 'query': query_history, # Backward 'query_history': query_history, 'autocomplete_base_url': reverse(get_app_name(request) + ':api_autocomplete_databases', kwargs={}), 'autocomplete_base_url_hive': reverse('beeswax:api_autocomplete_databases', kwargs={}), @@ -616,6 +613,7 @@ def configuration(request): Other views """ + def install_examples(request): response = {'status': -1, 'message': ''} @@ -686,6 +684,8 @@ def query_done_cb(request, server_id): """ Utils """ + + def massage_columns_for_json(cols): massaged_cols = [] for column in cols: @@ -885,7 +885,7 @@ def _list_designs(user, querydict, page_size, prefix="", is_trashed=False): sort_dir, sort_attr = DEFAULT_SORT db_queryset = db_queryset.order_by(sort_dir + SORT_ATTR_TRANSLATION[sort_attr]) - designs = [job.content_object for job in db_queryset.all() if job.content_object and job.content_object.is_auto == False] + designs = [job.content_object for job in db_queryset.all() if job.content_object and job.content_object.is_auto is False] pagenum = int(querydict.get(prefix + 'page', 1)) paginator = Paginator(designs, page_size, allow_empty_first_page=True) @@ -972,6 +972,7 @@ def parse_out_jobs(log, engine='mr', with_state=False): return ret + def parse_out_queries(log, engine=None, with_state=False): """ Ideally, Hive would tell us what jobs it has run directly from the Thrift interface. @@ -1013,6 +1014,7 @@ def parse_out_queries(log, engine=None, with_state=False): return ret + def _copy_prefix(prefix, base_dict): """Copy keys starting with ``prefix``""" querydict = QueryDict(None, mutable=True) @@ -1154,6 +1156,8 @@ def get_db_choices(request): return [(db, db) for db in dbs] -WHITESPACE = re.compile("\s+", re.MULTILINE) +WHITESPACE = re.compile(r"\s+", re.MULTILINE) + + def collapse_whitespace(s): return WHITESPACE.sub(" ", s).strip() diff --git a/apps/beeswax/src/beeswax/views_tests.py b/apps/beeswax/src/beeswax/views_tests.py index a0306e7bd97..1caafedeefe 100644 --- a/apps/beeswax/src/beeswax/views_tests.py +++ b/apps/beeswax/src/beeswax/views_tests.py @@ -16,22 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
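# --- Illustrative sketch, not part of the patch: parse_out_jobs()/parse_out_queries() above scan
# --- query logs with precompiled regexes. A simplified, standalone version of that idea; the
# --- pattern and sample log text here are made up, not the Hue ones.
import re

JOB_ID_RE = re.compile(r"Starting Job = (job_[0-9]+_[0-9]+)")

sample_log = """\
INFO  : Starting Job = job_1572882268_0001, Tracking URL = http://rm:8088/...
INFO  : Starting Job = job_1572882268_0002, Tracking URL = http://rm:8088/...
"""

job_ids = JOB_ID_RE.findall(sample_log)
assert job_ids == ['job_1572882268_0001', 'job_1572882268_0002']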
-import logging -import json import sys +import json +import logging +from unittest.mock import patch -from django.urls import reverse import pytest +from django.urls import reverse from desktop.lib.django_test_util import make_logged_in_client from useradmin.models import User -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - - LOG = logging.getLogger() @@ -42,7 +37,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=True, is_admin=True) self.user = User.objects.get(username="test") - def test_install_via_insert_mysql(self): with patch('beeswax.views.beeswax_install_examples.SampleTable') as SampleTable: diff --git a/apps/filebrowser/src/filebrowser/api.py b/apps/filebrowser/src/filebrowser/api.py index 8da74bebfb8..bd501902798 100644 --- a/apps/filebrowser/src/filebrowser/api.py +++ b/apps/filebrowser/src/filebrowser/api.py @@ -28,7 +28,7 @@ from desktop.lib.django_util import JsonResponse from desktop.lib.fs.gc.gs import get_gs_home_directory from desktop.lib.fs.ozone.ofs import get_ofs_home_directory -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from filebrowser.views import _normalize_path LOG = logging.getLogger() @@ -42,7 +42,7 @@ def decorator(*args, **kwargs): except Exception as e: LOG.exception('Error running %s' % view_fn) response['status'] = -1 - response['message'] = smart_unicode(e) + response['message'] = smart_str(e) return JsonResponse(response) return decorator diff --git a/apps/filebrowser/src/filebrowser/conf.py b/apps/filebrowser/src/filebrowser/conf.py index 960390cc0ad..80d3895dbf7 100644 --- a/apps/filebrowser/src/filebrowser/conf.py +++ b/apps/filebrowser/src/filebrowser/conf.py @@ -15,17 +15,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import sys +from django.utils.translation import gettext_lazy as _ from desktop.conf import ENABLE_DOWNLOAD, is_oozie_enabled from desktop.lib.conf import Config, coerce_bool -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ - MAX_SNAPPY_DECOMPRESSION_SIZE = Config( key="max_snappy_decompression_size", help=_("Max snappy decompression size in bytes."), diff --git a/apps/filebrowser/src/filebrowser/forms.py b/apps/filebrowser/src/filebrowser/forms.py index 7d75b975319..8daacb76178 100644 --- a/apps/filebrowser/src/filebrowser/forms.py +++ b/apps/filebrowser/src/filebrowser/forms.py @@ -15,35 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
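# --- Illustrative sketch, not part of the patch: the filebrowser error_handler decorator above
# --- wraps a view and turns exceptions into a JSON error payload. The shape below is a generic,
# --- framework-free version of that pattern (plain dicts instead of JsonResponse).
import logging
from functools import wraps

LOG = logging.getLogger(__name__)


def error_handler(view_fn):
  @wraps(view_fn)
  def decorator(*args, **kwargs):
    try:
      return view_fn(*args, **kwargs)
    except Exception as e:
      LOG.exception('Error running %s' % view_fn.__name__)
      return {'status': -1, 'message': str(e)}
  return decorator


@error_handler
def risky_view(path):
  if not path.startswith('/'):
    raise ValueError('Path must be absolute: %s' % path)
  return {'status': 0, 'path': path}


assert risky_view('/user/test')['status'] == 0
assert risky_view('relative/path')['status'] == -1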
-from future import standard_library -standard_library.install_aliases() -from builtins import zip -from builtins import range import logging -import sys -import urllib.request, urllib.error +import urllib.error +import urllib.request +from urllib.parse import unquote as urllib_unquote from django import forms -from django.forms import FileField, CharField, BooleanField, Textarea -from django.forms.formsets import formset_factory, BaseFormSet +from django.forms import BooleanField, CharField, FileField, Textarea +from django.forms.formsets import BaseFormSet, formset_factory +from django.utils.translation import gettext_lazy as _ from aws.s3 import S3A_ROOT, normpath as s3_normpath from azure.abfs.__init__ import ABFS_ROOT, normpath as abfs_normpath -from desktop.lib.fs.ozone import OFS_ROOT, normpath as ofs_normpath -from desktop.lib.fs.gc import GS_ROOT, normpath as gs_normpath from desktop.lib import i18n -from hadoop.fs import normpath -from useradmin.models import User, Group - +from desktop.lib.fs.gc import GS_ROOT, normpath as gs_normpath +from desktop.lib.fs.ozone import OFS_ROOT, normpath as ofs_normpath from filebrowser.lib import rwx - -if sys.version_info[0] > 2: - from urllib.parse import unquote as urllib_unquote - from django.utils.translation import gettext_lazy as _ -else: - from urllib import unquote as urllib_unquote - from django.utils.translation import ugettext_lazy as _ - +from hadoop.fs import normpath +from useradmin.models import Group, User logger = logging.getLogger() @@ -108,31 +97,39 @@ def clean_encoding(self): return i18n.get_site_encoding() return encoding + class RenameForm(forms.Form): op = "rename" src_path = CharField(label=_("File to rename"), help_text=_("The file to rename.")) dest_path = CharField(label=_("New name"), help_text=_("Rename the file to:")) + class BaseRenameFormSet(FormSet): op = "rename" + RenameFormSet = formset_factory(RenameForm, formset=BaseRenameFormSet, extra=0) + class CopyForm(forms.Form): op = "copy" src_path = CharField(label=_("File to copy"), help_text=_("The file to copy.")) dest_path = CharField(label=_("Destination location"), help_text=_("Copy the file to:")) + class BaseCopyFormSet(FormSet): op = "copy" + CopyFormSet = formset_factory(CopyForm, formset=BaseCopyFormSet, extra=0) + class SetReplicationFactorForm(forms.Form): op = "setreplication" src_path = CharField(label=_("File to set replication factor"), help_text=_("The file to set replication factor.")) replication_factor = CharField(label=_("Value of replication factor"), help_text=_("The value of replication factor.")) + class UploadFileForm(forms.Form): op = "upload" # The "hdfs" prefix in "hdfs_file" triggers the HDFSfileUploadHandler @@ -140,54 +137,68 @@ class UploadFileForm(forms.Form): dest = PathField(label=_("Destination Path"), help_text=_("Filename or directory to upload to."), required=False) # Used actually? 
extract_archive = BooleanField(required=False) + class UploadLocalFileForm(forms.Form): op = "upload" file = FileField(label=_("File to Upload")) + class UploadArchiveForm(forms.Form): op = "upload" archive = FileField(label=_("Archive to Upload")) dest = PathField(label=_("Destination Path"), help_text=_("Archive to upload to.")) + class RemoveForm(forms.Form): op = "remove" path = PathField(label=_("File to remove")) + class RmDirForm(forms.Form): op = "rmdir" path = PathField(label=_("Directory to remove")) + class RmTreeForm(forms.Form): op = "rmtree" path = PathField(label=_("Directory to remove (recursively)")) + class BaseRmTreeFormset(FormSet): op = "rmtree" + RmTreeFormSet = formset_factory(RmTreeForm, formset=BaseRmTreeFormset, extra=0) + class RestoreForm(forms.Form): op = "rmtree" path = PathField(label=_("Path to restore")) + class BaseRestoreFormset(FormSet): op = "restore" + RestoreFormSet = formset_factory(RestoreForm, formset=BaseRestoreFormset, extra=0) + class TrashPurgeForm(forms.Form): op = "purge_trash" + class MkDirForm(forms.Form): op = "mkdir" path = PathField(label=_("Path in which to create the directory")) name = PathField(label=_("Directory Name")) + class TouchForm(forms.Form): op = "touch" path = PathField(label=_("Path in which to create the file")) name = PathField(label=_("File Name")) + class ChownForm(forms.Form): op = "chown" path = PathField(label=_("Path to change user/group ownership")) @@ -205,11 +216,14 @@ def __init__(self, *args, **kwargs): self.all_groups = [group.name for group in Group.objects.all()] self.all_users = [user.username for user in User.objects.all()] + class BaseChownFormSet(FormSet): op = "chown" + ChownFormSet = formset_factory(ChownForm, formset=BaseChownFormSet, extra=0) + class ChmodForm(forms.Form): op = "chmod" path = PathField(label=_("Path to change permissions")) @@ -252,7 +266,9 @@ def full_clean(self): if hasattr(self, "cleaned_data"): self.cleaned_data["mode"] = rwx.compress_mode([self.cleaned_data[name] for name in self.names]) + class BaseChmodFormSet(FormSet): op = "chmod" + ChmodFormSet = formset_factory(ChmodForm, formset=BaseChmodFormSet, extra=0) diff --git a/apps/filebrowser/src/filebrowser/lib/archives.py b/apps/filebrowser/src/filebrowser/lib/archives.py index 3e68c7d7f15..21a0bdf1919 100644 --- a/apps/filebrowser/src/filebrowser/lib/archives.py +++ b/apps/filebrowser/src/filebrowser/lib/archives.py @@ -17,24 +17,19 @@ # # Utilities for dealing with file modes. -from past.builtins import basestring -from builtins import object -import bz2 import os -import posixpath -import sys +import bz2 import tarfile import tempfile - -from desktop.lib.exceptions_renderable import PopupException -from filebrowser.conf import ARCHIVE_UPLOAD_TEMPDIR +import posixpath +from builtins import object from zipfile import ZipFile -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ +from past.builtins import basestring +from desktop.lib.exceptions_renderable import PopupException +from filebrowser.conf import ARCHIVE_UPLOAD_TEMPDIR __all__ = ['archive_factory'] @@ -65,6 +60,7 @@ def _create_dirs(self, basepath, dirs=[]): except OSError: pass + class ZipArchive(Archive): """ Acts on a zip file in memory or in a temporary location. 
@@ -72,10 +68,8 @@ class ZipArchive(Archive): """ def __init__(self, file): - if sys.version_info[0] > 2: - self.file = isinstance(file, basestring) and file - else: - self.file = isinstance(file, basestring) and open(file) or file + self.file = isinstance(file, basestring) and file + self.zfh = ZipFile(self.file) def extract(self): @@ -248,6 +242,7 @@ def archive_factory(path, archive_type='zip'): elif archive_type == 'bz2' or archive_type == 'bzip2': return BZ2Archive(path) + class IllegalPathException(PopupException): def __init__(self): diff --git a/apps/filebrowser/src/filebrowser/lib/xxd_test.py b/apps/filebrowser/src/filebrowser/lib/xxd_test.py index dfd51ca0c5b..38ca2aa4da9 100644 --- a/apps/filebrowser/src/filebrowser/lib/xxd_test.py +++ b/apps/filebrowser/src/filebrowser/lib/xxd_test.py @@ -15,37 +15,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from future import standard_library -standard_library.install_aliases() -from builtins import range -import unittest -import logging -import pytest import random -import sys +import logging +import unittest import subprocess +from io import StringIO as string_io +from subprocess import PIPE, Popen -from filebrowser.lib import xxd - +import pytest from django.test import TestCase -from subprocess import Popen, PIPE -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from cStringIO import StringIO as string_io +from filebrowser.lib import xxd LOG = logging.getLogger() -LENGTH = 1024*10 # 10KB +LENGTH = 1024 * 10 # 10KB + class XxdTest(TestCase): def test_mask_not_alphanumeric(self): - assert (1, ". X") == xxd.mask_not_alphanumeric("\n X") + assert (1, ". X") == xxd.mask_not_alphanumeric("\n X") def test_mask_not_printable(self): - assert (2, "..@") == xxd.mask_not_alphanumeric("\xff\x90\x40") + assert (2, "..@") == xxd.mask_not_alphanumeric("\xff\x90\x40") def _get_offset_width(self, line): offset, match, _ = line.partition(":") @@ -103,5 +95,6 @@ def test_compare_to_xxd(self): xxd.main(string_io(random_text), output) self._verify_content(stdin, output.getvalue()) + if __name__ == "__main__": unittest.main() diff --git a/apps/filebrowser/src/filebrowser/urls.py b/apps/filebrowser/src/filebrowser/urls.py index fec525c9aa6..c2ebc778b5d 100644 --- a/apps/filebrowser/src/filebrowser/urls.py +++ b/apps/filebrowser/src/filebrowser/urls.py @@ -15,15 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys +from django.urls import re_path from filebrowser import api as filebrowser_api, utils as filebrowser_utils, views as filebrowser_views -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path - urlpatterns = [ # Base view re_path(r'^$', filebrowser_views.index, name='index'), diff --git a/apps/filebrowser/src/filebrowser/views.py b/apps/filebrowser/src/filebrowser/views.py index b466d96d2f9..386791e5f59 100644 --- a/apps/filebrowser/src/filebrowser/views.py +++ b/apps/filebrowser/src/filebrowser/views.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
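# --- Illustrative sketch, not part of the patch: the ZipArchive helper above hands a file to
# --- zipfile.ZipFile and walks its entries. A minimal in-memory round trip with the stdlib,
# --- using made-up entry names:
import zipfile
from io import BytesIO

buf = BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
  zf.writestr('dir/a.txt', 'hello')

with zipfile.ZipFile(buf) as zf:
  assert zf.namelist() == ['dir/a.txt']
  assert zf.read('dir/a.txt') == b'hello'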
+import io import os import re import sys @@ -26,11 +27,15 @@ import posixpath import urllib.error import urllib.request -from builtins import object +from builtins import object, str as new_str from bz2 import decompress from datetime import datetime from functools import partial +from gzip import decompress as decompress_gzip +from io import StringIO as string_io +from urllib.parse import quote as urllib_quote, unquote as urllib_unquote, urlparse as lib_urlparse +from avro import datafile, io from django.core.files.uploadhandler import FileUploadHandler, StopFutureHandlers, StopUpload from django.core.paginator import EmptyPage, InvalidPage, Page, Paginator from django.http import Http404, HttpResponse, HttpResponseForbidden, HttpResponseNotModified, HttpResponseRedirect, StreamingHttpResponse @@ -39,6 +44,7 @@ from django.urls import reverse from django.utils.html import escape from django.utils.http import http_date +from django.utils.translation import gettext as _ from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_http_methods from django.views.static import was_modified_since @@ -100,28 +106,6 @@ from hadoop.fs.upload import HDFSFineUploaderChunkedUpload, LocalFineUploaderChunkedUpload from useradmin.models import Group, User -if sys.version_info[0] > 2: - import io - from builtins import str as new_str - from gzip import decompress as decompress_gzip - from io import StringIO as string_io - from urllib.parse import quote as urllib_quote, unquote as urllib_unquote, urlparse as lib_urlparse - - from avro import datafile, io - from django.utils.translation import gettext as _ -else: - from urllib import quote as urllib_quote, unquote as urllib_unquote - - from cStringIO import StringIO as string_io - from urlparse import urlparse as lib_urlparse - new_str = unicode - from gzip import GzipFile - - import parquet - from avro import datafile, io - from django.utils.translation import ugettext as _ - - DEFAULT_CHUNK_SIZE_BYTES = 1024 * 4 # 4KB MAX_CHUNK_SIZE_BYTES = 1024 * 1024 # 1MB @@ -825,24 +809,17 @@ def display(request, path): # Get contents as string for text mode, or at least try uni_contents = None if not mode or mode == 'text': - if sys.version_info[0] > 2: - if not isinstance(contents, str): - uni_contents = new_str(contents, encoding, errors='replace') - is_binary = uni_contents.find(i18n.REPLACEMENT_CHAR) != -1 - # Auto-detect mode - if not mode: - mode = is_binary and 'binary' or 'text' - else: - # We already have a string. - uni_contents = contents - is_binary = False - mode = 'text' - else: + if not isinstance(contents, str): uni_contents = new_str(contents, encoding, errors='replace') is_binary = uni_contents.find(i18n.REPLACEMENT_CHAR) != -1 # Auto-detect mode if not mode: mode = is_binary and 'binary' or 'text' + else: + # We already have a string. 
+ uni_contents = contents + is_binary = False + mode = 'text' # Get contents as bytes if mode == "binary": @@ -1019,10 +996,7 @@ def _read_gzip(fhandle, path, offset, length, stats): if offset and offset != 0: raise PopupException(_("Offsets are not supported with Gzip compression.")) try: - if sys.version_info[0] > 2: - contents = decompress_gzip(fhandle.read()) - else: - contents = GzipFile('', 'r', 0, string_io(fhandle.read())).read(length) + contents = decompress_gzip(fhandle.read()) except Exception as e: logging.exception('Could not decompress file at "%s": %s' % (path, e)) raise PopupException(_("Failed to decompress file.")) @@ -1052,27 +1026,18 @@ def _read_simple(fhandle, path, offset, length, stats): def detect_gzip(contents): '''This is a silly small function which checks to see if the file is Gzip''' - if sys.version_info[0] > 2: - return contents[:2] == b'\x1f\x8b' - else: - return contents[:2] == '\x1f\x8b' + return contents[:2] == b'\x1f\x8b' def detect_bz2(contents): '''This is a silly small function which checks to see if the file is Bz2''' - if sys.version_info[0] > 2: - return contents[:3] == b'BZh' - else: - return contents[:3] == 'BZh' + return contents[:3] == b'BZh' def detect_avro(contents): '''This is a silly small function which checks to see if the file is Avro''' # Check if the first three bytes are 'O', 'b' and 'j' - if sys.version_info[0] > 2: - return contents[:3] == b'\x4F\x62\x6A' - else: - return contents[:3] == '\x4F\x62\x6A' + return contents[:3] == b'\x4F\x62\x6A' def detect_snappy(contents): diff --git a/apps/hbase/src/hbase/api.py b/apps/hbase/src/hbase/api.py index 6c640778597..9844205aabf 100644 --- a/apps/hbase/src/hbase/api.py +++ b/apps/hbase/src/hbase/api.py @@ -15,27 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
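# --- Illustrative sketch, not part of the patch: the detect_gzip()/detect_bz2()/detect_avro()
# --- helpers above compare the first bytes of a buffer against known magic numbers. Standalone
# --- example with generated data:
import bz2
import gzip


def detect_format(contents):
  """Very small signature-based sniffer; returns a label or None."""
  if contents[:2] == b'\x1f\x8b':
    return 'gzip'
  if contents[:3] == b'BZh':
    return 'bzip2'
  if contents[:3] == b'\x4F\x62\x6A':  # 'O', 'b', 'j' -- the Avro container prefix
    return 'avro'
  return None


assert detect_format(gzip.compress(b'hello')) == 'gzip'
assert detect_format(bz2.compress(b'hello')) == 'bzip2'
assert detect_format(b'Obj\x01...') == 'avro'
assert detect_format(b'plain text') is None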
-from builtins import range -from builtins import object -import json -import logging import re import csv -import sys +import json +import logging from django.utils.encoding import smart_str +from django.utils.translation import gettext as _ from desktop.lib import thrift_util from desktop.lib.exceptions_renderable import PopupException - from hbase import conf -from hbase.hbase_site import get_server_principal, get_server_authentication, is_using_thrift_ssl, is_using_thrift_http, get_thrift_transport -from hbase.server.hbase_lib import get_thrift_type, get_client_type - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from hbase.hbase_site import ( + get_server_authentication, + get_server_principal, + get_thrift_transport, + is_using_thrift_http, + is_using_thrift_ssl, +) +from hbase.server.hbase_lib import get_client_type, get_thrift_type LOG = logging.getLogger() @@ -68,12 +66,12 @@ def getClusters(self): clusters = [] try: full_config = json.loads(conf.HBASE_CLUSTERS.get().replace("'", "\"")) - except: + except Exception: LOG.debug('Failed to read HBase cluster configuration as JSON, falling back to raw configuration.') - full_config = [conf.HBASE_CLUSTERS.get()] #hack cause get() is weird + full_config = [conf.HBASE_CLUSTERS.get()] # hack cause get() is weird for config in full_config: - match = re.match('\((?P[^\(\)\|]+)\|(?P.+):(?P[0-9]+)\)', config) + match = re.match(r'\((?P[^\(\)\|]+)\|(?P.+):(?P[0-9]+)\)', config) if match: clusters += [{ 'name': match.group('name'), @@ -90,7 +88,7 @@ def getCluster(self, name): for cluster in clusters: if cluster["name"] == name: return cluster - except: + except Exception: LOG.exception('failed to get the cluster %s' % name) raise PopupException(_("Cluster by the name of %s does not exist in configuration.") % name) @@ -149,7 +147,7 @@ def getTableList(self, cluster): def getRows(self, cluster, tableName, columns, startRowKey, numRows, prefix=False): client = self.connectCluster(cluster) - if prefix == False: + if prefix is False: scanner = client.scannerOpen(tableName, smart_str(startRowKey), columns, None, doas=self.user.username) else: scanner = client.scannerOpenWithPrefix(tableName, smart_str(startRowKey), columns, None, doas=self.user.username) @@ -193,7 +191,7 @@ def getRowPartial(self, cluster, tableName, rowKey, offset, number): def deleteColumns(self, cluster, tableName, row, columns): client = self.connectCluster(cluster) Mutation = get_thrift_type('Mutation') - mutations = [Mutation(isDelete = True, column=smart_str(column)) for column in columns] + mutations = [Mutation(isDelete=True, column=smart_str(column)) for column in columns] return client.mutateRow(tableName, smart_str(row), mutations, None, doas=self.user.username) def deleteColumn(self, cluster, tableName, row, column): @@ -209,7 +207,7 @@ def putRow(self, cluster, tableName, row, data): Mutation = get_thrift_type('Mutation') for column in list(data.keys()): value = smart_str(data[column]) if data[column] is not None else None - mutations.append(Mutation(column=smart_str(column), value=value)) # must use str for API, does thrift coerce by itself? + mutations.append(Mutation(column=smart_str(column), value=value)) # must use str for API, does thrift coerce by itself? 
return client.mutateRow(tableName, smart_str(row), mutations, None, doas=self.user.username) def putColumn(self, cluster, tableName, row, column, value=None): @@ -225,8 +223,8 @@ def getRowQuerySet(self, cluster, tableName, columns, queries): aggregate_data = [] limit = conf.TRUNCATE_LIMIT.get() if not isinstance(queries, list): - queries=json.loads(queries) - queries = sorted(queries, key=lambda query: query['scan_length']) #sort by scan length + queries = json.loads(queries) + queries = sorted(queries, key=lambda query: query['scan_length']) # sort by scan length for query in queries: scan_length = int(query['scan_length']) if query['row_key'] == "null": diff --git a/apps/hbase/src/hbase/conf.py b/apps/hbase/src/hbase/conf.py index e387cf5e5ac..aefbbb06fb1 100644 --- a/apps/hbase/src/hbase/conf.py +++ b/apps/hbase/src/hbase/conf.py @@ -16,19 +16,17 @@ # limitations under the License. from __future__ import print_function -import logging + import os import sys +import logging + +from django.utils.translation import gettext as _, gettext_lazy as _t from desktop.conf import default_ssl_validate -from desktop.lib.conf import Config, validate_thrift_transport, coerce_bool +from desktop.lib.conf import Config, coerce_bool, validate_thrift_transport from hbase.hbase_site import get_thrift_transport -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ - LOG = logging.getLogger() @@ -50,7 +48,7 @@ THRIFT_TRANSPORT = Config( key="thrift_transport", default="buffered", - help=_t("Should come from hbase-site.xml, do not set. 'framed' is used to chunk up responses, used with the nonblocking server in Thrift but is not supported in Hue." + help=_t("Should come from hbase-site.xml, do not set. 'framed' is used to chunk up responses, used with the nonblocking server in Thrift but is not supported in Hue." # noqa: E501 "'buffered' used to be the default of the HBase Thrift Server. Default is buffered when not set in hbase-site.xml."), type=str ) @@ -64,7 +62,7 @@ # Hidden, just for making patching of older version of Hue easier. To remove in Hue 4. USE_DOAS = Config( key='use_doas', - help=_t('Should come from hbase-site.xml, do not set. Force Hue to use Http Thrift mode with doas impersonation, regarless of hbase-site.xml properties.'), + help=_t('Should come from hbase-site.xml, do not set. 
Force Hue to use Http Thrift mode with doas impersonation, regarless of hbase-site.xml properties.'), # noqa: E501 default=False, type=coerce_bool ) @@ -84,9 +82,9 @@ def config_validator(user): from hbase.settings import NICE_NAME try: - if not 'test' in sys.argv: # Avoid tests hanging + if 'test' not in sys.argv: # Avoid tests hanging api = HbaseApi(user=user) - cluster_name = api.getClusters()[0]['name'] # Currently pick first configured cluster + cluster_name = api.getClusters()[0]['name'] # Currently pick first configured cluster # Check connectivity api.connectCluster(cluster_name) api.getTableList(cluster_name) @@ -104,8 +102,6 @@ def config_validator(user): LOG.exception(msg) res.append((NICE_NAME, _(msg))) - - res.extend(validate_thrift_transport(THRIFT_TRANSPORT)) return res diff --git a/apps/hbase/src/hbase/hbase_site.py b/apps/hbase/src/hbase/hbase_site.py index 6da217abfa4..de8e569f3c9 100644 --- a/apps/hbase/src/hbase/hbase_site.py +++ b/apps/hbase/src/hbase/hbase_site.py @@ -18,15 +18,9 @@ import errno import logging import os.path -import sys -from hadoop import confparse from desktop.lib.security_util import get_components - -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file +from hadoop import confparse LOG = logging.getLogger() @@ -44,7 +38,6 @@ _CNF_HBASE_USE_THRIFT_SSL = 'hbase.thrift.ssl.enabled' - def reset(): global SITE_DICT SITE_DICT = None @@ -67,6 +60,7 @@ def get_server_principal(): def get_server_authentication(): return get_conf().get(_CNF_HBASE_AUTHENTICATION, 'NOSASL').upper() + def get_thrift_transport(): use_framed = get_conf().get(_CNF_HBASE_REGIONSERVER_THRIFT_FRAMED) if use_framed is not None: @@ -75,20 +69,23 @@ def get_thrift_transport(): else: return "buffered" else: - #Avoid circular import + # Avoid circular import from hbase.conf import THRIFT_TRANSPORT return THRIFT_TRANSPORT.get() + def is_impersonation_enabled(): - #Avoid circular import + # Avoid circular import from hbase.conf import USE_DOAS return get_conf().get(_CNF_HBASE_IMPERSONATION_ENABLED, 'FALSE').upper() == 'TRUE' or USE_DOAS.get() + def is_using_thrift_http(): - #Avoid circular import + # Avoid circular import from hbase.conf import USE_DOAS return get_conf().get(_CNF_HBASE_USE_THRIFT_HTTP, 'FALSE').upper() == 'TRUE' or USE_DOAS.get() + def is_using_thrift_ssl(): return get_conf().get(_CNF_HBASE_USE_THRIFT_SSL, 'FALSE').upper() == 'TRUE' @@ -97,11 +94,11 @@ def _parse_site(): global SITE_DICT global SITE_PATH - #Avoid circular import + # Avoid circular import from hbase.conf import HBASE_CONF_DIR SITE_PATH = os.path.join(HBASE_CONF_DIR.get(), 'hbase-site.xml') try: - data = open_file(SITE_PATH, 'r').read() + data = open(SITE_PATH, 'r').read() except IOError as err: if err.errno != errno.ENOENT: LOG.error('Cannot read from "%s": %s' % (SITE_PATH, err)) @@ -109,4 +106,3 @@ def _parse_site(): data = "" SITE_DICT = confparse.ConfParse(data) - diff --git a/apps/hbase/src/hbase/management/commands/hbase_setup.py b/apps/hbase/src/hbase/management/commands/hbase_setup.py index 63e5d867016..b3f0a96505f 100644 --- a/apps/hbase/src/hbase/management/commands/hbase_setup.py +++ b/apps/hbase/src/hbase/management/commands/hbase_setup.py @@ -15,24 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
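# --- Illustrative sketch, not part of the patch: getClusters() above parses HBASE_CLUSTERS
# --- entries shaped like "(Cluster|localhost:9090)" with a regex. A standalone version of that
# --- parsing; only the 'name' group is visible in the hunk, so 'host' and 'port' are group
# --- names assumed here for readability.
import re

CLUSTER_RE = re.compile(r'\((?P<name>[^()|]+)\|(?P<host>.+):(?P<port>[0-9]+)\)')


def parse_cluster(config):
  match = CLUSTER_RE.match(config)
  if not match:
    return None
  return {
    'name': match.group('name'),
    'host': match.group('host'),
    'port': int(match.group('port')),
  }


assert parse_cluster('(Cluster|localhost:9090)') == {'name': 'Cluster', 'host': 'localhost', 'port': 9090}
assert parse_cluster('not a cluster entry') is None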
-import logging import os import sys - +import logging from datetime import datetime, timedelta from django.core.management.base import BaseCommand +from django.utils.translation import gettext as _ +from hbased.ttypes import AlreadyExists from desktop.lib.paths import get_apps_root -from useradmin.models import install_sample_user, User - -from hbased.ttypes import AlreadyExists from hbase.api import HbaseApi - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from useradmin.models import User, install_sample_user LOG = logging.getLogger() @@ -50,7 +44,7 @@ def handle(self, *args, **options): user = install_sample_user() api = HbaseApi(user=user) - cluster_name = api.getClusters()[0]['name'] # Currently pick first configured cluster + cluster_name = api.getClusters()[0]['name'] # Currently pick first configured cluster # Check connectivity api.connectCluster(cluster_name) @@ -61,10 +55,9 @@ def handle(self, *args, **options): self.create_binary_table(api, cluster_name) self.load_binary_table(api, cluster_name) - def create_analytics_table(self, api, cluster_name): try: - api.createTable(cluster_name, 'analytics_demo', [{'properties': {'name': 'hour'}}, {'properties': {'name': 'day'}}, {'properties': {'name': 'total'}}]) + api.createTable(cluster_name, 'analytics_demo', [{'properties': {'name': 'hour'}}, {'properties': {'name': 'day'}}, {'properties': {'name': 'total'}}]) # noqa: E501 except AlreadyExists: pass diff --git a/apps/hbase/src/hbase/tests.py b/apps/hbase/src/hbase/tests.py index 18c26af49f1..942db744b6d 100644 --- a/apps/hbase/src/hbase/tests.py +++ b/apps/hbase/src/hbase/tests.py @@ -15,30 +15,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import json import os -import shutil import sys +import json +import shutil import tempfile -import pytest +from builtins import object -from django.urls import reverse +import pytest from django.test import TestCase +from django.urls import reverse from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.test_utils import grant_access, add_to_group +from desktop.lib.test_utils import add_to_group, grant_access from hadoop.pseudo_hdfs4 import is_live_cluster -from useradmin.models import User - from hbase.api import HbaseApi from hbase.conf import HBASE_CONF_DIR -from hbase.hbase_site import get_server_authentication, get_server_principal, get_conf, reset, _CNF_HBASE_IMPERSONATION_ENABLED, is_impersonation_enabled - -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file +from hbase.hbase_site import ( + _CNF_HBASE_IMPERSONATION_ENABLED, + get_conf, + get_server_authentication, + get_server_principal, + is_impersonation_enabled, + reset, +) +from useradmin.models import User def test_security_plain(): @@ -47,7 +48,7 @@ def test_security_plain(): try: xml = hbase_site_xml() - open_file(os.path.join(tmpdir, 'hbase-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'hbase-site.xml'), 'w').write(xml) reset() assert 'NOSASL' == get_server_authentication() @@ -56,7 +57,7 @@ def test_security_plain(): security = HbaseApi._get_security() assert 'test' == security['kerberos_principal_short_name'] - assert False == security['use_sasl'] + assert False is security['use_sasl'] finally: reset() finish() @@ -69,7 +70,7 @@ def test_security_kerberos(): try: xml = hbase_site_xml(authentication='kerberos') - open_file(os.path.join(tmpdir, 'hbase-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'hbase-site.xml'), 'w').write(xml) reset() assert 'KERBEROS' == get_server_authentication() @@ -78,17 +79,14 @@ def test_security_kerberos(): security = HbaseApi._get_security() assert 'test' == security['kerberos_principal_short_name'] - assert True == security['use_sasl'] + assert True is security['use_sasl'] finally: reset() finish() shutil.rmtree(tmpdir) -def hbase_site_xml( - kerberos_principal='test/test.com@TEST.COM', - authentication='NOSASL'): - +def hbase_site_xml(kerberos_principal='test/test.com@TEST.COM', authentication='NOSASL'): return """ @@ -113,6 +111,7 @@ def test_impersonation_is_decorator_is_there(): # Decorator is still there from hbased.Hbase import do_as + @pytest.mark.django_db def test_impersonation(): from hbased import Hbase as thrift_hbase @@ -130,24 +129,22 @@ def test_impersonation(): try: client.getTableNames(doas=user.username) except AttributeError: - pass # We don't mock everything + pass # We don't mock everything finally: get_conf()[_CNF_HBASE_IMPERSONATION_ENABLED] = impersonation_enabled assert {} == proto.get_headers() - get_conf()[_CNF_HBASE_IMPERSONATION_ENABLED] = 'TRUE' try: client.getTableNames(doas=user.username) except AttributeError: - pass # We don't mock everything + pass # We don't mock everything finally: get_conf()[_CNF_HBASE_IMPERSONATION_ENABLED] = impersonation_enabled - assert {'doAs': u'test_hbase'} == proto.get_headers() - + assert {'doAs': 'test_hbase'} == proto.get_headers() class MockHttpClient(object): @@ -157,10 +154,12 @@ def __init__(self): def setCustomHeaders(self, headers): self.headers = headers + class MockTransport(object): def __init__(self): self._TBufferedTransport__trans = MockHttpClient() + class MockProtocol(object): def __init__(self): self.trans = 
MockTransport() @@ -174,10 +173,8 @@ def get_headers(self): @pytest.mark.integration class TestIntegrationWithHBase(TestCase): - @classmethod def setup_class(cls): - if not is_live_cluster(): pytest.skip('These tests can only run on a live cluster') @@ -186,7 +183,6 @@ def setup_class(cls): add_to_group('test') grant_access("test", "test", "indexer") - def test_list_tables(self): if not is_live_cluster(): pytest.skip('HUE-2910: Skipping because test is not reentrant') diff --git a/apps/hbase/src/hbase/urls.py b/apps/hbase/src/hbase/urls.py index 4d91fc1f837..ff9b18e691c 100644 --- a/apps/hbase/src/hbase/urls.py +++ b/apps/hbase/src/hbase/urls.py @@ -17,12 +17,9 @@ import sys -from hbase import views as hbase_views +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from hbase import views as hbase_views urlpatterns = [ re_path(r'^$', hbase_views.app, name='index'), diff --git a/apps/hbase/src/hbase/views.py b/apps/hbase/src/hbase/views.py index 42fa93f1c3b..8b8431521aa 100644 --- a/apps/hbase/src/hbase/views.py +++ b/apps/hbase/src/hbase/views.py @@ -15,33 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -import base64 +import re import json +import base64 import logging -import re -import sys -import urllib.request, urllib.parse, urllib.error +import urllib.error +import urllib.parse +import urllib.request +from io import StringIO as string_io + +from django.utils.translation import gettext as _ from desktop.auth.backend import is_admin from desktop.lib.django_util import JsonResponse, render - from hbase import conf -from hbase.hbase_site import is_impersonation_enabled -from hbase.settings import DJANGO_APPS from hbase.api import HbaseApi +from hbase.hbase_site import is_impersonation_enabled from hbase.management.commands import hbase_setup from hbase.server.hbase_lib import get_thrift_type - -if sys.version_info[0] > 2: - from io import StringIO as string_io - from django.utils.translation import gettext as _ -else: - from cStringIO import StringIO as string_io - from avro import datafile, io - from django.utils.translation import ugettext as _ - +from hbase.settings import DJANGO_APPS LOG = logging.getLogger() @@ -49,42 +41,45 @@ def has_write_access(user): return is_admin(user) or user.has_hue_permission(action="write", app=DJANGO_APPS[0]) or is_impersonation_enabled() + def app(request): return render('app.mako', request, { 'can_write': has_write_access(request.user), 'is_embeddable': request.GET.get('is_embeddable', False), }) + # action/cluster/arg1/arg2/arg3... 
-def api_router(request, url): # On split, deserialize anything +def api_router(request, url): # On split, deserialize anything def safe_json_load(raw): try: return json.loads(re.sub(r'(?:\")([0-9]+)(?:\")', r'\1', str(raw))) - except: + except Exception: LOG.debug('Failed to parse input as JSON, falling back to raw input.') return raw def deserialize(data): - if type(data) == dict: + if type(data) is dict: special_type = get_thrift_type(data.pop('hue-thrift-type', '')) if special_type: return special_type(data) if hasattr(data, "__iter__"): for i, item in enumerate(data): - data[i] = deserialize(item) # Sets local binding, needs to set in data + data[i] = deserialize(item) # Sets local binding, needs to set in data return data decoded_url_params = [urllib.parse.unquote(arg) for arg in re.split(r'(? 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from help import views as help_views urlpatterns = [ re_path(r'^$', help_views.view, {"app": "desktop", "path": "/index.html"}), diff --git a/apps/help/src/help/views.py b/apps/help/src/help/views.py index 13052e386e0..da6e7d3cefa 100644 --- a/apps/help/src/help/views.py +++ b/apps/help/src/help/views.py @@ -15,30 +15,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import str -from desktop.lib.django_util import render -from desktop.lib.exceptions_renderable import PopupException -from desktop import appmanager -from hadoop.fs import LocalSubFileSystem +import os +import urllib.error +import urllib.parse +import urllib.request import markdown -import urllib.request, urllib.parse, urllib.error -import os -import sys -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file +from desktop import appmanager +from desktop.lib.django_util import render +from desktop.lib.exceptions_renderable import PopupException +from hadoop.fs import LocalSubFileSystem INDEX_FILENAMES = ("index.md", "index.html", "index.txt") + def _unquote_path(path): """Normalizes paths.""" return urllib.parse.unquote(path) + def get_help_fs(app_name): """ Creates a local file system for a given app's help directory. @@ -51,6 +47,7 @@ def get_help_fs(app_name): else: raise PopupException("App '%s' is not loaded, so no help is available for it!" % app_name) + def view(request, app, path): """ Views and renders a file at a given path. @@ -88,8 +85,8 @@ def view(request, app, path): data = { 'content': content, - 'apps': sorted([ x for x in appmanager.DESKTOP_MODULES if x.help_dir ], - key = lambda app: app.menu_index), + 'apps': sorted([x for x in appmanager.DESKTOP_MODULES if x.help_dir], + key=lambda app: app.menu_index), 'title': appmanager.get_desktop_module(app).nice_name, 'current': app, 'is_embeddable': request.GET.get('is_embeddable', False), diff --git a/apps/hive/src/hive/conf.py b/apps/hive/src/hive/conf.py index 85cd1a2be84..8538ded37ff 100644 --- a/apps/hive/src/hive/conf.py +++ b/apps/hive/src/hive/conf.py @@ -15,19 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys +import logging -import beeswax.hive_site +from django.utils.translation import gettext as _, gettext_lazy as _t +import beeswax.hive_site +from beeswax.settings import NICE_NAME from desktop.conf import has_connectors from desktop.lib.exceptions import StructuredThriftTransportException -from beeswax.settings import NICE_NAME - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ LOG = logging.getLogger() @@ -42,7 +38,7 @@ def config_validator(user): v1 All the configuration happens in apps/beeswax. ''' - from beeswax.design import hql_query # dbms is dependent on beeswax.conf, import in method to avoid circular dependency + from beeswax.design import hql_query # dbms is dependent on beeswax.conf, import in method to avoid circular dependency from beeswax.server import dbms res = [] @@ -52,7 +48,7 @@ def config_validator(user): try: try: - if not 'test' in sys.argv: # Avoid tests hanging + if 'test' not in sys.argv: # Avoid tests hanging server = dbms.get(user) query = hql_query("SELECT 'Hello World!';") handle = server.execute_and_wait(query, timeout_sec=10.0) @@ -73,9 +69,9 @@ def config_validator(user): res.append((NICE_NAME, _(msg))) try: - from desktop.lib.fsmanager import get_filesystem from aws.conf import is_enabled as is_s3_enabled from azure.conf import is_abfs_enabled + from desktop.lib.fsmanager import get_filesystem warehouse = beeswax.hive_site.get_metastore_warehouse_dir() fs = get_filesystem() fs_scheme = fs._get_scheme(warehouse) diff --git a/apps/hive/src/hive/tests.py b/apps/hive/src/hive/tests.py index 1476dc910ce..3ba8d8670a4 100644 --- a/apps/hive/src/hive/tests.py +++ b/apps/hive/src/hive/tests.py @@ -15,12 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aws +from unittest.mock import Mock, patch + import pytest -import sys +import aws from desktop.lib.django_test_util import make_logged_in_client -from unittest.mock import patch, Mock @pytest.mark.django_db @@ -32,7 +32,7 @@ def test_config_check(): 'default': { 'region': 'us-east-1', 'access_key_id': 'access_key_id', - 'secret_access_key':'secret_access_key' + 'secret_access_key': 'secret_access_key' } }), warehouse = 's3a://yingsdx0602/data1/warehouse/tablespace/managed/hive' @@ -61,8 +61,7 @@ def test_config_check(): err_msg = 'Failed to access Hive warehouse: %s' % warehouse if not isinstance(err_msg, bytes): err_msg = err_msg.encode('utf-8') - assert not err_msg in resp.content, resp + assert err_msg not in resp.content, resp finally: for old_conf in reset: old_conf() - diff --git a/apps/hive/src/hive/urls.py b/apps/hive/src/hive/urls.py index 24aff11822e..b63352b2f42 100644 --- a/apps/hive/src/hive/urls.py +++ b/apps/hive/src/hive/urls.py @@ -17,11 +17,7 @@ import sys -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path - +from django.urls import re_path urlpatterns = [ ] diff --git a/apps/impala/src/impala/api.py b/apps/impala/src/impala/api.py index da9efce2a9e..d3d7a9add34 100644 --- a/apps/impala/src/impala/api.py +++ b/apps/impala/src/impala/api.py @@ -15,41 +15,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -## Main views are inherited from Beeswax. +# Main views are inherited from Beeswax. 
-import base64 -import logging +import sys import json +import base64 import struct -import sys +import logging +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from beeswax.api import error_handler from beeswax.models import Session from beeswax.server import dbms as beeswax_dbms from beeswax.views import authorized_get_query_history - from desktop.lib.django_util import JsonResponse from desktop.lib.thrift_util import unpack_guid from desktop.models import Document2 - -from jobbrowser.apis.query_api import _get_api from impala import dbms -from impala.server import get_api as get_impalad_api, _get_impala_server_url - +from impala.server import _get_impala_server_url, get_api as get_impalad_api +from jobbrowser.apis.query_api import _get_api from libanalyze import analyze as analyzer, rules - from notebook.models import make_notebook -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() -ANALYZER = rules.TopDownAnalysis() # We need to parse some files so save as global +ANALYZER = rules.TopDownAnalysis() # We need to parse some files so save as global @require_POST @@ -60,7 +51,7 @@ def invalidate(request): table = request.POST.get('table', None) flush_all = request.POST.get('flush_all', 'false').lower() == 'true' - query_server = dbms.get_query_server_config(connector=None) # TODO: connector support + query_server = dbms.get_query_server_config(connector=None) # TODO: connector support db = beeswax_dbms.get(request.user, query_server=query_server) response = {'status': 0, 'message': ''} @@ -128,6 +119,7 @@ def get_runtime_profile(request, query_history_id): return JsonResponse(response) + @require_POST @error_handler def alanize(request): @@ -150,15 +142,30 @@ def alanize(request): heatmap = {} summary = analyzer.summary(profile) - heatmapMetrics = ['AverageThreadTokens', 'BloomFilterBytes', 'PeakMemoryUsage', 'PerHostPeakMemUsage', 'PrepareTime', 'RowsProduced', 'TotalCpuTime', 'TotalNetworkReceiveTime', 'TotalNetworkSendTime', 'TotalStorageWaitTime', 'TotalTime'] + heatmapMetrics = [ + 'AverageThreadTokens', + 'BloomFilterBytes', + 'PeakMemoryUsage', + 'PerHostPeakMemUsage', + 'PrepareTime', + 'RowsProduced', + 'TotalCpuTime', + 'TotalNetworkReceiveTime', + 'TotalNetworkSendTime', + 'TotalStorageWaitTime', + 'TotalTime', + ] for key in heatmapMetrics: metrics = analyzer.heatmap_by_host(profile, key) if metrics['data']: heatmap[key] = metrics - response['data'] = { 'query': { 'healthChecks' : result[0]['result'], 'summary': summary, 'heatmap': heatmap, 'heatmapMetrics': sorted(list(heatmap.keys())) } } + response['data'] = { + 'query': {'healthChecks': result[0]['result'], 'summary': summary, 'heatmap': heatmap, 'heatmapMetrics': sorted(list(heatmap.keys()))} + } response['status'] = 0 return JsonResponse(response) + def alanize_metrics(request): response = {'status': -1} cluster = json.loads(request.POST.get('cluster', '{}')) @@ -176,6 +183,7 @@ def alanize_metrics(request): response['status'] = 0 return JsonResponse(response) + @require_POST @error_handler def alanize_fix(request): @@ -193,7 +201,7 @@ def alanize_fix(request): is_task=True, compute=cluster ) - response['details'] = { 'task': notebook.execute(request, batch=True) } + response['details'] = {'task': notebook.execute(request, batch=True)} response['status'] = 0 return JsonResponse(response) diff --git a/apps/impala/src/impala/api_tests.py 
b/apps/impala/src/impala/api_tests.py index 96693c72a81..8f346e34f56 100644 --- a/apps/impala/src/impala/api_tests.py +++ b/apps/impala/src/impala/api_tests.py @@ -15,23 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object +import sys import json import logging -import pytest -import sys +from builtins import object +from unittest.mock import Mock, patch -from django.urls import reverse +import pytest from django.test import TestCase +from django.urls import reverse from desktop.lib.django_test_util import make_logged_in_client from impala import conf -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - LOG = logging.getLogger() @@ -50,7 +46,7 @@ def test_invalidate(self): response = self.client.post(reverse("impala:invalidate"), { 'flush_all': False, - 'cluster': json.dumps({"credentials":{},"type":"direct","id":"default","name":"default"}), + 'cluster': json.dumps({"credentials": {}, "type": "direct", "id": "default", "name": "default"}), 'database': 'default', 'table': 'k8s_logs' } diff --git a/apps/impala/src/impala/conf.py b/apps/impala/src/impala/conf.py index bbfe37de22d..c6a83a3641d 100644 --- a/apps/impala/src/impala/conf.py +++ b/apps/impala/src/impala/conf.py @@ -15,25 +15,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import os -import socket import sys +import socket +import logging + +from django.utils.translation import gettext as _, gettext_lazy as _t -from desktop.conf import default_ssl_cacerts, default_ssl_validate, AUTH_USERNAME as DEFAULT_AUTH_USERNAME, \ - AUTH_PASSWORD as DEFAULT_AUTH_PASSWORD, has_connectors -from desktop.lib.conf import ConfigSection, Config, coerce_bool, coerce_csv, coerce_password_from_script +from desktop.conf import ( + AUTH_PASSWORD as DEFAULT_AUTH_PASSWORD, + AUTH_USERNAME as DEFAULT_AUTH_USERNAME, + default_ssl_cacerts, + default_ssl_validate, + has_connectors, +) +from desktop.lib.conf import Config, ConfigSection, coerce_bool, coerce_csv, coerce_password_from_script from desktop.lib.exceptions import StructuredThriftTransportException from desktop.lib.paths import get_desktop_root - from impala.impala_flags import get_max_result_cache_size, is_impersonation_enabled, is_kerberos_enabled, is_webserver_spnego_enabled from impala.settings import NICE_NAME -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ - LOG = logging.getLogger() @@ -177,16 +178,19 @@ ) ) + def get_auth_username(): """Get from top level default from desktop""" return DEFAULT_AUTH_USERNAME.get() + AUTH_USERNAME = Config( key="auth_username", help=_t("Auth username of the hue user used for authentications."), private=True, dynamic_default=get_auth_username) + def get_auth_password(): """Get from script or backward compatibility""" password = AUTH_PASSWORD_SCRIPT.get() @@ -195,6 +199,7 @@ def get_auth_password(): return DEFAULT_AUTH_PASSWORD.get() + AUTH_PASSWORD = Config( key="auth_password", help=_t("LDAP/PAM/.. 
password of the hue user used for authentications."), @@ -210,6 +215,7 @@ def get_auth_password(): default=None ) + def get_daemon_config(key): from metadata.conf import MANAGER from metadata.manager_client import ManagerApi @@ -219,18 +225,21 @@ def get_daemon_config(key): return None + def get_daemon_api_username(): """ Try to get daemon_api_username from Cloudera Manager API """ return get_daemon_config('webserver_htpassword_user') + def get_daemon_api_password(): """ Try to get daemon_api_password from Cloudera Manager API """ return get_daemon_config('webserver_htpassword_password') + DAEMON_API_PASSWORD = Config( key="daemon_api_password", help=_t("Password for Impala Daemon when username/password authentication is enabled for the Impala Daemon UI."), @@ -262,9 +271,11 @@ def get_daemon_api_password(): default="digest" ) + def get_use_sasl_default(): """kerberos enabled or password is specified""" - return is_kerberos_enabled() or AUTH_PASSWORD.get() is not None # Maps closely to legacy behavior + return is_kerberos_enabled() or AUTH_PASSWORD.get() is not None # Maps closely to legacy behavior + USE_SASL = Config( key="use_sasl", @@ -296,7 +307,7 @@ def config_validator(user): try: try: - if not 'test' in sys.argv: # Avoid tests hanging + if 'test' not in sys.argv: # Avoid tests hanging query_server = get_query_server_config(name='impala') server = dbms.get(user, query_server) query = hql_query("SELECT 'Hello World!';") diff --git a/apps/impala/src/impala/dbms.py b/apps/impala/src/impala/dbms.py index b25ce06c8d0..ddab83d3a30 100644 --- a/apps/impala/src/impala/dbms.py +++ b/apps/impala/src/impala/dbms.py @@ -15,28 +15,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys +import logging + +from django.utils.translation import gettext as _ +from beeswax.design import hql_query +from beeswax.models import QUERY_TYPES +from beeswax.server import dbms +from beeswax.server.dbms import ( + HiveServer2Dbms, + QueryServerException, + QueryServerTimeoutException, + get_query_server_config as beeswax_query_server_config, + get_query_server_config_via_connector, +) from desktop.conf import CLUSTER_ID, has_connectors from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import smart_str from desktop.models import Cluster -from beeswax.design import hql_query -from beeswax.models import QUERY_TYPES -from beeswax.server import dbms -from beeswax.server.dbms import HiveServer2Dbms, QueryServerException, QueryServerTimeoutException, \ - get_query_server_config as beeswax_query_server_config, get_query_server_config_via_connector - from impala import conf from impala.impala_flags import get_hs2_http_port -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -102,13 +102,11 @@ def get_nested_select(cls, database, table, column, nested=None): from_clause = '.'.join('`%s`' % token.strip('`') for token in from_tokens) return select_clause, from_clause - @classmethod def get_histogram_query(cls, database, table, column, nested=None): select_clause, from_clause = cls.get_nested_select(database, table, column, nested) return 'SELECT histogram(%s) FROM %s' % (select_clause, from_clause) - # Deprecated def invalidate(self, database=None, table=None, flush_all=False): handle = None @@ -145,7 +143,6 @@ def invalidate(self, database=None, table=None, flush_all=False): if handle: 
self.close(handle) - def refresh_table(self, database, table): handle = None try: @@ -159,7 +156,6 @@ def refresh_table(self, database, table): if handle: self.close(handle) - def get_histogram(self, database, table, column, nested=None): """ Returns the results of an Impala SELECT histogram() FROM query for a given column or nested type. @@ -185,15 +181,12 @@ def get_histogram(self, database, table, column, nested=None): return results - def get_exec_summary(self, query_handle, session_handle): return self.client._client.get_exec_summary(query_handle, session_handle) - def get_runtime_profile(self, query_handle, session_handle): return self.client._client.get_runtime_profile(query_handle, session_handle) - def _get_beeswax_tables(self, database): beeswax_query_server = dbms.get( user=self.client.user, @@ -203,7 +196,6 @@ def _get_beeswax_tables(self, database): ) return beeswax_query_server.get_tables(database=database) - def _get_different_tables(self, database): beeswax_tables = self._get_beeswax_tables(database) impala_tables = self.get_tables(database=database) diff --git a/apps/impala/src/impala/dbms_tests.py b/apps/impala/src/impala/dbms_tests.py index 0313afc43bb..9b94c5a7d1e 100644 --- a/apps/impala/src/impala/dbms_tests.py +++ b/apps/impala/src/impala/dbms_tests.py @@ -15,35 +15,28 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import json import logging -import pytest -import sys +from unittest.mock import Mock, patch +import pytest from django.urls import reverse import desktop.conf as desktop_conf from desktop.lib.django_test_util import make_logged_in_client -from useradmin.models import User - - from impala.dbms import get_query_server_config - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - +from useradmin.models import User LOG = logging.getLogger() + @pytest.mark.django_db class TestDbms(): def setup_method(self): self.client = make_logged_in_client() - def test_get_connector_config(self): connector = { 'type': 'impala-compute', diff --git a/apps/impala/src/impala/server.py b/apps/impala/src/impala/server.py index f6ca1502a09..4bc48788857 100644 --- a/apps/impala/src/impala/server.py +++ b/apps/impala/src/impala/server.py @@ -15,27 +15,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from past.builtins import basestring -from builtins import object +import sys import json import logging import threading -import sys +from builtins import object + +from django.utils.translation import gettext as _ +from ImpalaService import ImpalaHiveServer2Service +from past.builtins import basestring +from beeswax.server.dbms import QueryServerException +from beeswax.server.hive_server2_lib import HiveServerClient from desktop.lib.exceptions_renderable import PopupException from desktop.lib.rest.http_client import HttpClient from desktop.lib.rest.resource import Resource -from beeswax.server.dbms import QueryServerException -from beeswax.server.hive_server2_lib import HiveServerClient - -from ImpalaService import ImpalaHiveServer2Service -from impala.impala_flags import get_webserver_certificate_file, is_webserver_spnego_enabled, is_kerberos_enabled -from impala.conf import DAEMON_API_USERNAME, DAEMON_API_PASSWORD, DAEMON_API_PASSWORD_SCRIPT, DAEMON_API_AUTH_SCHEME, COORDINATOR_URL - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from impala.conf import COORDINATOR_URL, DAEMON_API_AUTH_SCHEME, DAEMON_API_PASSWORD, DAEMON_API_PASSWORD_SCRIPT, DAEMON_API_USERNAME +from impala.impala_flags import get_webserver_certificate_file, is_kerberos_enabled, is_webserver_spnego_enabled LOG = logging.getLogger() @@ -87,7 +83,6 @@ def get_exec_summary(self, operation_handle, session_handle): return self._serialize_exec_summary(resp.summary) - def get_runtime_profile(self, operation_handle, session_handle): """ Calls Impala HS2 API's GetRuntimeProfile method on the given query handle @@ -105,7 +100,6 @@ def get_runtime_profile(self, operation_handle, session_handle): return resp.profile - def _serialize_exec_summary(self, summary): try: summary_dict = { @@ -169,29 +163,24 @@ def __init__(self, server_url): def __str__(self): return "ImpalaDaemonApi at %s" % self._url - @property def url(self): return self._url - @property def security_enabled(self): return self._security_enabled - @property def user(self): return self._thread_local.user - def set_user(self, user): if hasattr(user, 'username'): self._thread_local.user = user.username else: self._thread_local.user = user - def get_queries(self): params = { 'json': 'true' @@ -206,7 +195,6 @@ def get_queries(self): except ValueError as e: raise ImpalaDaemonApiException('ImpalaDaemonApi did not return valid JSON: %s' % e) - def get_query(self, query_id): params = { 'query_id': query_id, @@ -222,7 +210,6 @@ def get_query(self, query_id): except ValueError as e: raise ImpalaDaemonApiException('ImpalaDaemonApi did not return valid JSON: %s' % e) - def get_query_profile(self, query_id): params = { 'query_id': query_id, diff --git a/apps/impala/src/impala/server_tests.py b/apps/impala/src/impala/server_tests.py index b697d2726df..1946d5e768f 100644 --- a/apps/impala/src/impala/server_tests.py +++ b/apps/impala/src/impala/server_tests.py @@ -16,21 +16,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import logging +from unittest.mock import MagicMock, Mock, patch + import pytest -import sys -from desktop.lib.exceptions_renderable import PopupException from desktop.lib.django_test_util import make_logged_in_client -from useradmin.models import User - +from desktop.lib.exceptions_renderable import PopupException from impala.server import ImpalaDaemonApi, _get_impala_server_url - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - +from useradmin.models import User LOG = logging.getLogger() @@ -46,7 +41,6 @@ def test_get_impala_server_url_when_no_session(self): with pytest.raises(PopupException): _get_impala_server_url(session=None) - def test_digest_auth(self): with patch('impala.server.DAEMON_API_USERNAME.get') as DAEMON_API_USERNAME_get: @@ -73,7 +67,6 @@ def test_digest_auth(self): server._client.set_kerberos_auth.assert_not_called() server._client.set_basic_auth.assert_not_called() - def test_basic_auth(self): with patch('impala.server.DAEMON_API_USERNAME.get') as DAEMON_API_USERNAME_get: @@ -104,7 +97,6 @@ def test_basic_auth(self): server._client.set_digest_auth.assert_not_called() server._client.set_kerberos_auth.assert_not_called() - def test_kerberos_auth(self): with patch('impala.server.DAEMON_API_USERNAME.get') as DAEMON_API_USERNAME_get: @@ -127,7 +119,6 @@ def test_kerberos_auth(self): with patch('impala.server.HttpClient') as HttpClient: with patch('impala.server.is_webserver_spnego_enabled') as is_webserver_spnego_enabled: - DAEMON_API_USERNAME_get.return_value = None DAEMON_API_PASSWORD_get.return_value = 'impala' is_webserver_spnego_enabled.return_value = False diff --git a/apps/impala/src/impala/test_impala_flags.py b/apps/impala/src/impala/test_impala_flags.py index 1d78504fb5b..32766ae32a9 100644 --- a/apps/impala/src/impala/test_impala_flags.py +++ b/apps/impala/src/impala/test_impala_flags.py @@ -15,16 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import os -import sys +import logging import tempfile from impala import conf, impala_flags - -open_file = open - LOG = logging.getLogger() @@ -47,7 +43,7 @@ def test_impala_flags(): -max_result_cache_size=%d -authorized_proxy_user_config=hue=* """ % expected_rows - open_file(os.path.join(test_impala_conf_dir, 'impalad_flags'), 'w').write(flags) + open(os.path.join(test_impala_conf_dir, 'impalad_flags'), 'w').write(flags) resets.append(conf.IMPALA_CONF_DIR.set_for_testing(test_impala_conf_dir)) impala_flags.reset() diff --git a/apps/impala/src/impala/tests.py b/apps/impala/src/impala/tests.py index b20805696bc..765999d6203 100644 --- a/apps/impala/src/impala/tests.py +++ b/apps/impala/src/impala/tests.py @@ -15,53 +15,46 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import json -import logging -import pytest import re import sys +import json +import logging +from builtins import object +from unittest.mock import Mock, patch +import pytest from django.urls import reverse import desktop.conf as desktop_conf -from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.test_utils import add_to_group -from desktop.models import Document from beeswax import data_export -from beeswax.design import hql_query from beeswax.data_export import download -from beeswax.models import SavedQuery, QueryHistory +from beeswax.design import hql_query +from beeswax.models import QueryHistory, SavedQuery from beeswax.server import dbms -from beeswax.test_base import get_query_server_config, wait_for_query_to_finish, fetch_query_result_data +from beeswax.test_base import fetch_query_result_data, get_query_server_config, wait_for_query_to_finish from beeswax.tests import _make_query +from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.exceptions_renderable import PopupException +from desktop.lib.test_utils import add_to_group +from desktop.models import Document from hadoop.pseudo_hdfs4 import get_db_prefix, is_live_cluster -from useradmin.models import User - from impala import conf from impala.dbms import ImpalaDbms - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - +from useradmin.models import User LOG = logging.getLogger() class MockDbms(object): - def get_databases(self): return ['db1', 'db2'] def get_tables(self, database): return ['table1', 'table2'] + @pytest.mark.django_db class TestMockedImpala(object): - def setup_method(self): self.client = make_logged_in_client() @@ -116,29 +109,28 @@ def test_invalidate(self): get_hive_metastore_interpreters.return_value = [] with pytest.raises(PopupException): - ddms.invalidate('default') # No hive/metastore configured + ddms.invalidate('default') # No hive/metastore configured get_hive_metastore_interpreters.return_value = ['hive'] ddms.invalidate('default') ddms.client.query.assert_called_once_with(ddms.client.query.call_args[0][0]) - assert 'customers' in ddms.client.query.call_args[0][0].hql_query # diff of 1 table + assert 'customers' in ddms.client.query.call_args[0][0].hql_query # diff of 1 table get_different_tables.return_value = ['customers', '', '', '', '', '', '', '', '', '', ''] with pytest.raises(PopupException): - ddms.invalidate('default') # diff of 11 tables. Limit is 10. + ddms.invalidate('default') # diff of 11 tables. Limit is 10. 
ddms.invalidate('default', 'customers') - assert ddms.client.query.call_count == 2 # Second call - assert 'customers' in ddms.client.query.call_args[0][0].hql_query # invalidate 1 table + assert ddms.client.query.call_count == 2 # Second call + assert 'customers' in ddms.client.query.call_args[0][0].hql_query # invalidate 1 table ddms.invalidate() - assert ddms.client.query.call_count == 3 # Third call - assert 'customers' not in ddms.client.query.call_args[0][0].hql_query # Full invalidate + assert ddms.client.query.call_count == 3 # Third call + assert 'customers' not in ddms.client.query.call_args[0][0].hql_query # Full invalidate @pytest.mark.integration class TestImpalaIntegration(object): - @classmethod def setup_class(cls): cls.finish = [] @@ -155,7 +147,7 @@ def setup_class(cls): queries = [ 'DROP TABLE IF EXISTS %(db)s.tweets;' % {'db': cls.DATABASE}, 'DROP DATABASE IF EXISTS %(db)s CASCADE;' % {'db': cls.DATABASE}, - 'CREATE DATABASE %(db)s;' % {'db': cls.DATABASE} + 'CREATE DATABASE %(db)s;' % {'db': cls.DATABASE}, ] for query in queries: @@ -164,19 +156,26 @@ def setup_class(cls): content = json.loads(resp.content) assert content['status'] == 0, resp.content - queries = [""" + queries = [ + """ CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET; - """, """ + """, + """ INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa"); - """, """ + """, + """ INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner."); - """, """ + """, # noqa: E501 + """ INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))"); - """, """ + """, + """ INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx"); - """, """ + """, + """ INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE"); - """] + """, # noqa: E501 + ] for query in queries: resp = _make_query(cls.client, query, database=cls.DATABASE, local=False, server_name='impala') @@ -184,28 +183,31 @@ def setup_class(cls): content = json.loads(resp.content) assert content['status'] == 0, resp.content - @classmethod def teardown_class(cls): # We need to drop tables before dropping the database - queries = [""" + queries = [ + """ DROP TABLE IF EXISTS %(db)s.tweets; - """ % {'db': cls.DATABASE}, """ + """ + % {'db': cls.DATABASE}, + """ DROP DATABASE %(db)s CASCADE; - """ % {'db': cls.DATABASE}] + """ + % {'db': cls.DATABASE}, + ] for query in queries: resp = _make_query(cls.client, query, database='default', local=False, server_name='impala') resp = wait_for_query_to_finish(cls.client, resp, max=180.0) # Check the cleanup databases = cls.db.get_databases() - assert not cls.DATABASE in databases - assert not '%(db)s_other' % {'db': cls.DATABASE} in databases + assert cls.DATABASE not in databases + assert '%(db)s_other' % {'db': cls.DATABASE} not in databases for f in cls.finish: f() - def test_basic_flow(self): dbs = self.db.get_databases() assert '_impala_builtins' in dbs, dbs @@ -247,12 +249,11 @@ def test_basic_flow(self): content = json.loads(resp.content) assert 0 == content['status'] - def test_data_download(self): hql = 'SELECT * FROM tweets %(limit)s' FETCH_SIZE = data_export.FETCH_SIZE - data_export.FETCH_SIZE = 2 # 
Decrease fetch size to validate last fetch logic + data_export.FETCH_SIZE = 2 # Decrease fetch size to validate last fetch logic try: query = hql_query(hql % {'limit': ''}) @@ -263,7 +264,6 @@ def test_data_download(self): csv_content = ''.join(csv_resp.streaming_content) assert len(csv_content.strip().split('\n')) == 5 + 1 - query = hql_query(hql % {'limit': 'LIMIT 0'}) handle = self.db.execute_and_wait(query) @@ -287,7 +287,6 @@ def test_data_download(self): finally: data_export.FETCH_SIZE = FETCH_SIZE - def test_explain(self): QUERY = """ SELECT * FROM tweets ORDER BY row_num; @@ -297,17 +296,15 @@ def test_explain(self): assert 'MERGING-EXCHANGE' in json_response['explanation'], json_response assert 'SCAN HDFS' in json_response['explanation'], json_response - def test_get_table_sample(self): client = make_logged_in_client() resp = client.get(reverse('impala:get_sample_data', kwargs={'database': self.DATABASE, 'table': 'tweets'})) data = json.loads(resp.content) assert 0 == data['status'], data - assert [u'row_num', u'id_str', u'text'] == data['headers'], data + assert ['row_num', 'id_str', 'text'] == data['headers'], data assert len(data['rows']), data - def test_get_session(self): session = None try: @@ -327,14 +324,12 @@ def test_get_session(self): except Exception: pass - def test_get_settings(self): resp = self.client.get(reverse("impala:get_settings")) json_resp = json.loads(resp.content) assert 0 == json_resp['status'] assert 'QUERY_TIMEOUT_S' in json_resp['settings'] - def test_invalidate_tables(self): # Helper function to get Impala and Beeswax (HMS) tables def get_impala_beeswax_tables(): @@ -347,9 +342,10 @@ def get_impala_beeswax_tables(): return impala_tables, beeswax_tables impala_tables, beeswax_tables = get_impala_beeswax_tables() - assert impala_tables == beeswax_tables, ( - "\ntest_invalidate_tables: `%s`\nImpala Tables: %s\nBeeswax Tables: %s" - % (self.DATABASE, ','.join(impala_tables), ','.join(beeswax_tables)) + assert impala_tables == beeswax_tables, "\ntest_invalidate_tables: `%s`\nImpala Tables: %s\nBeeswax Tables: %s" % ( + self.DATABASE, + ','.join(impala_tables), + ','.join(beeswax_tables), ) hql = """ @@ -360,18 +356,18 @@ def get_impala_beeswax_tables(): impala_tables, beeswax_tables = get_impala_beeswax_tables() # New table is not found by Impala assert 'new_table' in beeswax_tables, beeswax_tables - assert not 'new_table' in impala_tables, impala_tables + assert 'new_table' not in impala_tables, impala_tables resp = self.client.post(reverse('impala:invalidate'), {'database': self.DATABASE}) impala_tables, beeswax_tables = get_impala_beeswax_tables() # Invalidate picks up new table - assert impala_tables == beeswax_tables, ( - "\ntest_invalidate_tables: `%s`\nImpala Tables: %s\nBeeswax Tables: %s" - % (self.DATABASE, ','.join(impala_tables), ','.join(beeswax_tables)) + assert impala_tables == beeswax_tables, "\ntest_invalidate_tables: `%s`\nImpala Tables: %s\nBeeswax Tables: %s" % ( + self.DATABASE, + ','.join(impala_tables), + ','.join(beeswax_tables), ) - def test_refresh_table(self): # Helper function to get Impala and Beeswax (HMS) columns def get_impala_beeswax_columns(): @@ -382,8 +378,12 @@ def get_impala_beeswax_columns(): return impala_columns, beeswax_columns impala_columns, beeswax_columns = get_impala_beeswax_columns() - assert impala_columns == beeswax_columns, ("\ntest_refresh_table: `%s`.`%s`\nImpala Columns: %s\nBeeswax Columns: %s" - % (self.DATABASE, 'tweets', ','.join(impala_columns), ','.join(beeswax_columns))) + assert impala_columns 
== beeswax_columns, "\ntest_refresh_table: `%s`.`%s`\nImpala Columns: %s\nBeeswax Columns: %s" % ( + self.DATABASE, + 'tweets', + ','.join(impala_columns), + ','.join(beeswax_columns), + ) hql = """ ALTER TABLE tweets ADD COLUMNS (new_column INT); @@ -393,15 +393,18 @@ def get_impala_beeswax_columns(): impala_columns, beeswax_columns = get_impala_beeswax_columns() # New column is not found by Impala assert 'new_column' in beeswax_columns, beeswax_columns - assert not 'new_column' in impala_columns, impala_columns + assert 'new_column' not in impala_columns, impala_columns resp = self.client.post(reverse('impala:refresh_table', kwargs={'database': self.DATABASE, 'table': 'tweets'})) impala_columns, beeswax_columns = get_impala_beeswax_columns() # Invalidate picks up new column - assert impala_columns == beeswax_columns, ("\ntest_refresh_table: `%s`.`%s`\nImpala Columns: %s\nBeeswax Columns: %s" - % (self.DATABASE, 'tweets', ','.join(impala_columns), ','.join(beeswax_columns))) - + assert impala_columns == beeswax_columns, "\ntest_refresh_table: `%s`.`%s`\nImpala Columns: %s\nBeeswax Columns: %s" % ( + self.DATABASE, + 'tweets', + ','.join(impala_columns), + ','.join(beeswax_columns), + ) def test_get_exec_summary(self): query = """ @@ -427,7 +430,6 @@ def test_get_exec_summary(self): assert 'nodes' in data['summary'], data assert len(data['summary']['nodes']) > 0, data['summary']['nodes'] - def test_get_runtime_profile(self): query = """ SELECT COUNT(1) FROM tweets; @@ -462,20 +464,23 @@ def create_saved_query(app_name, owner): def test_ssl_cacerts(): for desktop_kwargs, conf_kwargs, expected in [ - ({'present': False}, {'present': False}, ''), - ({'present': False}, {'data': 'local-cacerts.pem'}, 'local-cacerts.pem'), - - ({'data': 'global-cacerts.pem'}, {'present': False}, 'global-cacerts.pem'), - ({'data': 'global-cacerts.pem'}, {'data': 'local-cacerts.pem'}, 'local-cacerts.pem'), - ]: + ({'present': False}, {'present': False}, ''), + ({'present': False}, {'data': 'local-cacerts.pem'}, 'local-cacerts.pem'), + ({'data': 'global-cacerts.pem'}, {'present': False}, 'global-cacerts.pem'), + ({'data': 'global-cacerts.pem'}, {'data': 'local-cacerts.pem'}, 'local-cacerts.pem'), + ]: resets = [ desktop_conf.SSL_CACERTS.set_for_testing(**desktop_kwargs), conf.SSL.CACERTS.set_for_testing(**conf_kwargs), ] try: - assert conf.SSL.CACERTS.get() == expected, ('desktop:%s conf:%s expected:%s got:%s' - % (desktop_kwargs, conf_kwargs, expected, conf.SSL.CACERTS.get())) + assert conf.SSL.CACERTS.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % ( + desktop_kwargs, + conf_kwargs, + expected, + conf.SSL.CACERTS.get(), + ) finally: for reset in resets: reset() @@ -483,25 +488,28 @@ def test_ssl_cacerts(): def test_ssl_validate(): for desktop_kwargs, conf_kwargs, expected in [ - ({'present': False}, {'present': False}, True), - ({'present': False}, {'data': False}, False), - ({'present': False}, {'data': True}, True), - - ({'data': False}, {'present': False}, False), - ({'data': False}, {'data': False}, False), - ({'data': False}, {'data': True}, True), - - ({'data': True}, {'present': False}, True), - ({'data': True}, {'data': False}, False), - ({'data': True}, {'data': True}, True), - ]: + ({'present': False}, {'present': False}, True), + ({'present': False}, {'data': False}, False), + ({'present': False}, {'data': True}, True), + ({'data': False}, {'present': False}, False), + ({'data': False}, {'data': False}, False), + ({'data': False}, {'data': True}, True), + ({'data': True}, {'present': 
False}, True), + ({'data': True}, {'data': False}, False), + ({'data': True}, {'data': True}, True), + ]: resets = [ desktop_conf.SSL_VALIDATE.set_for_testing(**desktop_kwargs), conf.SSL.VALIDATE.set_for_testing(**conf_kwargs), ] try: - assert conf.SSL.VALIDATE.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % (desktop_kwargs, conf_kwargs, expected, conf.SSL.VALIDATE.get()) + assert conf.SSL.VALIDATE.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % ( + desktop_kwargs, + conf_kwargs, + expected, + conf.SSL.VALIDATE.get(), + ) finally: for reset in resets: reset() @@ -509,9 +517,9 @@ def test_ssl_validate(): def test_thrift_over_http_config(): resets = [ - conf.SERVER_HOST.set_for_testing('impalad_host'), - conf.SERVER_PORT.set_for_testing(21050), - conf.USE_THRIFT_HTTP.set_for_testing(True) + conf.SERVER_HOST.set_for_testing('impalad_host'), + conf.SERVER_PORT.set_for_testing(21050), + conf.USE_THRIFT_HTTP.set_for_testing(True), ] with patch('impala.dbms.get_hs2_http_port') as get_hs2_http_port: get_hs2_http_port.return_value = 30000 @@ -527,10 +535,10 @@ def test_thrift_over_http_config(): def test_thrift_over_http_config_with_proxy_endpoint(): resets = [ - conf.SERVER_HOST.set_for_testing('impala_proxy'), - conf.SERVER_PORT.set_for_testing(36000), - conf.USE_THRIFT_HTTP.set_for_testing(True), - conf.PROXY_ENDPOINT.set_for_testing('/endpoint') + conf.SERVER_HOST.set_for_testing('impala_proxy'), + conf.SERVER_PORT.set_for_testing(36000), + conf.USE_THRIFT_HTTP.set_for_testing(True), + conf.PROXY_ENDPOINT.set_for_testing('/endpoint'), ] with patch('impala.dbms.get_hs2_http_port') as get_hs2_http_port: get_hs2_http_port.return_value = 30000 @@ -545,16 +553,22 @@ def test_thrift_over_http_config_with_proxy_endpoint(): class TestImpalaDbms(object): - def test_get_impala_nested_select(self): assert ImpalaDbms.get_nested_select('default', 'customers', 'id', None) == ('id', '`default`.`customers`') - assert (ImpalaDbms.get_nested_select('default', 'customers', 'email_preferences', 'categories/promos/') == - ('email_preferences.categories.promos', '`default`.`customers`')) - assert (ImpalaDbms.get_nested_select('default', 'customers', 'addresses', 'key') == - ('key', '`default`.`customers`.`addresses`')) - assert (ImpalaDbms.get_nested_select('default', 'customers', 'addresses', 'value/street_1/') == - ('street_1', '`default`.`customers`.`addresses`')) - assert (ImpalaDbms.get_nested_select('default', 'customers', 'orders', 'item/order_date') == - ('order_date', '`default`.`customers`.`orders`')) - assert (ImpalaDbms.get_nested_select('default', 'customers', 'orders', 'item/items/item/product_id') == - ('product_id', '`default`.`customers`.`orders`.`items`')) + assert ImpalaDbms.get_nested_select('default', 'customers', 'email_preferences', 'categories/promos/') == ( + 'email_preferences.categories.promos', + '`default`.`customers`', + ) + assert ImpalaDbms.get_nested_select('default', 'customers', 'addresses', 'key') == ('key', '`default`.`customers`.`addresses`') + assert ImpalaDbms.get_nested_select('default', 'customers', 'addresses', 'value/street_1/') == ( + 'street_1', + '`default`.`customers`.`addresses`', + ) + assert ImpalaDbms.get_nested_select('default', 'customers', 'orders', 'item/order_date') == ( + 'order_date', + '`default`.`customers`.`orders`', + ) + assert ImpalaDbms.get_nested_select('default', 'customers', 'orders', 'item/items/item/product_id') == ( + 'product_id', + '`default`.`customers`.`orders`.`items`', + ) diff --git 
a/apps/impala/src/impala/urls.py b/apps/impala/src/impala/urls.py
index 5b84a6e0c86..54a8d83e3bf 100644
--- a/apps/impala/src/impala/urls.py
+++ b/apps/impala/src/impala/urls.py
@@ -17,14 +17,11 @@
import sys
+from django.urls import re_path
+
from beeswax.urls import urlpatterns as beeswax_urls
from impala import api as impala_api
-if sys.version_info[0] > 2:
- from django.urls import re_path
-else:
- from django.conf.urls import url as re_path
-
urlpatterns = [
re_path(r'^api/invalidate$', impala_api.invalidate, name='invalidate'),
re_path(r'^api/refresh/(?P<database>\w+)/(?P<table>\w+)$', impala_api.refresh_table, name='refresh_table'),
diff --git a/apps/jobbrowser/src/jobbrowser/api.py b/apps/jobbrowser/src/jobbrowser/api.py
index b0930d7ac02..fe4045230c1 100644
--- a/apps/jobbrowser/src/jobbrowser/api.py
+++ b/apps/jobbrowser/src/jobbrowser/api.py
@@ -15,33 +15,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from builtins import object
-import logging
import sys
-
+import logging
+from builtins import object
from datetime import datetime, timedelta
-from django.core.paginator import Paginator
-from desktop.lib.exceptions_renderable import PopupException
-from desktop.lib.rest.http_client import RestException
-
-from hadoop.conf import YARN_CLUSTERS
-from hadoop.cluster import rm_ha
+from django.core.paginator import Paginator
+from django.utils.translation import gettext as _
-import hadoop.yarn.history_server_api as history_server_api
import hadoop.yarn.mapreduce_api as mapreduce_api
import hadoop.yarn.node_manager_api as node_manager_api
+import hadoop.yarn.history_server_api as history_server_api
import hadoop.yarn.resource_manager_api as resource_manager_api
import hadoop.yarn.spark_history_server_api as spark_history_server_api
-
-from jobbrowser.conf import SHARE_JOBS
-from jobbrowser.yarn_models import Application, YarnV2Job, Job as YarnJob, KilledJob as KilledYarnJob, Container, SparkJob
from desktop.auth.backend import is_admin
-
-if sys.version_info[0] > 2:
- from django.utils.translation import gettext as _
-else:
- from django.utils.translation import ugettext as _
+from desktop.lib.exceptions_renderable import PopupException
+from desktop.lib.rest.http_client import RestException
+from hadoop.cluster import rm_ha
+from hadoop.conf import YARN_CLUSTERS
+from jobbrowser.conf import SHARE_JOBS
+from jobbrowser.yarn_models import Application, Container, Job as YarnJob, KilledJob as KilledYarnJob, SparkJob, YarnV2Job
LOG = logging.getLogger()
@@ -93,8 +86,8 @@ def kill_job(self, job_id):
@rm_ha
def get_jobs(self, user, **kwargs):
- state_filters = {'running': 'UNDEFINED', 'completed': 'SUCCEEDED', 'failed': 'FAILED', 'killed': 'KILLED',}
- states_filters = {'running': 'NEW,NEW_SAVING,SUBMITTED,ACCEPTED,RUNNING', 'completed': 'FINISHED', 'failed': 'FAILED,KILLED',}
+ state_filters = {'running': 'UNDEFINED', 'completed': 'SUCCEEDED', 'failed': 'FAILED', 'killed': 'KILLED', }
+ states_filters = {'running': 'NEW,NEW_SAVING,SUBMITTED,ACCEPTED,RUNNING', 'completed': 'FINISHED', 'failed': 'FAILED,KILLED', }
filters = {}
if kwargs['username']:
@@ -108,11 +101,11 @@ def get_jobs(self, user, **kwargs):
if kwargs.get('time_value'):
filters['startedTimeBegin'] = self._get_started_time_begin(kwargs.get('time_value'), kwargs.get('time_unit'))
- if self.resource_manager_api: # This happens when yarn is not configured, but we need
jobbrowser for Impala json = self.resource_manager_api.apps(**filters) else: json = {} - if type(json) == str and 'This is standby RM' in json: + if type(json) is str and 'This is standby RM' in json: raise Exception(json) if json.get('apps'): @@ -147,12 +140,10 @@ def filter_jobs(self, user, jobs, **kwargs): is_admin(user) or job.user == user.username] - def _get_job_from_history_server(self, job_id): resp = self.history_server_api.job(self.user, job_id) return YarnJob(self.history_server_api, resp['job']) - @rm_ha def get_job(self, jobid): job_id = jobid.replace('application', 'job') @@ -237,5 +228,5 @@ def __init__(self, application_id, job): class JobExpired(Exception): def __init__(self, job): - super(JobExpired, self).__init__('JobExpired: %s' %job) + super(JobExpired, self).__init__('JobExpired: %s' % job) self.job = job diff --git a/apps/jobbrowser/src/jobbrowser/api2.py b/apps/jobbrowser/src/jobbrowser/api2.py index b51fe0f7dc1..765a1bf08cc 100644 --- a/apps/jobbrowser/src/jobbrowser/api2.py +++ b/apps/jobbrowser/src/jobbrowser/api2.py @@ -15,28 +15,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import json import logging -import sys - from urllib.request import Request, urlopen from django.http import HttpResponse +from django.utils.translation import gettext as _ -from desktop.lib.i18n import smart_unicode from desktop.lib.django_util import JsonResponse +from desktop.lib.i18n import smart_str from desktop.views import serve_403_error - from jobbrowser.apis.base_api import get_api from jobbrowser.apis.query_store import query_store_proxy, stream_download_bundle - from jobbrowser.conf import DISABLE_KILLING_JOBS, USE_PROXY -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -49,7 +42,7 @@ def decorator(*args, **kwargs): except Exception as e: LOG.exception('Error running %s' % func) response['status'] = -1 - response['message'] = smart_unicode(e) + response['message'] = smart_str(e) finally: if response: return JsonResponse(response) @@ -167,7 +160,7 @@ def profile(request): ]) api = get_api(request.user, interface, cluster=cluster) - api._set_request(request) # For YARN + api._set_request(request) # For YARN resp = api.profile(app_id, app_type, app_property, app_filters) diff --git a/apps/jobbrowser/src/jobbrowser/apis/base_api.py b/apps/jobbrowser/src/jobbrowser/apis/base_api.py index 94213e4685f..abd1f1785e8 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/base_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/base_api.py @@ -15,19 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import logging -import posixpath import re import sys +import logging +import posixpath +from builtins import object -from hadoop.fs.hadoopfs import Hdfs -from desktop.lib.exceptions_renderable import PopupException +from django.utils.translation import gettext as _ -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from desktop.lib.exceptions_renderable import PopupException +from hadoop.fs.hadoopfs import Hdfs LOG = logging.getLogger() @@ -96,13 +93,13 @@ def __init__(self, user): def apps(self, filters): return {'apps': [], 'total': 0} - def app(self, appid): return {} # Also contains progress (0-100) and status [RUNNING, SUCCEEDED, PAUSED, FAILED] + def app(self, appid): return {} # Also contains progress (0-100) and status [RUNNING, SUCCEEDED, PAUSED, FAILED] def action(self, app_ids, operation): return {} def logs(self, appid, app_type, log_name, is_embeddable=False): return {'progress': 0, 'logs': ''} - def profile(self, appid, app_type, app_property, app_filters): return {} # Tasks, XML, counters... + def profile(self, appid, app_type, app_property, app_filters): return {} # Tasks, XML, counters... def _set_request(self, request): self.request = request diff --git a/apps/jobbrowser/src/jobbrowser/apis/beat_api.py b/apps/jobbrowser/src/jobbrowser/apis/beat_api.py index 1788f21113d..01756484233 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/beat_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/beat_api.py @@ -15,21 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys - +import logging from datetime import datetime from dateutil import parser -from desktop.lib.scheduler.lib.beat import CeleryBeatApi +from django.utils.translation import gettext as _ +from desktop.lib.scheduler.lib.beat import CeleryBeatApi from jobbrowser.apis.base_api import Api -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -47,7 +42,7 @@ def apps(self, filters): 'status': self._massage_status(app), 'apiStatus': self._api_status(self._massage_status(app)), 'type': 'celery-beat', - 'user': app['description'], # No user id available yet + 'user': app['description'], # No user id available yet 'progress': 50, 'queue': app['queue'], 'canWrite': self.user.username == app['description'], @@ -58,7 +53,6 @@ def apps(self, filters): 'total': len(tasks) } - def app(self, appid): appid = appid.rsplit('-')[-1] api = CeleryBeatApi(user=self.user) @@ -81,7 +75,6 @@ def app(self, appid): } } - def action(self, app_ids, operation): api = CeleryBeatApi(user=self.user) @@ -95,13 +88,15 @@ def action(self, app_ids, operation): except Exception: LOG.exception('Could not stop job %s' % app_id) - return {'kills': operations, 'status': len(app_ids) - len(operations), 'message': _('%s signal sent to %s') % (operation['action'], operations)} - + return { + 'kills': operations, + 'status': len(app_ids) - len(operations), + 'message': _('%s signal sent to %s') % (operation['action'], operations), + } def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property, app_filters): appid = appid.rsplit('-')[-1] @@ -112,14 +107,12 @@ def profile(self, appid, app_type, app_property, app_filters): else: return {} - def _api_status(self, status): 
if status == 'RUNNING': return 'RUNNING' else: return 'PAUSED' - def _massage_status(self, task): return 'RUNNING' if task['enabled'] else 'PAUSED' @@ -170,15 +163,12 @@ def app(self, appid): return common - def action(self, appid, action): return {} - def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property): return {} @@ -188,4 +178,4 @@ def _api_status(self, status): elif status in ['COMPLETED']: return 'SUCCEEDED' else: - return 'FAILED' # INTERRUPTED , KILLED, TERMINATED and FAILED + return 'FAILED' # INTERRUPTED , KILLED, TERMINATED and FAILED diff --git a/apps/jobbrowser/src/jobbrowser/apis/beeswax_query_api.py b/apps/jobbrowser/src/jobbrowser/apis/beeswax_query_api.py index 8c38025b4c1..ed206d9181b 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/beeswax_query_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/beeswax_query_api.py @@ -14,24 +14,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from builtins import filter - -import logging import re import sys - +import logging +from builtins import filter from datetime import datetime +from django.utils.translation import gettext as _ + from desktop.lib.exceptions_renderable import PopupException from desktop.lib.python_util import current_ms_from_utc - from jobbrowser.apis.base_api import Api -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() try: @@ -43,13 +37,13 @@ class BeeswaxQueryApi(Api): def __init__(self, user, cluster=None): - self.user=user - self.cluster=cluster + self.user = user + self.cluster = cluster def apps(self, filters): filter_map = self._get_filter_map(filters) limit = filters.get('pagination', {'limit': 25}).get('limit') - jobs = query_history.get_query_history(request_user=filter_map.get('effective_user'), start_date=filter_map.get('date'), start_time=filter_map.get('time'), query_id=filter_map.get('query_id'), status=filter_map.get('status'), limit=limit) + jobs = query_history.get_query_history(request_user=filter_map.get('effective_user'), start_date=filter_map.get('date'), start_time=filter_map.get('time'), query_id=filter_map.get('query_id'), status=filter_map.get('status'), limit=limit) # noqa: E501 current_time = current_ms_from_utc() apps = { @@ -123,7 +117,7 @@ def app(self, appid): def action(self, appid, action): message = {'message': '', 'status': 0} - return message; + return message def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} @@ -131,12 +125,12 @@ def logs(self, appid, app_type, log_name=None, is_embeddable=False): def profile(self, appid, app_type, app_property, app_filters): message = {'message': '', 'status': 0} - return message; + return message def profile_encoded(self, appid): message = {'message': '', 'status': 0} - return message; + return message def _get_status(self, job): return 'RUNNING' if len(job[1]) <= 1 else "FINISHED" @@ -155,18 +149,18 @@ def _get_filter_map(self, filters): filter_map = {} if filters.get("text"): filter_names = { - 'user':'effective_user', - 'id':'query_id', - 'name':'state', - 'type':'stmt_type', - 'status':'status' + 'user': 'effective_user', + 'id': 'query_id', + 'name': 'state', + 'type': 'stmt_type', + 'status': 'status' } def make_lambda(name, value): return lambda app: app[name] == 
value for key, name in list(filter_names.items()): - text_filter = re.search(r"\s*("+key+")\s*:([^ ]+)", filters.get("text")) + text_filter = re.search(r"\s*(" + key + r")\s*:([^ ]+)", filters.get("text")) if text_filter and text_filter.group(1) == key: filter_map[name] = text_filter.group(2).strip() @@ -191,10 +185,10 @@ def _time_in_ms(self, time, period): elif period == 's': return float(time) * 1000 elif period == 'm': - return float(time) * 60000 #1000*60 + return float(time) * 60000 # 1000*60 elif period == 'h': - return float(time) * 3600000 #1000*60*60 + return float(time) * 3600000 # 1000*60*60 elif period == 'd': return float(time) * 86400000 # 1000*60*60*24 else: - return float(time) \ No newline at end of file + return float(time) diff --git a/apps/jobbrowser/src/jobbrowser/apis/clusters.py b/apps/jobbrowser/src/jobbrowser/apis/clusters.py index 5fcddd4d3fc..dc4abbd00f4 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/clusters.py +++ b/apps/jobbrowser/src/jobbrowser/apis/clusters.py @@ -15,23 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys - +import logging from datetime import datetime -from dateutil import parser +from dateutil import parser from django.utils import timezone - -from notebook.connectors.altus import DataWarehouse2Api +from django.utils.translation import gettext as _ from jobbrowser.apis.base_api import Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.connectors.altus import DataWarehouse2Api LOG = logging.getLogger() @@ -45,19 +38,18 @@ def __init__(self, user, version=1): super(ClusterApi, self).__init__(user) self.version = version - self.api = DataWarehouse2Api(self.user) - + self.api = DataWarehouse2Api(self.user) def apps(self, filters): - #jobs = self.api.list_clusters() + # jobs = self.api.list_clusters() return { u'status': 0, u'total': 3, u'apps': [ - {u'status': u'ONLINE', u'name': u'Internal EDH', u'submitted': u'2018-10-04 08:34:39.128886', u'queue': u'group', u'user': u'jo0', u'canWrite': False, u'duration': 0, u'progress': u'100 / 100', u'type': u'GKE 100 nodes 100CPU 20TB', u'id': u'crn:altus:engine:k8s:12a0079b-1591-4ca0-b721-a446bda74e67:cluster:jo0/cbf7bbb1-f956-45e4-a269-d239efbc9996', u'apiStatus': u'RUNNING'}, - {u'status': u'ONLINE', u'name': u'gke_gcp-eng-dsdw_us-west2-b_impala-demo', u'submitted': u'2018-10-04 08:34:39.128881', u'queue': u'group', u'user': u'r0', u'canWrite': False, u'duration': 0, u'progress': u'4 / 4', u'type': u'GKE 4 nodes 16CPU 64GB', u'id': u'crn:altus:engine:k8s:12a0079b-1591-4ca0-b721-a446bda74e67:cluster:r0/0da5e627-ee33-45c5-9179-cc6b95008d2e', u'apiStatus': u'RUNNING'}, - {u'status': u'ONLINE', u'name': u'DW-fraud', u'submitted': u'2018-10-04 08:34:39.128881', u'queue': u'group', u'user': u'r0', u'canWrite': False, u'duration': 0, u'progress': u'50 / 50', u'type': u'OpenShift 50 nodes 30CPU 2TB', u'id': u'crn:altus:engine:k8s:12a0079b-1591-4ca0-b721-a446bda74e67:cluster:r0/0da5e627-ee33-45c5-9179-cc6b95008d2e', u'apiStatus': u'RUNNING'}, + {u'status': u'ONLINE', u'name': u'Internal EDH', u'submitted': u'2018-10-04 08:34:39.128886', u'queue': u'group', u'user': u'jo0', u'canWrite': False, u'duration': 0, u'progress': u'100 / 100', u'type': u'GKE 100 nodes 100CPU 20TB', u'id': u'crn:altus:engine:k8s:12a0079b-1591-4ca0-b721-a446bda74e67:cluster:jo0/cbf7bbb1-f956-45e4-a269-d239efbc9996', u'apiStatus': 
u'RUNNING'}, # noqa: E501 + {u'status': u'ONLINE', u'name': u'gke_gcp-eng-dsdw_us-west2-b_impala-demo', u'submitted': u'2018-10-04 08:34:39.128881', u'queue': u'group', u'user': u'r0', u'canWrite': False, u'duration': 0, u'progress': u'4 / 4', u'type': u'GKE 4 nodes 16CPU 64GB', u'id': u'crn:altus:engine:k8s:12a0079b-1591-4ca0-b721-a446bda74e67:cluster:r0/0da5e627-ee33-45c5-9179-cc6b95008d2e', u'apiStatus': u'RUNNING'}, # noqa: E501 + {u'status': u'ONLINE', u'name': u'DW-fraud', u'submitted': u'2018-10-04 08:34:39.128881', u'queue': u'group', u'user': u'r0', u'canWrite': False, u'duration': 0, u'progress': u'50 / 50', u'type': u'OpenShift 50 nodes 30CPU 2TB', u'id': u'crn:altus:engine:k8s:12a0079b-1591-4ca0-b721-a446bda74e67:cluster:r0/0da5e627-ee33-45c5-9179-cc6b95008d2e', u'apiStatus': u'RUNNING'}, # noqa: E501 ] } @@ -71,14 +63,13 @@ def apps(self, filters): 'user': app['clusterName'].split('-', 1)[0], 'progress': app.get('progress', 100), 'queue': 'group', - 'duration': ((datetime.now() - parser.parse(app['creationDate']).replace(tzinfo=None)).seconds * 1000) if app['creationDate'] else 0, + 'duration': ((datetime.now() - parser.parse(app['creationDate']).replace(tzinfo=None)).seconds * 1000) if app['creationDate'] else 0, # noqa: E501 'submitted': app['creationDate'], 'canWrite': True } for app in sorted(jobs['clusters'], key=lambda a: a['creationDate'], reverse=True)], 'total': len(jobs['clusters']) } - def app(self, appid): handle = self.api.describe_cluster(cluster_id=appid) @@ -116,20 +107,18 @@ def action(self, appid, action): elif result.get('contents') and message.get('status') != -1: message['message'] = result.get('contents') - return message; - + return message def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property): return {} def _api_status(self, status): - if status in ['CREATING', 'CREATED', 'ONLINE', 'SCALING_UP', 'SCALING_DOWN', 'STARTING']: # ONLINE ... are from K8s + if status in ['CREATING', 'CREATED', 'ONLINE', 'SCALING_UP', 'SCALING_DOWN', 'STARTING']: # ONLINE ... are from K8s return 'RUNNING' elif status in ['ARCHIVING', 'COMPLETED', 'TERMINATING', 'STOPPED']: return 'SUCCEEDED' else: - return 'FAILED' # KILLED and FAILED + return 'FAILED' # KILLED and FAILED diff --git a/apps/jobbrowser/src/jobbrowser/apis/data_eng_api.py b/apps/jobbrowser/src/jobbrowser/apis/data_eng_api.py index 7e7837c0c93..da6e57c42d9 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/data_eng_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/data_eng_api.py @@ -15,19 +15,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys +import logging +from datetime import datetime, timedelta -from datetime import datetime, timedelta - -from notebook.connectors.altus import DataEngApi, DATE_FORMAT +from django.utils.translation import gettext as _ from jobbrowser.apis.base_api import Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from notebook.connectors.altus import DATE_FORMAT, DataEngApi LOG = logging.getLogger() @@ -59,11 +54,9 @@ def apps(self, filters): 'total': len(jobs) } - def app(self, appid): return {} - def action(self, appid, action): message = {'message': '', 'status': 0} @@ -78,13 +71,11 @@ def action(self, appid, action): elif result.get('contents') and message.get('status') != -1: message['message'] = result.get('contents') - return message; - + return message def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property): return {} @@ -94,7 +85,7 @@ def _api_status(self, status): elif status in ['ARCHIVING', 'COMPLETED', 'TERMINATING']: return 'SUCCEEDED' else: - return 'FAILED' # KILLED and FAILED + return 'FAILED' # KILLED and FAILED class DataEngJobApi(Api): @@ -155,15 +146,12 @@ def app(self, appid): return common - def action(self, appid, action): return {} - def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property): return {} @@ -173,4 +161,4 @@ def _api_status(self, status): elif status in ['COMPLETED']: return 'SUCCEEDED' else: - return 'FAILED' # INTERRUPTED , KILLED, TERMINATED and FAILED + return 'FAILED' # INTERRUPTED , KILLED, TERMINATED and FAILED diff --git a/apps/jobbrowser/src/jobbrowser/apis/data_warehouse.py b/apps/jobbrowser/src/jobbrowser/apis/data_warehouse.py index da27bbc6b42..be1d1b68fee 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/data_warehouse.py +++ b/apps/jobbrowser/src/jobbrowser/apis/data_warehouse.py @@ -15,23 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys - +import logging from datetime import datetime -from dateutil import parser +from dateutil import parser from django.utils import timezone - -from notebook.connectors.altus import AnalyticDbApi, DataWarehouse2Api +from django.utils.translation import gettext as _ from jobbrowser.apis.base_api import Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.connectors.altus import AnalyticDbApi, DataWarehouse2Api LOG = logging.getLogger() @@ -45,8 +38,7 @@ def __init__(self, user, version=1): super(DataWarehouseClusterApi, self).__init__(user) self.version = version - self.api = DataWarehouse2Api(self.user) if version == 2 else AnalyticDbApi(self.user) - + self.api = DataWarehouse2Api(self.user) if version == 2 else AnalyticDbApi(self.user) def apps(self, filters): jobs = self.api.list_clusters() @@ -57,18 +49,17 @@ def apps(self, filters): 'name': '%(clusterName)s' % app, 'status': app['status'], 'apiStatus': self._api_status(app['status']), - 'type': '%(instanceType)s' % app, #'Altus %(workersGroupSize)sX %(instanceType)s %(cdhVersion)s' % app, + 'type': '%(instanceType)s' % app, # 'Altus %(workersGroupSize)sX %(instanceType)s %(cdhVersion)s' % app, 'user': app['clusterName'].split('-', 1)[0], 'progress': app.get('progress', 100), 'queue': 'group', - 'duration': ((datetime.now() - parser.parse(app['creationDate']).replace(tzinfo=None)).seconds * 1000) if app['creationDate'] else 0, + 'duration': ((datetime.now() - parser.parse(app['creationDate']).replace(tzinfo=None)).seconds * 1000) if app['creationDate'] else 0, # noqa: E501 'submitted': app['creationDate'], 'canWrite': True } for app in sorted(jobs['clusters'], key=lambda a: a['creationDate'], reverse=True)], 'total': len(jobs['clusters']) } - def app(self, appid): handle = self.api.describe_cluster(cluster_id=appid) @@ -104,13 +95,11 @@ def action(self, appid, action): elif result.get('contents') and message.get('status') != -1: message['message'] = result.get('contents') - return message; - + return message def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, app_id, app_type, app_property, app_filters): return {} @@ -122,4 +111,4 @@ def _api_status(self, status): elif status in ['ARCHIVING', 'COMPLETED', 'TERMINATING', 'TERMINATED']: return 'SUCCEEDED' else: - return 'FAILED' # KILLED and FAILED + return 'FAILED' # KILLED and FAILED diff --git a/apps/jobbrowser/src/jobbrowser/apis/history.py b/apps/jobbrowser/src/jobbrowser/apis/history.py index e47a92d4423..ca5756d81a5 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/history.py +++ b/apps/jobbrowser/src/jobbrowser/apis/history.py @@ -15,24 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys - +import logging from datetime import datetime + from dateutil import parser +from django.utils.translation import gettext as _ from desktop.models import Document2 -from notebook.api import _get_statement -from notebook.models import Notebook - from jobbrowser.apis.base_api import Api from jobbrowser.conf import MAX_JOB_FETCH - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.api import _get_statement +from notebook.models import Notebook LOG = logging.getLogger() @@ -46,7 +40,7 @@ def apps(self, filters): for app in tasks: # Copied, Document class should have a get_history method (via method or inheritance) notebook = Notebook(document=app).get_data() - is_notification_manager = False # Supposed SQL Editor query only right now + is_notification_manager = False # Supposed SQL Editor query only right now if 'snippets' in notebook: statement = notebook['description'] if is_notification_manager else _get_statement(notebook) history = { @@ -86,7 +80,6 @@ def apps(self, filters): 'total': len(tasks) } - def app(self, appid): appid = appid.rsplit('-')[-1] @@ -108,7 +101,6 @@ def app(self, appid): } } - def action(self, app_ids, operation): # Notebook API pass @@ -116,13 +108,11 @@ def action(self, app_ids, operation): def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property, app_filters): appid = appid.rsplit('-')[-1] return {} - def _api_status(self, task): if task['data']['status'] in ('expired', 'failed'): return 'FAILED' diff --git a/apps/jobbrowser/src/jobbrowser/apis/hive_query_api.py b/apps/jobbrowser/src/jobbrowser/apis/hive_query_api.py index cc6c939ddc3..8063a25eea4 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/hive_query_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/hive_query_api.py @@ -14,31 +14,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from builtins import filter - +import sys import logging +from builtins import filter +from datetime import datetime from logging import exception -import sys -from datetime import datetime +from django.utils.translation import gettext as _ from beeswax.models import QueryHistory from desktop.lib.exceptions_renderable import PopupException from desktop.lib.python_util import current_ms_from_utc from desktop.lib.rest.http_client import HttpClient from desktop.lib.rest.resource import Resource -from notebook.models import _get_notebook_api, make_notebook, MockRequest - from jobbrowser.apis.base_api import Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.models import MockRequest, _get_notebook_api, make_notebook LOG = logging.getLogger() + class HiveQueryApi(Api): HEADERS = {'X-Requested-By': 'das'} diff --git a/apps/jobbrowser/src/jobbrowser/apis/job_api.py b/apps/jobbrowser/src/jobbrowser/apis/job_api.py index 2c9bcd10389..9010532cef8 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/job_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/job_api.py @@ -15,31 +15,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import json import logging -import sys from django.utils.encoding import smart_str -from hadoop.yarn import resource_manager_api +from django.utils.translation import gettext as _ from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions import MessageException from desktop.lib.exceptions_renderable import PopupException -from jobbrowser.conf import MAX_JOB_FETCH, LOG_OFFSET +from hadoop.yarn import resource_manager_api +from jobbrowser.conf import LOG_OFFSET, MAX_JOB_FETCH from jobbrowser.views import job_executor_logs, job_single_logs -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() LOG_OFFSET_BYTES = LOG_OFFSET.get() try: - from jobbrowser.api import YarnApi as NativeYarnApi, ApplicationNotRunning, JobExpired + from jobbrowser.api import ApplicationNotRunning, JobExpired, YarnApi as NativeYarnApi from jobbrowser.apis.base_api import Api, MockDjangoRequest, _extract_query_params from jobbrowser.views import job_attempt_logs_json, kill_job, massage_job_for_json from jobbrowser.yarn_models import Application @@ -71,7 +66,7 @@ def profile(self, appid, app_type, app_property, app_filters): def _get_api(self, appid): try: - if type(appid) == list: + if type(appid) is list: return self.yarn_api elif appid.startswith('task_'): return YarnMapReduceTaskApi(self.user, appid) @@ -82,8 +77,8 @@ def _get_api(self, appid): elif appid.find('_executor_') > 0: return SparkExecutorApi(self.user, appid) else: - return self.yarn_api # application_ - except: + return self.yarn_api # application_ + except Exception: raise PopupException("Job would have failed due to which there no attempt or appattempt information available") def _set_request(self, request): @@ -134,7 +129,6 @@ def apps(self, filters): 'total': len(apps) } - def app(self, appid): try: job = NativeYarnApi(self.user).get_job(jobid=appid) @@ -151,7 +145,6 @@ def app(self, appid): LOG.exception(msg % appid) raise PopupException(_(msg) % appid, detail=e) - app = massage_job_for_json(job, user=self.user) common = { @@ -211,7 +204,6 @@ def app(self, appid): return common - def action(self, operation, app_ids): if operation['action'] == 'kill': kills = [] @@ -226,7 +218,6 @@ def action(self, operation, app_ids): else: return {} - def logs(self, appid, app_type, log_name, is_embeddable=False): logs = '' logs_list = [] @@ -238,7 +229,7 @@ def logs(self, appid, app_type, log_name, is_embeddable=False): logs = parseResponse.get('logs') logs_list = parseResponse.get('logsList') if logs and len(logs) == 4: - if app_type == 'YarnV2' and logs[0]: #logs[0] is diagnostics + if app_type == 'YarnV2' and logs[0]: # logs[0] is diagnostics logs = logs[0] else: logs = logs[1] @@ -254,7 +245,6 @@ def logs(self, appid, app_type, log_name, is_embeddable=False): LOG.warning('No task attempt found for logs: %s' % smart_str(e)) return {'logs': logs, 'logsList': logs_list} - def profile(self, appid, app_type, app_property, app_filters): if app_type == 'MAPREDUCE': if app_property == 'tasks': @@ -286,7 +276,8 @@ def _api_status(self, status): elif status == 'SUCCEEDED': return 'SUCCEEDED' else: - return 'FAILED' # FAILED, KILLED + return 'FAILED' # FAILED, KILLED + class YarnAttemptApi(Api): @@ -297,7 +288,6 @@ def __init__(self, user, app_id): self.task_id = '_'.join(app_id.replace(start, 'task_').split('_')[:5]) self.attempt_id = app_id.split('_')[3] - def apps(self): attempts = 
NativeYarnApi(self.user).get_task(jobid=self.app_id, task_id=self.task_id).attempts @@ -306,7 +296,6 @@ def apps(self): 'total': len(attempts) } - def app(self, appid): task = NativeYarnApi(self.user).get_task(jobid=self.app_id, task_id=self.task_id).get_attempt(self.attempt_id) @@ -319,7 +308,6 @@ def app(self, appid): return common - def logs(self, appid, app_type, log_name, is_embeddable=False): if log_name == 'default': log_name = 'stdout' @@ -329,46 +317,44 @@ def logs(self, appid, app_type, log_name, is_embeddable=False): return {'progress': 0, 'logs': syslog if log_name == 'syslog' else stderr if log_name == 'stderr' else stdout} - def profile(self, appid, app_type, app_property, app_filters): if app_property == 'counters': return NativeYarnApi(self.user).get_task(jobid=self.app_id, task_id=self.task_id).get_attempt(self.attempt_id).counters return {} - def _massage_task(self, task): return { - #"elapsedMergeTime" : task.elapsedMergeTime, - #"shuffleFinishTime" : task.shuffleFinishTime, + # "elapsedMergeTime" : task.elapsedMergeTime, + # "shuffleFinishTime" : task.shuffleFinishTime, 'id': task.appAttemptId if hasattr(task, 'appAttemptId') else '', 'appAttemptId': task.appAttemptId if hasattr(task, 'appAttemptId') else '', 'blacklistedNodes': task.blacklistedNodes if hasattr(task, 'blacklistedNodes') else '', - 'containerId' : task.containerId if hasattr(task, 'containerId') else '', + 'containerId': task.containerId if hasattr(task, 'containerId') else '', 'diagnostics': task.diagnostics if hasattr(task, 'diagnostics') else '', - "startTimeFormatted" : task.startTimeFormatted if hasattr(task, 'startTimeFormatted') else '', - "startTime" : int(task.startTime) if hasattr(task, 'startTime') else '', - "finishTime" : int(task.finishedTime) if hasattr(task, 'finishedTime') else '', - "finishTimeFormatted" : task.finishTimeFormatted if hasattr(task, 'finishTimeFormatted') else '', - "type" : task.type + '_ATTEMPT' if hasattr(task, 'type') else '', + "startTimeFormatted": task.startTimeFormatted if hasattr(task, 'startTimeFormatted') else '', + "startTime": int(task.startTime) if hasattr(task, 'startTime') else '', + "finishTime": int(task.finishedTime) if hasattr(task, 'finishedTime') else '', + "finishTimeFormatted": task.finishTimeFormatted if hasattr(task, 'finishTimeFormatted') else '', + "type": task.type + '_ATTEMPT' if hasattr(task, 'type') else '', 'nodesBlacklistedBySystem': task.nodesBlacklistedBySystem if hasattr(task, 'nodesBlacklistedBySystem') else '', 'nodeId': task.nodeId if hasattr(task, 'nodeId') else '', 'nodeHttpAddress': task.nodeHttpAddress if hasattr(task, 'nodeHttpAddress') else '', 'logsLink': task.logsLink if hasattr(task, 'logsLink') else '', "app_id": self.app_id, "task_id": self.task_id, - 'duration' : task.duration if hasattr(task, 'duration') else '', - 'durationFormatted' : task.duration if hasattr(task, 'durationFormatted') else '', + 'duration': task.duration if hasattr(task, 'duration') else '', + 'durationFormatted': task.duration if hasattr(task, 'durationFormatted') else '', 'state': task.status if hasattr(task, 'status') else '' } + class YarnMapReduceTaskApi(Api): def __init__(self, user, app_id): Api.__init__(self, user) self.app_id = '_'.join(app_id.replace('task_', 'application_').split('_')[:3]) - def apps(self, filters): filter_params = { 'task_types': None, @@ -403,7 +389,6 @@ def apps(self, filters): 'total': len(tasks) } - def app(self, appid): task = NativeYarnApi(self.user).get_task(jobid=self.app_id, task_id=appid) @@ -417,7 +402,6 @@ def 
app(self, appid): return common - def logs(self, appid, app_type, log_name, is_embeddable=False): if log_name == 'default': log_name = 'stdout' @@ -430,7 +414,6 @@ def logs(self, appid, app_type, log_name, is_embeddable=False): logs = '' return {'progress': 0, 'logs': logs} - def profile(self, appid, app_type, app_property, app_filters): if app_property == 'attempts': return { @@ -461,7 +444,7 @@ def _api_status(self, status): elif status == 'SUCCEEDED': return 'SUCCEEDED' else: - return 'FAILED' # FAILED, KILLED + return 'FAILED' # FAILED, KILLED class YarnMapReduceTaskAttemptApi(Api): @@ -473,7 +456,6 @@ def __init__(self, user, app_id): self.task_id = '_'.join(app_id.replace(start, 'task_').split('_')[:5]) self.attempt_id = app_id - def apps(self): attempts = NativeYarnApi(self.user).get_task(jobid=self.app_id, task_id=self.task_id).attempts @@ -482,7 +464,6 @@ def apps(self): 'total': len(attempts) } - def app(self, appid): task = NativeYarnApi(self.user).get_task(jobid=self.app_id, task_id=self.task_id).get_attempt(self.attempt_id) @@ -495,7 +476,6 @@ def app(self, appid): return common - def logs(self, appid, app_type, log_name, is_embeddable=False): if log_name == 'default': log_name = 'stdout' @@ -505,43 +485,40 @@ def logs(self, appid, app_type, log_name, is_embeddable=False): return {'progress': 0, 'logs': syslog if log_name == 'syslog' else stderr if log_name == 'stderr' else stdout} - def profile(self, appid, app_type, app_property, app_filters): if app_property == 'counters': return NativeYarnApi(self.user).get_task(jobid=self.app_id, task_id=self.task_id).get_attempt(self.attempt_id).counters return {} - def _api_status(self, status): if status in ['NEW', 'SUBMITTED', 'ACCEPTED', 'RUNNING']: return 'RUNNING' elif status == 'SUCCEEDED': return 'SUCCEEDED' else: - return 'FAILED' # FAILED, KILLED - + return 'FAILED' # FAILED, KILLED def _massage_task(self, task): return { - #"elapsedMergeTime" : task.elapsedMergeTime, - #"shuffleFinishTime" : task.shuffleFinishTime, - "assignedContainerId" : task.assignedContainerId if hasattr(task, 'assignedContainerId') else task.amContainerId if hasattr(task, 'amContainerId') else '', - "progress" : task.progress if hasattr(task, 'progress') else '', - "elapsedTime" : task.elapsedTime if hasattr(task, 'elapsedTime') else '', - "state" : task.state if hasattr(task, 'state') else task.appAttemptState if hasattr(task, 'appAttemptState') else '', - #"elapsedShuffleTime" : task.elapsedShuffleTime, - #"mergeFinishTime" : task.mergeFinishTime, - "rack" : task.rack if hasattr(task, 'rack') else '', - #"elapsedReduceTime" : task.elapsedReduceTime, - "nodeHttpAddress" : task.nodeHttpAddress if hasattr(task, 'nodeHttpAddress') else '', - "type" : task.type + '_ATTEMPT' if hasattr(task, 'type') else '', - "startTime" : task.startTime if hasattr(task, 'startTime') else '', - "id" : task.id if hasattr(task, 'id') else task.appAttemptId if hasattr(task, 'appAttemptId') else '', - "finishTime" : task.finishTime if hasattr(task, 'finishTime') else int(task.finishedTime) if hasattr(task, 'finishedTime') else '', + # "elapsedMergeTime" : task.elapsedMergeTime, + # "shuffleFinishTime" : task.shuffleFinishTime, + "assignedContainerId": task.assignedContainerId if hasattr(task, 'assignedContainerId') else task.amContainerId if hasattr(task, 'amContainerId') else '', # noqa: E501 + "progress": task.progress if hasattr(task, 'progress') else '', + "elapsedTime": task.elapsedTime if hasattr(task, 'elapsedTime') else '', + "state": task.state if hasattr(task, 
'state') else task.appAttemptState if hasattr(task, 'appAttemptState') else '', + # "elapsedShuffleTime" : task.elapsedShuffleTime, + # "mergeFinishTime" : task.mergeFinishTime, + "rack": task.rack if hasattr(task, 'rack') else '', + # "elapsedReduceTime" : task.elapsedReduceTime, + "nodeHttpAddress": task.nodeHttpAddress if hasattr(task, 'nodeHttpAddress') else '', + "type": task.type + '_ATTEMPT' if hasattr(task, 'type') else '', + "startTime": task.startTime if hasattr(task, 'startTime') else '', + "id": task.id if hasattr(task, 'id') else task.appAttemptId if hasattr(task, 'appAttemptId') else '', + "finishTime": task.finishTime if hasattr(task, 'finishTime') else int(task.finishedTime) if hasattr(task, 'finishedTime') else '', "app_id": self.app_id, "task_id": self.task_id, - 'apiStatus': self._api_status(task.state) if hasattr(task, 'state') else self._api_status(task.appAttemptState) if hasattr(task, 'appAttemptState') else '', + 'apiStatus': self._api_status(task.state) if hasattr(task, 'state') else self._api_status(task.appAttemptState) if hasattr(task, 'appAttemptState') else '', # noqa: E501 'host': task.host if hasattr(task, 'host') else '', 'rpcPort': task.rpcPort if hasattr(task, 'rpcPort') else '', 'diagnosticsInfo': task.diagnosticsInfo if hasattr(task, 'diagnosticsInfo') else '' diff --git a/apps/jobbrowser/src/jobbrowser/apis/livy_api.py b/apps/jobbrowser/src/jobbrowser/apis/livy_api.py index 80173079295..3e5ec493718 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/livy_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/livy_api.py @@ -15,17 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys +import logging -from spark.livy_client import get_api +from django.utils.translation import gettext as _ from jobbrowser.apis.base_api import Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from spark.livy_client import get_api LOG = logging.getLogger() @@ -53,7 +49,6 @@ def apps(self, filters): 'total': jobs['total'] } - def app(self, appid): appid = appid.rsplit('-')[-1] api = get_api(self.user) @@ -76,15 +71,12 @@ def app(self, appid): } } - def action(self, appid, action): return {} - def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property, app_filters): appid = appid.rsplit('-')[-1] @@ -95,14 +87,13 @@ def profile(self, appid, app_type, app_property, app_filters): else: return {} - def _api_status(self, status): if status in ['CREATING', 'CREATED', 'TERMINATING']: return 'RUNNING' elif status in ['ARCHIVING', 'COMPLETED']: return 'SUCCEEDED' else: - return 'FAILED' # KILLED and FAILED + return 'FAILED' # KILLED and FAILED class LivyJobApi(Api): @@ -151,15 +142,12 @@ def app(self, appid): return common - def action(self, appid, action): return {} - def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property): return {} @@ -169,4 +157,4 @@ def _api_status(self, status): elif status in ['COMPLETED']: return 'SUCCEEDED' else: - return 'FAILED' # INTERRUPTED , KILLED, TERMINATED and FAILED + return 'FAILED' # INTERRUPTED , KILLED, TERMINATED and FAILED diff --git a/apps/jobbrowser/src/jobbrowser/apis/query_api.py b/apps/jobbrowser/src/jobbrowser/apis/query_api.py index a7fe3f3dfbf..a92c6fa5eb2 100644 --- 
a/apps/jobbrowser/src/jobbrowser/apis/query_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/query_api.py @@ -15,38 +15,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import filter -from builtins import range -import itertools -import logging +import os import re import sys import time +import logging +import itertools +from builtins import filter, range from datetime import datetime +from urllib.parse import urlparse + import pytz from babel import localtime -import os - -from urllib.parse import urlparse +from django.utils.translation import gettext as _ from desktop.lib import export_csvxls from impala.conf import COORDINATOR_UI_SPNEGO +from jobbrowser.apis.base_api import Api from libanalyze import analyze as analyzer, rules from notebook.conf import ENABLE_QUERY_ANALYSIS -from jobbrowser.apis.base_api import Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - -ANALYZER = rules.TopDownAnalysis() # We need to parse some files so save as global +ANALYZER = rules.TopDownAnalysis() # We need to parse some files so save as global LOG = logging.getLogger() try: - from beeswax.models import Session, Compute - from impala.server import get_api as get_impalad_api, _get_impala_server_url + from beeswax.models import Compute, Session + from impala.server import _get_impala_server_url, get_api as get_impalad_api except ImportError as e: LOG.exception('Some application are not enabled: %s' % e) @@ -65,7 +59,8 @@ def _get_api(user, cluster=None): server_url = _get_impala_server_url(session) return get_impalad_api(user=user, url=server_url) -def _convert_to_6_digit_ms_local_time(start_time): + +def _convert_to_6_digit_ms_local_time(start_time): if '.' 
in start_time: time, microseconds = start_time.split('.') if len(microseconds) > 6: @@ -73,7 +68,7 @@ def _convert_to_6_digit_ms_local_time(start_time): start_time = '.'.join([time, microseconds]) else: start_time = f'{start_time}.000000' - + local_tz = pytz.timezone(os.environ.get('TZ', 'UTC')) # Convert to datetime object in UTC, convert to provided timezone, and then format back into a string return (datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S.%f") @@ -81,6 +76,7 @@ def _convert_to_6_digit_ms_local_time(start_time): .astimezone(local_tz) .strftime("%Y-%m-%d %H:%M:%S.%f")) + class QueryApi(Api): def __init__(self, user, impala_api=None, cluster=None): @@ -96,7 +92,7 @@ def apps(self, filters): filter_list = self._get_filter_list(filters) jobs_iter = itertools.chain(jobs['in_flight_queries'], jobs['completed_queries']) - jobs_iter_filtered = self._n_filter(filter_list, jobs_iter) + jobs_iter_filtered = self._n_filter(filter_list, jobs_iter) apps = { 'apps': sorted([{ @@ -130,8 +126,8 @@ def apps(self, filters): def _time_in_ms_groups(self, groups): time = 0 for x in range(0, len(groups), 3): - if groups[x+1]: - time += self._time_in_ms(groups[x+1], groups[x+2]) + if groups[x + 1]: + time += self._time_in_ms(groups[x + 1], groups[x + 2]) return time def _time_in_ms(self, time, period): @@ -142,9 +138,9 @@ def _time_in_ms(self, time, period): elif period == 's': return float(time) * 1000 elif period == 'm': - return float(time) * 60000 #1000*60 + return float(time) * 60000 # 1000*60 elif period == 'h': - return float(time) * 3600000 #1000*60*60 + return float(time) * 3600000 # 1000*60*60 elif period == 'd': return float(time) * 86400000 # 1000*60*60*24 else: @@ -164,8 +160,8 @@ def app(self, appid): parsed_api_url = urlparse(self.api.url) app.update({ - 'progress': float(progress_groups.group(1)) \ - if progress_groups and progress_groups.group(1) else 100 \ + 'progress': float(progress_groups.group(1)) + if progress_groups and progress_groups.group(1) else 100 if self._api_status(app.get('status')) in ['SUCCEEDED', 'FAILED'] else 1, 'type': 'queries', 'doc_url': '%s/query_plan?query_id=%s' % (self.api.url, appid) if not COORDINATOR_UI_SPNEGO.get() else @@ -195,7 +191,7 @@ def action(self, appid, action): elif result.get('contents') and message.get('status') != -1: message['message'] = result.get('contents') - return message; + return message def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} @@ -214,12 +210,11 @@ def profile(self, appid, app_type, app_property, app_filters): else: return self._query(appid) - def profile_encoded(self, appid): return self.api.get_query_profile_encoded(query_id=appid) def _memory(self, appid, app_type, app_property, app_filters): - return self.api.get_query_memory(query_id=appid); + return self.api.get_query_memory(query_id=appid) def _metrics(self, appid): query_profile = self.api.get_query_profile_encoded(appid) @@ -253,6 +248,7 @@ def get_exchange_icon(o): return {'svg': 'hi-random'} else: return {'svg': 'hi-exchange'} + def get_sigma_icon(o): if re.search(r'streaming', o['label_detail'], re.IGNORECASE): return {'svg': 'hi-sigma'} @@ -275,6 +271,7 @@ def get_sigma_icon(o): 'ANALYTIC': {'type': 'SINGULAR', 'icon': {'svg': 'hi-timeline'}}, 'UNION': {'type': 'UNION', 'icon': {'svg': 'hi-merge'}} } + def process(node, mapping=mapping): node['id'], node['name'] = node['label'].split(':') details = mapping.get(node['name']) @@ -335,7 +332,7 @@ def make_lambda(name, value): return lambda app: app[name] == value for key, 
name in list(filter_names.items()): - text_filter = re.search(r"\s*("+key+")\s*:([^ ]+)", filters.get("text")) + text_filter = re.search(r"\s*(" + key + r")\s*:([^ ]+)", filters.get("text")) if text_filter and text_filter.group(1) == key: filter_list.append(make_lambda(name, text_filter.group(2).strip())) if filters.get("time"): diff --git a/apps/jobbrowser/src/jobbrowser/apis/query_api_tests.py b/apps/jobbrowser/src/jobbrowser/apis/query_api_tests.py index b51faa36058..3d9e7d149bb 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/query_api_tests.py +++ b/apps/jobbrowser/src/jobbrowser/apis/query_api_tests.py @@ -16,30 +16,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json -import logging import os -import pytest import sys +import json +import logging +from unittest.mock import Mock, patch +import pytest from django.urls import reverse from desktop.auth.backend import rewrite_user from desktop.lib.django_test_util import make_logged_in_client from impala.conf import COORDINATOR_UI_SPNEGO +from jobbrowser.apis.query_api import QueryApi, _convert_to_6_digit_ms_local_time from useradmin.models import User -from jobbrowser.apis.query_api import QueryApi -from jobbrowser.apis.query_api import _convert_to_6_digit_ms_local_time - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - - LOG = logging.getLogger() + class TestConvertTo6DigitMsLocalTime(): @patch.dict(os.environ, {'TZ': 'America/New_York'}) def convert_6_digit(self): @@ -60,7 +54,7 @@ def convert_3_digit(self): expected_time = "2023-07-14 08:00:00.123000" assert expected_time == converted_time - + @patch.dict(os.environ, {'TZ': 'America/New_York'}) def convert_9_digit(self): start_time = "2023-07-14 12:00:00.123456789" @@ -79,7 +73,8 @@ def convert_0_digit(self): # America/New_York timezone is UTC-4 expected_time = "2023-07-14 08:00:00.000000" - assert expected_time == converted_time + assert expected_time == converted_time + @pytest.mark.django_db class TestApi(): @@ -88,7 +83,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = rewrite_user(User.objects.get(username="test")) - def test_download_profile(self): with patch('jobbrowser.apis.query_api._get_api') as _get_api: with patch('jobbrowser.apis.query_api.QueryApi._query_profile') as _query_profile: diff --git a/apps/jobbrowser/src/jobbrowser/apis/schedule_api.py b/apps/jobbrowser/src/jobbrowser/apis/schedule_api.py index fe7920ef68b..7e4fda8f93a 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/schedule_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/schedule_api.py @@ -15,29 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import logging -import json import sys +import json +import logging +from builtins import object -from liboozie.oozie_api import get_oozie -from liboozie.utils import format_time +from django.utils.translation import gettext as _ from jobbrowser.apis.base_api import Api, MockDjangoRequest -from jobbrowser.apis.workflow_api import _manage_oozie_job, _filter_oozie_jobs - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from jobbrowser.apis.workflow_api import _filter_oozie_jobs, _manage_oozie_job +from liboozie.oozie_api import get_oozie +from liboozie.utils import format_time LOG = logging.getLogger() try: from oozie.conf import OOZIE_JOBS_COUNT - from oozie.views.dashboard import list_oozie_coordinator, get_oozie_job_log, massaged_oozie_jobs_for_json, has_job_edition_permission + from oozie.views.dashboard import get_oozie_job_log, has_job_edition_permission, list_oozie_coordinator, massaged_oozie_jobs_for_json except Exception as e: LOG.warning('Some application are not enabled: %s' % e) @@ -71,7 +66,6 @@ def apps(self, filters): 'total': jobs.total } - def app(self, appid, offset=1, filters={}): oozie_api = get_oozie(self.user) coordinator = oozie_api.get_coordinator(jobid=appid) @@ -79,14 +73,14 @@ def app(self, appid, offset=1, filters={}): mock_get = MockGet() mock_get.update('offset', offset) - """ + """ The Oozie job api supports one or more "status" parameters. The valid status values are: - + WAITING, READY, SUBMITTED, RUNNING, SUSPENDED, TIMEDOUT, SUCCEEDED, KILLED, FAILED, IGNORED, SKIPPED - + The job browser UI has a generic filter mechanism that is re-used across all different type of jobs, that parameter is called "states" and it only has three possible values: completed, running or failed - + Here we adapt this to fit the API requirements, "state" becomes "status" and the values are translated based on how it's been done historically (for instance list_oozie_coordinator.mako around line 725). """ @@ -125,18 +119,15 @@ def app(self, appid, offset=1, filters={}): return common - def action(self, app_ids, action): return _manage_oozie_job(self.user, action, app_ids) - def logs(self, appid, app_type, log_name=None, is_embeddable=False): request = MockDjangoRequest(self.user) data = get_oozie_job_log(request, job_id=appid) return {'logs': json.loads(data.content)['log']} - def profile(self, appid, app_type, app_property, app_filters): if app_property == 'xml': oozie_api = get_oozie(self.user) @@ -199,7 +190,7 @@ def __ini__(self, statuses): @property def properties(self): - if self._prop == None: + if self._prop is None: self._prop = {} return self._prop diff --git a/apps/jobbrowser/src/jobbrowser/apis/schedule_hive.py b/apps/jobbrowser/src/jobbrowser/apis/schedule_hive.py index 1686f6c4a09..bc560fbf254 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/schedule_hive.py +++ b/apps/jobbrowser/src/jobbrowser/apis/schedule_hive.py @@ -15,21 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys - +import logging from datetime import datetime from dateutil import parser -from desktop.lib.scheduler.lib.hive import HiveSchedulerApi +from django.utils.translation import gettext as _ +from desktop.lib.scheduler.lib.hive import HiveSchedulerApi from jobbrowser.apis.base_api import Api -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -58,7 +53,6 @@ def apps(self, filters): 'total': len(tasks) } - def app(self, appid): appid = appid.rsplit('-')[-1] api = HiveSchedulerApi(user=self.user) @@ -83,7 +77,6 @@ def app(self, appid): } } - def action(self, app_ids, operation): api = HiveSchedulerApi(user=self.user) @@ -103,11 +96,9 @@ def action(self, app_ids, operation): 'message': _('%s signal sent to %s') % (operation['action'], operations) } - def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': ''} - def profile(self, appid, app_type, app_property, app_filters): appid = appid.rsplit('-')[-1] @@ -124,13 +115,11 @@ def profile(self, appid, app_type, app_property, app_filters): else: return {} - def _api_status(self, status): if status == 'RUNNING': return 'RUNNING' else: return 'PAUSED' - def _massage_status(self, task): return 'RUNNING' if task['enabled'] else 'PAUSED' diff --git a/apps/jobbrowser/src/jobbrowser/apis/workflow_api.py b/apps/jobbrowser/src/jobbrowser/apis/workflow_api.py index b2b741229d2..84cfe9f2248 100644 --- a/apps/jobbrowser/src/jobbrowser/apis/workflow_api.py +++ b/apps/jobbrowser/src/jobbrowser/apis/workflow_api.py @@ -15,26 +15,30 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import json import sys +import json +import logging -from jobbrowser.apis.base_api import Api, MockDjangoRequest, _extract_query_params, is_linkable, hdfs_link_js -from liboozie.oozie_api import get_oozie +from django.utils.translation import gettext as _ -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from jobbrowser.apis.base_api import Api, MockDjangoRequest, _extract_query_params, hdfs_link_js, is_linkable +from liboozie.oozie_api import get_oozie LOG = logging.getLogger() try: + from oozie.conf import ENABLE_OOZIE_BACKEND_FILTERING, OOZIE_JOBS_COUNT from oozie.forms import ParameterForm - from oozie.conf import OOZIE_JOBS_COUNT, ENABLE_OOZIE_BACKEND_FILTERING - from oozie.views.dashboard import get_oozie_job_log, list_oozie_workflow, manage_oozie_jobs, bulk_manage_oozie_jobs, \ - has_dashboard_jobs_access, massaged_oozie_jobs_for_json, has_job_edition_permission + from oozie.views.dashboard import ( + bulk_manage_oozie_jobs, + get_oozie_job_log, + has_dashboard_jobs_access, + has_job_edition_permission, + list_oozie_workflow, + manage_oozie_jobs, + massaged_oozie_jobs_for_json, + ) has_oozie_installed = True OOZIE_JOBS_COUNT_LIMIT = OOZIE_JOBS_COUNT.get() except Exception as e: @@ -54,7 +58,7 @@ def apps(self, filters): wf_list = oozie_api.get_workflows(**kwargs) return { - 'apps':[{ + 'apps': [{ 'id': app['id'], 'name': app['appName'], 'status': app['status'], @@ -70,7 +74,6 @@ def apps(self, filters): 'total': wf_list.total } - def app(self, appid): if '@' in appid: return WorkflowActionApi(self.user).app(appid) @@ -103,11 +106,9 @@ def app(self, appid): return common - def action(self, app_ids, action): return _manage_oozie_job(self.user, 
action, app_ids) - def logs(self, appid, app_type, log_name=None, is_embeddable=False): if '@' in appid: return WorkflowActionApi(self.user).logs(appid, app_type) @@ -117,7 +118,6 @@ def logs(self, appid, app_type, log_name=None, is_embeddable=False): return {'logs': json.loads(data.content)['log']} - def profile(self, appid, app_type, app_property, app_filters): if '@' in appid: return WorkflowActionApi(self.self.user).profile(appid, app_type, app_property) @@ -133,7 +133,9 @@ def profile(self, appid, app_type, app_property, app_filters): workflow = oozie_api.get_job(jobid=appid) return { 'properties': workflow.conf_dict, - 'properties_display': [{'name': key, 'value': val, 'link': is_linkable(key, val) and hdfs_link_js(val)} for key, val in workflow.conf_dict.items()], + 'properties_display': [ + {'name': key, 'value': val, 'link': is_linkable(key, val) and hdfs_link_js(val)} for key, val in workflow.conf_dict.items() + ], } return {} @@ -146,7 +148,7 @@ def _api_status(self, status): elif status == 'SUCCEEDED': return 'SUCCEEDED' else: - return 'FAILED' # KILLED and FAILED + return 'FAILED' # KILLED and FAILED def _get_variables(self, workflow): parameters = [] @@ -185,7 +187,6 @@ def app(self, appid): return common - def logs(self, appid, app_type, log_name=None): return {'progress': 0, 'logs': ''} @@ -229,7 +230,7 @@ def _filter_oozie_jobs(user, filters, kwargs): kwargs['cnt'] = min(filters['pagination']['limit'], OOZIE_JOBS_COUNT_LIMIT) if filters.get('states'): - states_filters = {'running': ['RUNNING', 'PREP', 'SUSPENDED'], 'completed': ['SUCCEEDED'], 'failed': ['FAILED', 'KILLED'],} + states_filters = {'running': ['RUNNING', 'PREP', 'SUSPENDED'], 'completed': ['SUCCEEDED'], 'failed': ['FAILED', 'KILLED'], } for _state in filters.get('states'): for _status in states_filters[_state]: kwargs['filters'].extend([('status', _status)]) diff --git a/apps/jobbrowser/src/jobbrowser/conf.py b/apps/jobbrowser/src/jobbrowser/conf.py index 2964ba27703..761a155787d 100644 --- a/apps/jobbrowser/src/jobbrowser/conf.py +++ b/apps/jobbrowser/src/jobbrowser/conf.py @@ -17,13 +17,9 @@ import sys -from desktop.lib.conf import Config, coerce_bool, ConfigSection - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import gettext_lazy as _ +from desktop.lib.conf import Config, ConfigSection, coerce_bool SHARE_JOBS = Config( key='share_jobs', @@ -84,10 +80,12 @@ default=False ) + def is_query_store_url_set(): """Check if query store url is configured""" return QUERY_STORE.SERVER_URL.get() != '' + QUERY_STORE = ConfigSection( key="query_store", help=_("Configs for managing query store interface."), diff --git a/apps/jobbrowser/src/jobbrowser/models.py b/apps/jobbrowser/src/jobbrowser/models.py index 943ccbb8a1c..a3e6116af10 100644 --- a/apps/jobbrowser/src/jobbrowser/models.py +++ b/apps/jobbrowser/src/jobbrowser/models.py @@ -16,32 +16,26 @@ # limitations under the License. 
from __future__ import division -from builtins import str -from builtins import object -import datetime -import logging -import math -import functools + import re import sys +import math +import logging +import datetime +import functools +from builtins import object, str from django.db import connection, models from django.urls import reverse from django.utils.html import escape +from django.utils.translation import gettext as _ from desktop.auth.backend import is_admin from desktop.conf import REST_CONN_TIMEOUT from desktop.lib import i18n from desktop.lib.view_util import format_duration_in_millis, location_to_url - from jobbrowser.conf import DISABLE_KILLING_JOBS -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -49,21 +43,24 @@ def can_view_job(username, job): acl = get_acls(job).get('mapreduce.job.acl-view-job', '') return acl == '*' or username in acl.split(',') + def can_modify_job(username, job): acl = get_acls(job).get('mapreduce.job.acl-modify-job', '') return acl == '*' or username in acl.split(',') + def get_acls(job): if job.is_mr2: try: acls = job.acls - except: + except Exception: LOG.exception('failed to get acls') acls = {} return acls else: return job.full_job_conf + def can_kill_job(self, user): if DISABLE_KILLING_JOBS.get(): return False @@ -102,7 +99,7 @@ def _make_links(cls, log, is_embeddable=False): def _replace_hdfs_link(self, is_embeddable=False, match=None): try: return '%s' % (location_to_url(match.group(0), strict=False, is_embeddable=is_embeddable), match.group(0)) - except: + except Exception: LOG.exception('failed to replace hdfs links: %s' % (match.groups(),)) return match.group(0) @@ -110,7 +107,7 @@ def _replace_hdfs_link(self, is_embeddable=False, match=None): def _replace_mr_link(self, match): try: return '%s' % (reverse('jobbrowser:jobbrowser.views.single_job', kwargs={'job': match.group(0)}), match.group(0)) - except: + except Exception: LOG.exception('failed to replace mr links: %s' % (match.groups(),)) return match.group(0) diff --git a/apps/jobbrowser/src/jobbrowser/templatetags/unix_ms_to_datetime.py b/apps/jobbrowser/src/jobbrowser/templatetags/unix_ms_to_datetime.py index 051ced3b5e4..5c77fa5c21d 100644 --- a/apps/jobbrowser/src/jobbrowser/templatetags/unix_ms_to_datetime.py +++ b/apps/jobbrowser/src/jobbrowser/templatetags/unix_ms_to_datetime.py @@ -16,24 +16,23 @@ # limitations under the License. from __future__ import division -import datetime -import django -import math + import sys +import math +import datetime -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +import django +from django.utils.translation import gettext as _ register = django.template.Library() + @register.filter(name='unix_ms_to_datetime') def unix_ms_to_datetime(unixtime): """unixtime is seconds since the epoch""" if unixtime: return datetime.datetime.fromtimestamp(math.floor(unixtime / 1000)) return _("No time") -unix_ms_to_datetime.is_safe = True +unix_ms_to_datetime.is_safe = True diff --git a/apps/jobbrowser/src/jobbrowser/tests.py b/apps/jobbrowser/src/jobbrowser/tests.py index 6bf3ce08a50..0b683e422cc 100644 --- a/apps/jobbrowser/src/jobbrowser/tests.py +++ b/apps/jobbrowser/src/jobbrowser/tests.py @@ -16,50 +16,45 @@ # limitations under the License. 
-from builtins import range -from builtins import object -import json -import logging -import pytest import re import sys +import json import time +import logging import unittest -import pytz +from builtins import object, range +from datetime import datetime -from django.urls import reverse +import pytz +import pytest +from babel import localtime from django.test import TestCase +from django.urls import reverse from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.test_utils import grant_access, add_to_group +from desktop.lib.test_utils import add_to_group, grant_access from desktop.models import Document from hadoop import cluster from hadoop.conf import YARN_CLUSTERS from hadoop.pseudo_hdfs4 import is_live_cluster -from hadoop.yarn import resource_manager_api, mapreduce_api, history_server_api, spark_history_server_api +from hadoop.yarn import history_server_api, mapreduce_api, resource_manager_api, spark_history_server_api from hadoop.yarn.spark_history_server_api import SparkHistoryServerApi -from liboozie.oozie_api_tests import OozieServerProvider -from oozie.models import Workflow -from useradmin.models import User - from jobbrowser import views from jobbrowser.api import get_api -from jobbrowser.apis.query_api import QueryApi from jobbrowser.apis import job_api +from jobbrowser.apis.query_api import QueryApi from jobbrowser.conf import SHARE_JOBS -from jobbrowser.models import can_view_job, can_modify_job, LinkJobLogs +from jobbrowser.models import LinkJobLogs, can_modify_job, can_view_job from jobbrowser.yarn_models import SparkJob - -from datetime import datetime -from babel import localtime - +from liboozie.oozie_api_tests import OozieServerProvider +from oozie.models import Workflow +from useradmin.models import User LOG = logging.getLogger() _INITIALIZED = False class TestBrowser(object): - def test_format_counter_name(self): assert "Foo Bar" == views.format_counter_name("fooBar") assert "Foo Bar Baz" == views.format_counter_name("fooBarBaz") @@ -67,6 +62,7 @@ def test_format_counter_name(self): assert "Foo." == views.format_counter_name("foo.") assert "A Bbb Ccc" == views.format_counter_name("A_BBB_CCC") + def get_hadoop_job_id(oozie_api, oozie_jobid, action_index=1, timeout=60, step=5): hadoop_job_id = None start = time.time() @@ -84,7 +80,6 @@ def get_hadoop_job_id(oozie_api, oozie_jobid, action_index=1, timeout=60, step=5 @pytest.mark.requires_hadoop @pytest.mark.integration class TestJobBrowserWithHadoop(TestCase, OozieServerProvider): - @classmethod def setup_class(cls): OozieServerProvider.setup_class() @@ -108,14 +103,17 @@ def setup_class(cls): # Run the sleep example, since it doesn't require user home directory design_id = cls.design.id - response = cls.client.post(reverse('oozie:submit_workflow', - args=[design_id]), - data={u'form-MAX_NUM_FORMS': [u''], - u'form-INITIAL_FORMS': [u'1'], - u'form-0-name': [u'REDUCER_SLEEP_TIME'], - u'form-0-value': [u'1'], - u'form-TOTAL_FORMS': [u'1']}, - follow=True) + response = cls.client.post( + reverse('oozie:submit_workflow', args=[design_id]), + data={ + 'form-MAX_NUM_FORMS': [''], + 'form-INITIAL_FORMS': ['1'], + 'form-0-name': ['REDUCER_SLEEP_TIME'], + 'form-0-value': ['1'], + 'form-TOTAL_FORMS': ['1'], + }, + follow=True, + ) oozie_jobid = response.context[0]['oozie_workflow'].id OozieServerProvider.wait_until_completion(oozie_jobid) @@ -129,7 +127,7 @@ def teardown_class(cls): Workflow.objects.filter(name__contains=cls.username).delete() # Remove user home directories. 
cls.cluster.fs.do_as_superuser(cls.cluster.fs.rmtree, cls.home_dir) - except: + except Exception: LOG.exception('failed to teardown %s' % cls.home_dir) cls.cluster.fs.setuser(cls.prev_user) @@ -137,28 +135,32 @@ def teardown_class(cls): def create_design(cls): job_name = '%s_%s' % (cls.username, 'sleep_job') if not Document.objects.available_docs(Workflow, cls.user).filter(name=job_name).exists(): - response = cls.client.post(reverse('jobsub:jobsub.views.new_design', - kwargs={'node_type': 'mapreduce'}), - data={'name': job_name, - 'description': '', - 'node_type': 'mapreduce', - 'jar_path': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar', - 'prepares': '[]', - 'files': '[]', - 'archives': '[]', - 'job_properties': ('[{\"name\":\"mapred.reduce.tasks\",\"value\":\"1\"},' - '{\"name\":\"mapred.mapper.class\",\"value\":\"org.apache.hadoop.examples.SleepJob\"},' - '{\"name\":\"mapred.reducer.class\",\"value\":\"org.apache.hadoop.examples.SleepJob\"},' - '{\"name\":\"mapred.mapoutput.key.class\",\"value\":\"org.apache.hadoop.io.IntWritable\"},' - '{\"name\":\"mapred.mapoutput.value.class\",\"value\":\"org.apache.hadoop.io.NullWritable\"},' - '{\"name\":\"mapred.output.format.class\",\"value\":\"org.apache.hadoop.mapred.lib.NullOutputFormat\"},' - '{\"name\":\"mapred.input.format.class\",\"value\":\"org.apache.hadoop.examples.SleepJob$SleepInputFormat\"},' - '{\"name\":\"mapred.partitioner.class\",\"value\":\"org.apache.hadoop.examples.SleepJob\"},' - '{\"name\":\"mapred.speculative.execution\",\"value\":\"false\"},' - '{\"name\":\"sleep.job.map.sleep.time\",\"value\":\"0\"},' - '{\"name\":\"sleep.job.reduce.sleep.time\",\"value\":\"${REDUCER_SLEEP_TIME}\"}]') + response = cls.client.post( + reverse('jobsub:jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}), + data={ + 'name': job_name, + 'description': '', + 'node_type': 'mapreduce', + 'jar_path': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar', + 'prepares': '[]', + 'files': '[]', + 'archives': '[]', + 'job_properties': ( + '[{"name":"mapred.reduce.tasks","value":"1"},' + '{"name":"mapred.mapper.class","value":"org.apache.hadoop.examples.SleepJob"},' + '{"name":"mapred.reducer.class","value":"org.apache.hadoop.examples.SleepJob"},' + '{"name":"mapred.mapoutput.key.class","value":"org.apache.hadoop.io.IntWritable"},' + '{"name":"mapred.mapoutput.value.class","value":"org.apache.hadoop.io.NullWritable"},' + '{"name":"mapred.output.format.class","value":"org.apache.hadoop.mapred.lib.NullOutputFormat"},' + '{"name":"mapred.input.format.class","value":"org.apache.hadoop.examples.SleepJob$SleepInputFormat"},' + '{"name":"mapred.partitioner.class","value":"org.apache.hadoop.examples.SleepJob"},' + '{"name":"mapred.speculative.execution","value":"false"},' + '{"name":"sleep.job.map.sleep.time","value":"0"},' + '{"name":"sleep.job.reduce.sleep.time","value":"${REDUCER_SLEEP_TIME}"}]' + ), }, - HTTP_X_REQUESTED_WITH='XMLHttpRequest') + HTTP_X_REQUESTED_WITH='XMLHttpRequest', + ) assert response.status_code == 200 return Document.objects.available_docs(Workflow, cls.user).get(name=job_name).content_object @@ -200,11 +202,13 @@ def test_failed_jobs(self): TestJobBrowserWithHadoop.cluster.fs.mkdir(TestJobBrowserWithHadoop.home_dir + "/jt-test_failed_jobs") TestJobBrowserWithHadoop.cluster.fs.mkdir(INPUT_DIR) TestJobBrowserWithHadoop.cluster.fs.rmtree(OUTPUT_DIR) - except: + except Exception: LOG.exception('failed to teardown tests') job_name = '%s_%s' % (TestJobBrowserWithHadoop.username, 'test_failed_jobs-1') - response = 
TestJobBrowserWithHadoop.client.post(reverse('jobsub:jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}), { + response = TestJobBrowserWithHadoop.client.post( + reverse('jobsub:jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}), + { 'name': [job_name], 'description': ['description test_failed_jobs-1'], 'args': '', @@ -212,24 +216,33 @@ def test_failed_jobs(self): 'prepares': '[]', 'archives': '[]', 'files': '[]', - 'job_properties': ['[{"name":"mapred.input.dir","value":"%s"},\ + 'job_properties': [ + '[{"name":"mapred.input.dir","value":"%s"},\ {"name":"mapred.output.dir","value":"%s"},\ {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\ {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\ - {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)] - }, HTTP_X_REQUESTED_WITH='XMLHttpRequest', follow=True) + {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' + % (INPUT_DIR, OUTPUT_DIR) + ], + }, + HTTP_X_REQUESTED_WITH='XMLHttpRequest', + follow=True, + ) # Submit the job design_dict = json.loads(response.content) design_id = int(design_dict['id']) - response = TestJobBrowserWithHadoop.client.post(reverse('oozie:submit_workflow', - args=[design_id]), - data={u'form-MAX_NUM_FORMS': [u''], - u'form-INITIAL_FORMS': [u'1'], - u'form-0-name': [u'REDUCER_SLEEP_TIME'], - u'form-0-value': [u'1'], - u'form-TOTAL_FORMS': [u'1']}, - follow=True) + response = TestJobBrowserWithHadoop.client.post( + reverse('oozie:submit_workflow', args=[design_id]), + data={ + 'form-MAX_NUM_FORMS': [''], + 'form-INITIAL_FORMS': ['1'], + 'form-0-name': ['REDUCER_SLEEP_TIME'], + 'form-0-value': ['1'], + 'form-TOTAL_FORMS': ['1'], + }, + follow=True, + ) oozie_jobid = response.context[0]['oozie_workflow'].id job = OozieServerProvider.wait_until_completion(oozie_jobid) hadoop_job_id = get_hadoop_job_id(TestJobBrowserWithHadoop.oozie, oozie_jobid, 1) @@ -238,14 +251,14 @@ def test_failed_jobs(self): # Select only killed jobs (should be absent) # Taking advantage of the fact new jobs are at the top of the list! response = TestJobBrowserWithHadoop.client.post('/jobbrowser/jobs/', {'format': 'json', 'state': 'killed'}) - assert not hadoop_job_id_short in response.content + assert hadoop_job_id_short not in response.content # Select only failed jobs (should be present) # Map job should succeed. Reduce job should fail. 
     response = TestJobBrowserWithHadoop.client.post('/jobbrowser/jobs/', {'format': 'json', 'state': 'failed'})
     assert hadoop_job_id_short in response.content
-    raise pytest.skip("Skipping Test") # Not compatible with MR2
+    raise pytest.skip("Skipping Test")  # Not compatible with MR2

     # The single job view should have the failed task table
     response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
@@ -279,11 +292,11 @@ def test_jobs_page(self):
     response = TestJobBrowserWithHadoop.client.post('/jobbrowser/jobs/', {'format': 'json', 'state': 'completed'})
     assert TestJobBrowserWithHadoop.hadoop_job_id_short in response.content
     response = TestJobBrowserWithHadoop.client.post('/jobbrowser/jobs/', {'format': 'json', 'state': 'failed'})
-    assert not TestJobBrowserWithHadoop.hadoop_job_id_short in response.content
+    assert TestJobBrowserWithHadoop.hadoop_job_id_short not in response.content
     response = TestJobBrowserWithHadoop.client.post('/jobbrowser/jobs/', {'format': 'json', 'state': 'running'})
-    assert not TestJobBrowserWithHadoop.hadoop_job_id_short in response.content
+    assert TestJobBrowserWithHadoop.hadoop_job_id_short not in response.content
     response = TestJobBrowserWithHadoop.client.post('/jobbrowser/jobs/', {'format': 'json', 'state': 'killed'})
-    assert not TestJobBrowserWithHadoop.hadoop_job_id_short in response.content
+    assert TestJobBrowserWithHadoop.hadoop_job_id_short not in response.content

   def test_tasks_page(self):
     raise pytest.skip("Skipping Test")
@@ -326,7 +339,7 @@ def test_job_permissions(self):
     finish = SHARE_JOBS.set_for_testing(False)
     try:
       response = client_not_me.post('/jobbrowser/jobs/', {'format': 'json', 'user': ''})
-      assert not TestJobBrowserWithHadoop.hadoop_job_id_short in response.content
+      assert TestJobBrowserWithHadoop.hadoop_job_id_short not in response.content
     finally:
       finish()
@@ -379,7 +392,6 @@ def test_job_single_logs(self):

 @pytest.mark.django_db
 class TestMapReduce2NoHadoop(object):
-
   def setup_method(self):
     # Beware: Monkey patching
     if not hasattr(resource_manager_api, 'old_get_resource_manager_api'):
@@ -391,7 +403,6 @@ def setup_method(self):
     if not hasattr(spark_history_server_api, 'old_get_spark_history_server_api'):
       spark_history_server_api.old_get_spark_history_server_api = spark_history_server_api.get_history_server_api
-
     self.c = make_logged_in_client(is_superuser=False)
     grant_access("test", "test", "jobbrowser")
     self.user = User.objects.get(username='test')
@@ -409,13 +420,9 @@ def setup_method(self):
     history_server_api.get_history_server_api = lambda username: HistoryServerApi(username)
     spark_history_server_api.get_history_server_api = lambda: MockSparkHistoryApi()
-    self.finish = [
-      YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True),
-      SHARE_JOBS.set_for_testing(False)
-    ]
+    self.finish = [YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True), SHARE_JOBS.set_for_testing(False)]
     assert cluster.is_yarn()
-
   def teardown_method(self):
     resource_manager_api.get_resource_manager = getattr(resource_manager_api, 'old_get_resource_manager')
     mapreduce_api.get_mapreduce_api = getattr(mapreduce_api, 'old_get_mapreduce_api')
@@ -485,7 +492,7 @@ def job_not_assigned(self):
     assert result['status'] == 0

   def test_acls_job(self):
-    response = self.c.get('/jobbrowser/jobs/job_1356251510842_0054') # Check in perm decorator
+    response = self.c.get('/jobbrowser/jobs/job_1356251510842_0054')  # Check in perm decorator
     assert can_view_job('test', response.context[0]['job'])
     assert can_modify_job('test', response.context[0]['job'])
@@ -496,10 +503,7 @@ def test_acls_job(self):
     assert not can_modify_job('test3', response.context[0]['job'])

     response2 = self.c3.get('/jobbrowser/jobs/job_1356251510842_0054')
-    if sys.version_info[0] < 3:
-      assert b'don't have permission to access job' in response2.content, response2.content
-    else:
-      assert b'don't have permission to access job' in response2.content, response2.content
+    assert b'don't have permission to access job' in response2.content, response2.content

   def test_kill_job(self):
     job_id = 'application_1356251510842_0054'
@@ -513,10 +517,8 @@ def test_kill_job(self):
     assert b'Kill operation is forbidden.' in response.content, response.content

-
 @pytest.mark.django_db
 class TestResourceManagerHaNoHadoop(object):
-
   def setup_method(self):
     # Beware: Monkey patching
     if not hasattr(resource_manager_api, 'old_get_resource_manager_api'):
@@ -545,16 +547,14 @@ def teardown_method(self):
     for f in self.finish:
       f()
-
   def test_failover_no_ha(self):
     self.finish = [
-        YARN_CLUSTERS.set_for_testing({'default': {}}),
-
-        YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True),
-        YARN_CLUSTERS['default'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_host_active'),
-        YARN_CLUSTERS['default'].HISTORY_SERVER_API_URL.set_for_testing('jhs_host'),
-        YARN_CLUSTERS['default'].SECURITY_ENABLED.set_for_testing(False),
-        YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.set_for_testing(False),
+      YARN_CLUSTERS.set_for_testing({'default': {}}),
+      YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True),
+      YARN_CLUSTERS['default'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_host_active'),
+      YARN_CLUSTERS['default'].HISTORY_SERVER_API_URL.set_for_testing('jhs_host'),
+      YARN_CLUSTERS['default'].SECURITY_ENABLED.set_for_testing(False),
+      YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.set_for_testing(False),
     ]

     resource_manager_api.API_CACHE = None
@@ -569,24 +569,21 @@ def test_failover_no_ha(self):
     with pytest.raises(Exception):
       api.get_jobs(self.user, username=self.user.username, state='running', text='')
-
   def test_failover_ha(self):
     self.finish = [
-        YARN_CLUSTERS.set_for_testing({'ha1': {}, 'ha2': {}}),
-
-        YARN_CLUSTERS['ha1'].SUBMIT_TO.set_for_testing(True),
-        YARN_CLUSTERS['ha1'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_host_active'),
-        YARN_CLUSTERS['ha1'].HISTORY_SERVER_API_URL.set_for_testing('jhs_host'),
-        YARN_CLUSTERS['ha1'].SECURITY_ENABLED.set_for_testing(False),
-        YARN_CLUSTERS['ha1'].SSL_CERT_CA_VERIFY.set_for_testing(False),
-        YARN_CLUSTERS['ha2'].SUBMIT_TO.set_for_testing(True),
-        YARN_CLUSTERS['ha2'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_2_host'),
-        YARN_CLUSTERS['ha2'].HISTORY_SERVER_API_URL.set_for_testing('jhs_host'),
-        YARN_CLUSTERS['ha2'].SECURITY_ENABLED.set_for_testing(False),
-        YARN_CLUSTERS['ha2'].SSL_CERT_CA_VERIFY.set_for_testing(False),
+      YARN_CLUSTERS.set_for_testing({'ha1': {}, 'ha2': {}}),
+      YARN_CLUSTERS['ha1'].SUBMIT_TO.set_for_testing(True),
+      YARN_CLUSTERS['ha1'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_host_active'),
+      YARN_CLUSTERS['ha1'].HISTORY_SERVER_API_URL.set_for_testing('jhs_host'),
+      YARN_CLUSTERS['ha1'].SECURITY_ENABLED.set_for_testing(False),
+      YARN_CLUSTERS['ha1'].SSL_CERT_CA_VERIFY.set_for_testing(False),
+      YARN_CLUSTERS['ha2'].SUBMIT_TO.set_for_testing(True),
+      YARN_CLUSTERS['ha2'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_2_host'),
+      YARN_CLUSTERS['ha2'].HISTORY_SERVER_API_URL.set_for_testing('jhs_host'),
+      YARN_CLUSTERS['ha2'].SECURITY_ENABLED.set_for_testing(False),
YARN_CLUSTERS['ha2'].SSL_CERT_CA_VERIFY.set_for_testing(False), ] - resource_manager_api.API_CACHE = None api = get_api(self.user, jt=None) @@ -598,7 +595,9 @@ def test_failover_ha(self): # rm1 is set to to fail the 3rd time YARN_CLUSTERS['ha1'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_1_host') - YARN_CLUSTERS['ha2'].RESOURCE_MANAGER_API_URL.set_for_testing('rm_2_host_active') # Just tells mocked RM that it should say it is active + YARN_CLUSTERS['ha2'].RESOURCE_MANAGER_API_URL.set_for_testing( + 'rm_2_host_active' + ) # Just tells mocked RM that it should say it is active api.get_jobs(self.user, username=self.user.username, state='running', text='') assert api.resource_manager_api.from_failover api.resource_manager_api.from_failover = False @@ -622,41 +621,54 @@ def test_failover_ha(self): class TestImpalaApi(object): - def setup_method(self): api = MockImpalaQueryApi('http://url.com') self.api = QueryApi(None, impala_api=api) def handle_query_start_time(self, start_time): - query_start_time = datetime.strptime(start_time[:-3], "%Y-%m-%d %H:%M:%S.%f"). \ - replace(tzinfo=pytz.utc).astimezone(localtime._get_localzone()).strftime("%Y-%m-%d %H:%M:%S.%f") + query_start_time = ( + datetime.strptime(start_time[:-3], "%Y-%m-%d %H:%M:%S.%f") + .replace(tzinfo=pytz.utc) + .astimezone(localtime._get_localzone()) + .strftime("%Y-%m-%d %H:%M:%S.%f") + ) return query_start_time def test_apps(self): response = self.api.apps({}) target = [ { - 'status': u'FINISHED', + 'status': 'FINISHED', 'rows_fetched': 28, - 'user': u'admin', + 'user': 'admin', 'canWrite': False, 'duration': 3355000.0, - 'id': u'8a46a8865624698f:b80b211500000000', + 'id': '8a46a8865624698f:b80b211500000000', 'apiStatus': 'SUCCEEDED', - 'name': u'SELECT sample_07.description, sample_07.salary FROM sample...', + 'name': 'SELECT sample_07.description, sample_07.salary FROM sample...', 'submitted': self.handle_query_start_time('2017-10-25 15:38:26.637010000'), - 'queue': u'root.admin', - 'waiting': True, 'progress': u'1 / 1 ( 100%)', - 'type': u'QUERY', 'waiting_time': u'52m8s' + 'queue': 'root.admin', + 'waiting': True, + 'progress': '1 / 1 ( 100%)', + 'type': 'QUERY', + 'waiting_time': '52m8s', }, { - 'status': u'FINISHED', - 'rows_fetched': 53, 'user': u'admin', 'canWrite': False, - 'duration': 3369000.0, 'id': u'4d497267f34ff17d:817bdfb500000000', - 'apiStatus': 'SUCCEEDED', 'name': u'select * from customers', - 'submitted': self.handle_query_start_time('2017-10-25 15:38:12.872825000'), 'queue': u'root.admin', - 'waiting': True, 'progress': u'2 / 3 (66.6667%)', 'type': u'QUERY', 'waiting_time': u'52m8s' - } + 'status': 'FINISHED', + 'rows_fetched': 53, + 'user': 'admin', + 'canWrite': False, + 'duration': 3369000.0, + 'id': '4d497267f34ff17d:817bdfb500000000', + 'apiStatus': 'SUCCEEDED', + 'name': 'select * from customers', + 'submitted': self.handle_query_start_time('2017-10-25 15:38:12.872825000'), + 'queue': 'root.admin', + 'waiting': True, + 'progress': '2 / 3 (66.6667%)', + 'type': 'QUERY', + 'waiting_time': '52m8s', + }, ] for i in range(0, len(target)): for key, value in target[i].items(): @@ -664,20 +676,34 @@ def test_apps(self): def test_app(self): response = self.api.app('4d497267f34ff17d:817bdfb500000000') - for key, value in {'status': u'FINISHED', 'name': u'select * from customers', - 'duration': 3369000.0, 'progress': 66.6667, 'user': u'admin', 'type': 'queries', + for key, value in { + 'status': 'FINISHED', + 'name': 'select * from customers', + 'duration': 3369000.0, + 'progress': 66.6667, + 'user': 'admin', + 
'type': 'queries', 'id': '4d497267f34ff17d:817bdfb500000000', 'submitted': self.handle_query_start_time('2017-10-25 15:38:12.872825000'), - 'apiStatus': 'SUCCEEDED', 'doc_url': 'http://url.com/query_plan?query_id=4d497267f34ff17d:817bdfb500000000'}.items(): + 'apiStatus': 'SUCCEEDED', + 'doc_url': 'http://url.com/query_plan?query_id=4d497267f34ff17d:817bdfb500000000', + }.items(): assert response.get(key) == value response = self.api.app('8a46a8865624698f:b80b211500000000') - for key, value in {'status': u'FINISHED', 'name': u'SELECT sample_07.description, sample_07.salary FROM sample...', - 'duration': 3355000.0, 'progress': 100.0, 'user': u'admin', 'type': 'queries', + for key, value in { + 'status': 'FINISHED', + 'name': 'SELECT sample_07.description, sample_07.salary FROM sample...', + 'duration': 3355000.0, + 'progress': 100.0, + 'user': 'admin', + 'type': 'queries', 'id': '8a46a8865624698f:b80b211500000000', - 'submitted': self.handle_query_start_time('2017-10-25 15:38:26.637010000'), 'apiStatus': 'SUCCEEDED', - 'doc_url': 'http://url.com/query_plan?query_id=8a46a8865624698f:b80b211500000000'}.items(): + 'submitted': self.handle_query_start_time('2017-10-25 15:38:26.637010000'), + 'apiStatus': 'SUCCEEDED', + 'doc_url': 'http://url.com/query_plan?query_id=8a46a8865624698f:b80b211500000000', + }.items(): assert response.get(key) == value @@ -703,28 +729,26 @@ def teardown_method(self): def test_spark_executor_logs(self): # Spark job status is succeed - query_executor_data = {u'interface': [u'"jobs"'], u'app_id': [u'"driver_executor_application_1513618343677_0018"']} + query_executor_data = {'interface': ['"jobs"'], 'app_id': ['"driver_executor_application_1513618343677_0018"']} resp_executor = self.c.post('/jobbrowser/api/job/jobs', query_executor_data) response_executor = json.loads(resp_executor.content) assert response_executor['status'] == 0 assert response_executor['app']['executor_id'] == 'driver' - query_log_data = {u'interface': [u'"jobs"'], u'type': [u'"SPARK"'], - u'app_id': [u'"application_1513618343677_0018"'], u'name': [u'"default"']} + query_log_data = {'interface': ['"jobs"'], 'type': ['"SPARK"'], 'app_id': ['"application_1513618343677_0018"'], 'name': ['"default"']} resp_log = self.c.post('/jobbrowser/api/job/logs', query_log_data) response_log = json.loads(resp_log.content) assert response_log['status'] == 0 assert response_log['logs']['logs'] == 'dummy_logs' # Spark job status is running - query_executor_data = {u'interface': [u'"jobs"'], u'app_id': [u'"driver_executor_application_1513618343677_0020"']} + query_executor_data = {'interface': ['"jobs"'], 'app_id': ['"driver_executor_application_1513618343677_0020"']} resp_executor = self.c.post('/jobbrowser/api/job/jobs', query_executor_data) response_executor = json.loads(resp_executor.content) assert response_executor['status'] == 0 assert response_executor['app']['executor_id'] == 'driver' - query_log_data = {u'interface': [u'"jobs"'], u'type': [u'"SPARK"'], - u'app_id': [u'"application_1513618343677_0020"'], u'name': [u'"default"']} + query_log_data = {'interface': ['"jobs"'], 'type': ['"SPARK"'], 'app_id': ['"application_1513618343677_0020"'], 'name': ['"default"']} resp_log = self.c.post('/jobbrowser/api/job/logs', query_log_data) response_log = json.loads(resp_log.content) assert response_log['status'] == 0 @@ -758,6 +782,7 @@ class MockResourceManagerHaApi(object): Mock the RM API. Raise a failover exception after 2 calls. Is active if name contains 'active'. 
""" + def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False): self.rm_url = rm_url self.from_failover = False @@ -784,147 +809,144 @@ def apps(self, **kwargs): raise Exception('standby RM after 2 tries') self.get_apps_count += 1 - return { - 'apps': { - 'app': [] - } - } + return {'apps': {'app': []}} def cluster(self): return {'clusterInfo': {'haState': 'ACTIVE' if 'active' in self.rm_url else 'STANDBY'}} class MockMapreduceHaApi(object): - def __init__(self, username): pass + def __init__(self, username): + pass class HistoryServerHaApi(object): - def __init__(self, username): pass + def __init__(self, username): + pass class MockResourceManagerApi(object): APPS = { 'application_1356251510842_0054': { - u'finishedTime': 1356961070119, - u'name': u'oozie:launcher:T=map-reduce:W=MapReduce-copy:A=Sleep:ID=0000004-121223003201296-oozie-oozi-W', - u'amContainerLogs': u'http://localhost:8042/node/containerlogs/container_1356251510842_0054_01_000001/romain', - u'clusterId': 1356251510842, - u'trackingUrl': u'http://localhost:8088/proxy/application_1356251510842_0054/jobhistory/job/job_1356251510842_0054', - u'amHostHttpAddress': u'localhost:8042', - u'startedTime': 1356961057225, - u'queue': u'default', - u'state': u'RUNNING', - u'elapsedTime': 12894, - u'finalStatus': u'UNDEFINED', - u'diagnostics': u'', - u'progress': 100.0, - u'trackingUI': u'History', - u'id': u'application_1356251510842_0054', - u'user': u'test', - # For when the job is KILLED - u'startTime': 1356961057226, - u'finishTime': 1356961057226, - u'applicationType': 'MAPREDUCE' + 'finishedTime': 1356961070119, + 'name': 'oozie:launcher:T=map-reduce:W=MapReduce-copy:A=Sleep:ID=0000004-121223003201296-oozie-oozi-W', + 'amContainerLogs': 'http://localhost:8042/node/containerlogs/container_1356251510842_0054_01_000001/romain', + 'clusterId': 1356251510842, + 'trackingUrl': 'http://localhost:8088/proxy/application_1356251510842_0054/jobhistory/job/job_1356251510842_0054', + 'amHostHttpAddress': 'localhost:8042', + 'startedTime': 1356961057225, + 'queue': 'default', + 'state': 'RUNNING', + 'elapsedTime': 12894, + 'finalStatus': 'UNDEFINED', + 'diagnostics': '', + 'progress': 100.0, + 'trackingUI': 'History', + 'id': 'application_1356251510842_0054', + 'user': 'test', + # For when the job is KILLED + 'startTime': 1356961057226, + 'finishTime': 1356961057226, + 'applicationType': 'MAPREDUCE', }, 'application_1356251510842_0009': { - u'finishedTime': 1356467118570, - u'name': u'oozie:action:T=map-reduce:W=MapReduce-copy2:A=Sleep:ID=0000002-121223003201296-oozie-oozi-W', - u'amContainerLogs': u'http://localhost:8042/node/containerlogs/container_1356251510842_0009_01_000001/romain', - u'clusterId': 1356251510842, - u'trackingUrl': u'http://localhost:8088/proxy/application_1356251510842_0009/jobhistory/job/job_1356251510842_0009', - u'amHostHttpAddress': u'localhost:8042', - u'startedTime': 1356467081121, - u'queue': u'default', - u'state': u'FINISHED', - u'elapsedTime': 37449, - u'finalStatus': u'SUCCEEDED', - u'diagnostics': u'', - u'progress': 100.0, - u'trackingUI': u'History', - u'id': u'application_1356251510842_0009', - u'user': u'test', - u'applicationType': 'MAPREDUCE' + 'finishedTime': 1356467118570, + 'name': 'oozie:action:T=map-reduce:W=MapReduce-copy2:A=Sleep:ID=0000002-121223003201296-oozie-oozi-W', + 'amContainerLogs': 'http://localhost:8042/node/containerlogs/container_1356251510842_0009_01_000001/romain', + 'clusterId': 1356251510842, + 'trackingUrl': 
'http://localhost:8088/proxy/application_1356251510842_0009/jobhistory/job/job_1356251510842_0009', + 'amHostHttpAddress': 'localhost:8042', + 'startedTime': 1356467081121, + 'queue': 'default', + 'state': 'FINISHED', + 'elapsedTime': 37449, + 'finalStatus': 'SUCCEEDED', + 'diagnostics': '', + 'progress': 100.0, + 'trackingUI': 'History', + 'id': 'application_1356251510842_0009', + 'user': 'test', + 'applicationType': 'MAPREDUCE', }, 'application_1428442704693_0006': { - u'allocatedMB': 4096, - u'allocatedVCores': 3, - u'amContainerLogs': u'http://localhost:8042/node/containerlogs/container_1428442704693_0006_01_000001/erickt', - u'amHostHttpAddress': u'localhost:8042', - u'applicationTags': u'', - u'applicationType': u'SPARK', - u'clusterId': 1428442704693, - u'diagnostics': u'', - u'elapsedTime': 529040, - u'finalStatus': u'UNDEFINED', - u'finishedTime': 0, - u'id': u'application_1428442704693_0006', - u'memorySeconds': 2138468, - u'name': u'Spark shell', - u'numAMContainerPreempted': 0, - u'numNonAMContainerPreempted': 0, - u'preemptedResourceMB': 0, - u'preemptedResourceVCores': 0, - u'progress': 10.0, - u'queue': u'root.erickt', - u'runningContainers': 3, - u'startedTime': 1428443335161, - u'state': u'RUNNING', - u'trackingUI': u'ApplicationMaster', - u'trackingUrl': u'http://localhost:8088/proxy/application_1428442704693_0006/', - u'user': u'test', - u'vcoreSeconds': 1567, + 'allocatedMB': 4096, + 'allocatedVCores': 3, + 'amContainerLogs': 'http://localhost:8042/node/containerlogs/container_1428442704693_0006_01_000001/erickt', + 'amHostHttpAddress': 'localhost:8042', + 'applicationTags': '', + 'applicationType': 'SPARK', + 'clusterId': 1428442704693, + 'diagnostics': '', + 'elapsedTime': 529040, + 'finalStatus': 'UNDEFINED', + 'finishedTime': 0, + 'id': 'application_1428442704693_0006', + 'memorySeconds': 2138468, + 'name': 'Spark shell', + 'numAMContainerPreempted': 0, + 'numNonAMContainerPreempted': 0, + 'preemptedResourceMB': 0, + 'preemptedResourceVCores': 0, + 'progress': 10.0, + 'queue': 'root.erickt', + 'runningContainers': 3, + 'startedTime': 1428443335161, + 'state': 'RUNNING', + 'trackingUI': 'ApplicationMaster', + 'trackingUrl': 'http://localhost:8088/proxy/application_1428442704693_0006/', + 'user': 'test', + 'vcoreSeconds': 1567, }, 'application_1428442704693_0007': { - u'allocatedMB': -1, - u'allocatedVCores': -1, - u'applicationTags': u'', - u'applicationType': u'YARN', - u'clusterId': 1428442704693, - u'diagnostics': u'', - u'elapsedTime': 4056, - u'finalStatus': u'SUCCEEDED', - u'finishedTime': 1428454945371, - u'id': u'application_1428442704693_0007', - u'memorySeconds': 2290, - u'name': u'UnmanagedAM', - u'numAMContainerPreempted': 0, - u'numNonAMContainerPreempted': 0, - u'preemptedResourceMB': 0, - u'preemptedResourceVCores': 0, - u'progress': 100.0, - u'queue': u'root.erickt', - u'runningContainers': -1, - u'startedTime': 0, - u'state': u'FINISHED', - u'trackingUI': u'History', - u'trackingUrl': u'http://N/A', - u'user': u'test', - u'vcoreSeconds': 1, - } + 'allocatedMB': -1, + 'allocatedVCores': -1, + 'applicationTags': '', + 'applicationType': 'YARN', + 'clusterId': 1428442704693, + 'diagnostics': '', + 'elapsedTime': 4056, + 'finalStatus': 'SUCCEEDED', + 'finishedTime': 1428454945371, + 'id': 'application_1428442704693_0007', + 'memorySeconds': 2290, + 'name': 'UnmanagedAM', + 'numAMContainerPreempted': 0, + 'numNonAMContainerPreempted': 0, + 'preemptedResourceMB': 0, + 'preemptedResourceVCores': 0, + 'progress': 100.0, + 'queue': 'root.erickt', + 
'runningContainers': -1, + 'startedTime': 0, + 'state': 'FINISHED', + 'trackingUI': 'History', + 'trackingUrl': 'http://N/A', + 'user': 'test', + 'vcoreSeconds': 1, + }, } - def __init__(self, user, rm_url=None): pass + def __init__(self, user, rm_url=None): + pass def apps(self, **kwargs): return { - 'apps': { - 'app': [ - # RUNNING - MockResourceManagerApi.APPS['application_1356251510842_0054'], - # FINISHED - MockResourceManagerApi.APPS['application_1356251510842_0009'], - # SPARK - MockResourceManagerApi.APPS['application_1428442704693_0006'], - # YARN - MockResourceManagerApi.APPS['application_1428442704693_0007'], + 'apps': { + 'app': [ + # RUNNING + MockResourceManagerApi.APPS['application_1356251510842_0054'], + # FINISHED + MockResourceManagerApi.APPS['application_1356251510842_0009'], + # SPARK + MockResourceManagerApi.APPS['application_1428442704693_0006'], + # YARN + MockResourceManagerApi.APPS['application_1428442704693_0007'], ] } } def app(self, job_id): - return { - u'app': MockResourceManagerApi.APPS[job_id] - } + return {'app': MockResourceManagerApi.APPS[job_id]} def kill(self, job_id): job_id = job_id.replace('job', 'application') @@ -934,524 +956,575 @@ def kill(self, job_id): class MockImpalaQueryApi(object): APPS = { '8a46a8865624698f:b80b211500000000': { - u'stmt_type': u'QUERY', u'resource_pool': u'root.admin', - u'waiting': True, u'last_event': u'Unregister query', - u'start_time': u'2017-10-25 15:38:26.637010000', - u'rows_fetched': 28, - u'stmt': u'SELECT sample_07.description, sample_07.salary\r\nFROM\r\n '\ - 'sample_07\r\nWHERE\r\n( sample_07.salary > 100000)\r\nORDER BY sample_07.salary DESC\r\nLIMIT 1000', - u'executing': False, u'state': u'FINISHED', u'query_id': u'8a46a8865624698f:b80b211500000000', - u'end_time': u'2017-10-25 16:34:22.592036000', u'duration': u'55m55s', u'progress': u'1 / 1 ( 100%)', - u'effective_user': u'admin', u'default_db': u'default', u'waiting_time': u'52m8s' + 'stmt_type': 'QUERY', + 'resource_pool': 'root.admin', + 'waiting': True, + 'last_event': 'Unregister query', + 'start_time': '2017-10-25 15:38:26.637010000', + 'rows_fetched': 28, + 'stmt': 'SELECT sample_07.description, sample_07.salary\r\nFROM\r\n ' + 'sample_07\r\nWHERE\r\n( sample_07.salary > 100000)\r\nORDER BY sample_07.salary DESC\r\nLIMIT 1000', + 'executing': False, + 'state': 'FINISHED', + 'query_id': '8a46a8865624698f:b80b211500000000', + 'end_time': '2017-10-25 16:34:22.592036000', + 'duration': '55m55s', + 'progress': '1 / 1 ( 100%)', + 'effective_user': 'admin', + 'default_db': 'default', + 'waiting_time': '52m8s', }, '4d497267f34ff17d:817bdfb500000000': { - u'stmt_type': u'QUERY', u'resource_pool': u'root.admin', u'waiting': True, - u'last_event': u'Unregister query', u'start_time': u'2017-10-25 15:38:12.872825000', - u'rows_fetched': 53, u'stmt': u'select * from customers', u'executing': False, - u'state': u'FINISHED', u'query_id': u'4d497267f34ff17d:817bdfb500000000', - u'end_time': u'2017-10-25 16:34:22.589811000', u'duration': u'56m9s', u'progress': u'2 / 3 (66.6667%)', - u'effective_user': u'admin', u'default_db': u'default', u'waiting_time': u'52m8s' - } + 'stmt_type': 'QUERY', + 'resource_pool': 'root.admin', + 'waiting': True, + 'last_event': 'Unregister query', + 'start_time': '2017-10-25 15:38:12.872825000', + 'rows_fetched': 53, + 'stmt': 'select * from customers', + 'executing': False, + 'state': 'FINISHED', + 'query_id': '4d497267f34ff17d:817bdfb500000000', + 'end_time': '2017-10-25 16:34:22.589811000', + 'duration': '56m9s', + 'progress': '2 / 
3 (66.6667%)', + 'effective_user': 'admin', + 'default_db': 'default', + 'waiting_time': '52m8s', + }, } PLAN = { '4d497267f34ff17d:817bdfb500000000': { 'status': -1, - u'plan': { - u'status': u'OK', - u'plan_json': { - u'plan_nodes': [ + 'plan': { + 'status': 'OK', + 'plan_json': { + 'plan_nodes': [ { - u'num_instances': 1, u'output_card': 53, u'label_detail': u'UNPARTITIONED', u'label': u'01:EXCHANGE', u'is_broadcast': True, - u'max_time': u'0.000ns', u'avg_time': u'0.000ns', u'children': [], u'max_time_val': 0 + 'num_instances': 1, + 'output_card': 53, + 'label_detail': 'UNPARTITIONED', + 'label': '01:EXCHANGE', + 'is_broadcast': True, + 'max_time': '0.000ns', + 'avg_time': '0.000ns', + 'children': [], + 'max_time_val': 0, }, { - u'num_instances': 1, u'output_card': 53, u'label_detail': u'default.customers', u'data_stream_target': u'01:EXCHANGE', - u'label': u'00:SCAN HDFS', u'max_time': u'215.018ms', u'avg_time': u'215.018ms', u'children': [], u'max_time_val': 215018404 - } + 'num_instances': 1, + 'output_card': 53, + 'label_detail': 'default.customers', + 'data_stream_target': '01:EXCHANGE', + 'label': '00:SCAN HDFS', + 'max_time': '215.018ms', + 'avg_time': '215.018ms', + 'children': [], + 'max_time_val': 215018404, + }, ] }, - u'__common__': { - u'navbar': [ - {u'link': u'/backends', u'title': u'/backends'}, - {u'link': u'/catalog', u'title': u'/catalog'}, - {u'link': u'/hadoop-varz', u'title': u'/hadoop-varz'}, - {u'link': u'/log_level', u'title': u'/log_level'}, - {u'link': u'/logs', u'title': u'/logs'}, - {u'link': u'/memz', u'title': u'/memz'}, - {u'link': u'/metrics', u'title': u'/metrics'}, - {u'link': u'/queries', u'title': u'/queries'}, - {u'link': u'/rpcz', u'title': u'/rpcz'}, - {u'link': u'/sessions', u'title': u'/sessions'}, - {u'link': u'/threadz', u'title': u'/threadz'}, - {u'link': u'/varz', u'title': u'/varz'} + '__common__': { + 'navbar': [ + {'link': '/backends', 'title': '/backends'}, + {'link': '/catalog', 'title': '/catalog'}, + {'link': '/hadoop-varz', 'title': '/hadoop-varz'}, + {'link': '/log_level', 'title': '/log_level'}, + {'link': '/logs', 'title': '/logs'}, + {'link': '/memz', 'title': '/memz'}, + {'link': '/metrics', 'title': '/metrics'}, + {'link': '/queries', 'title': '/queries'}, + {'link': '/rpcz', 'title': '/rpcz'}, + {'link': '/sessions', 'title': '/sessions'}, + {'link': '/threadz', 'title': '/threadz'}, + {'link': '/varz', 'title': '/varz'}, ], - u'process-name': u'impalad' + 'process-name': 'impalad', }, - u'stmt': u'select * from customers', - u'summary': u'\nOperator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. 
Peak Mem Detail '\ - ' \n-------------------------------------------------------------------------------------------------'\ - '----------\n01:EXCHANGE 1 0.000ns 0.000ns 53 0 0 0 UNPARTITIONED '\ - ' \n00:SCAN HDFS 1 215.018ms 215.018ms 53 0 45.02 KB 32.00 MB default.customers ', - u'query_id': u'1a48b5796f8f07f5:49ba9e6b00000000', - u'plan': u'\n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: '\ - 'The following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this ' - 'problem.\ndefault.customers\nWARNING: The following tables are missing relevant table and/or column statistics.'\ - '\ndefault.customers\n\nF01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B '\ - 'mem-reservation=0B\n|\n01:EXCHANGE [UNPARTITIONED]\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=0 row-size=19B '\ - 'cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n00:SCAN HDFS [default.customers, RANDOM]\n partitions=1/1 '\ - 'files=1 size=15.44KB\n table stats: 0 rows total\n column stats: unavailable\n mem-estimate=32.00MB mem-reservation=0B'\ - '\n tuple-ids=0 row-size=19B cardinality=0\n----------------' - } + 'stmt': 'select * from customers', + 'summary': '\nOperator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem Detail ' + ' \n-------------------------------------------------------------------------------------------------' + '----------\n01:EXCHANGE 1 0.000ns 0.000ns 53 0 0 0 UNPARTITIONED ' + ' \n00:SCAN HDFS 1 215.018ms 215.018ms 53 0 45.02 KB 32.00 MB default.customers ', + 'query_id': '1a48b5796f8f07f5:49ba9e6b00000000', + 'plan': '\n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: ' + 'The following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this ' + 'problem.\ndefault.customers\nWARNING: The following tables are missing relevant table and/or column statistics.' 
+ '\ndefault.customers\n\nF01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B ' + 'mem-reservation=0B\n|\n01:EXCHANGE [UNPARTITIONED]\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=0 row-size=19B ' + 'cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n00:SCAN HDFS [default.customers, RANDOM]\n partitions=1/1 ' + 'files=1 size=15.44KB\n table stats: 0 rows total\n column stats: unavailable\n mem-estimate=32.00MB mem-reservation=0B' + '\n tuple-ids=0 row-size=19B cardinality=0\n----------------', + }, }, '8a46a8865624698f:b80b211500000000': { 'status': -1, - u'plan': { - u'status': u'OK', - u'plan_json': { - u'plan_nodes': [ + 'plan': { + 'status': 'OK', + 'plan_json': { + 'plan_nodes': [ { - u'num_instances': 1, u'output_card': 28, u'label_detail': u'UNPARTITIONED', u'label': u'02:MERGING-EXCHANGE', - u'is_broadcast': True, u'max_time': u'0.000ns', u'avg_time': u'0.000ns', u'children': [], u'max_time_val': 0 + 'num_instances': 1, + 'output_card': 28, + 'label_detail': 'UNPARTITIONED', + 'label': '02:MERGING-EXCHANGE', + 'is_broadcast': True, + 'max_time': '0.000ns', + 'avg_time': '0.000ns', + 'children': [], + 'max_time_val': 0, }, { - u'num_instances': 1, u'output_card': 28, u'label_detail': u'', u'data_stream_target': u'02:MERGING-EXCHANGE', - u'label': u'01:TOP-N', u'max_time': u'0.000ns', u'avg_time': u'0.000ns', - u'children': [ + 'num_instances': 1, + 'output_card': 28, + 'label_detail': '', + 'data_stream_target': '02:MERGING-EXCHANGE', + 'label': '01:TOP-N', + 'max_time': '0.000ns', + 'avg_time': '0.000ns', + 'children': [ { - u'num_instances': 1, u'output_card': 28, u'label_detail': u'default.sample_07', u'label': u'00:SCAN HDFS', - u'max_time': u'250.020ms', u'avg_time': u'250.020ms', u'children': [], u'max_time_val': 250020583 + 'num_instances': 1, + 'output_card': 28, + 'label_detail': 'default.sample_07', + 'label': '00:SCAN HDFS', + 'max_time': '250.020ms', + 'avg_time': '250.020ms', + 'children': [], + 'max_time_val': 250020583, } ], - u'max_time_val': 0 - } + 'max_time_val': 0, + }, ] }, - u'__common__': { - u'navbar': [ - {u'link': u'/backends', u'title': u'/backends'}, - {u'link': u'/catalog', u'title': u'/catalog'}, - {u'link': u'/hadoop-varz', u'title': u'/hadoop-varz'}, - {u'link': u'/log_level', u'title': u'/log_level'}, - {u'link': u'/logs', u'title': u'/logs'}, - {u'link': u'/memz', u'title': u'/memz'}, - {u'link': u'/metrics', u'title': u'/metrics'}, - {u'link': u'/queries', u'title': u'/queries'}, - {u'link': u'/rpcz', u'title': u'/rpcz'}, - {u'link': u'/sessions', u'title': u'/sessions'}, - {u'link': u'/threadz', u'title': u'/threadz'}, - {u'link': u'/varz', u'title': u'/varz'} + '__common__': { + 'navbar': [ + {'link': '/backends', 'title': '/backends'}, + {'link': '/catalog', 'title': '/catalog'}, + {'link': '/hadoop-varz', 'title': '/hadoop-varz'}, + {'link': '/log_level', 'title': '/log_level'}, + {'link': '/logs', 'title': '/logs'}, + {'link': '/memz', 'title': '/memz'}, + {'link': '/metrics', 'title': '/metrics'}, + {'link': '/queries', 'title': '/queries'}, + {'link': '/rpcz', 'title': '/rpcz'}, + {'link': '/sessions', 'title': '/sessions'}, + {'link': '/threadz', 'title': '/threadz'}, + {'link': '/varz', 'title': '/varz'}, ], - u'process-name': u'impalad' + 'process-name': 'impalad', }, - u'stmt': u'SELECT sample_07.description, sample_07.salary\r\nFROM\r\n '\ - 'sample_07\r\nWHERE\r\n( sample_07.salary > 100000)\r\nORDER BY sample_07.salary DESC\r\nLIMIT 1000', - u'summary': u'\nOperator #Hosts Avg 
Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem '\ - 'Detail \n-------------------------------------------------------------------------------------------'\ - '------------------------\n02:MERGING-EXCHANGE 1 0.000ns 0.000ns 28 0 0 '\ - ' 0 UNPARTITIONED \n01:TOP-N 1 0.000ns 0.000ns 28 0 80.00 KB '\ - ' 0 \n00:SCAN HDFS 1 250.020ms 250.020ms 28 0 173.00 KB '\ - '32.00 MB default.sample_07 ', - u'query_id': u'd424420e0c44ab9:c637ac2900000000', - u'plan': u'\n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: '\ - 'The following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this problem.'\ - '\ndefault.sample_07\nWARNING: The following tables are missing relevant table and/or column statistics.\n'\ - 'default.sample_07\n\nF01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B '\ - 'mem-reservation=0B\n|\n02:MERGING-EXCHANGE [UNPARTITIONED]\n| order by: salary DESC\n| limit: 1000\n| mem-estimate=0B '\ - 'mem-reservation=0B\n| tuple-ids=1 row-size=19B cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n01:TOP-N '\ - '[LIMIT=1000]\n| order by: salary DESC\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=1 row-size=19B '\ - 'cardinality=0\n|\n00:SCAN HDFS [default.sample_07, RANDOM]\n partitions=1/1 files=1 size=44.98KB\n predicates: '\ - '(sample_07.salary > 100000)\n table stats: 0 rows total\n column stats: unavailable\n parquet dictionary predicates: '\ - '(sample_07.salary > 100000)\n mem-estimate=32.00MB mem-reservation=0B\n tuple-ids=0 row-size=19B '\ - 'cardinality=0\n----------------' - } - } + 'stmt': 'SELECT sample_07.description, sample_07.salary\r\nFROM\r\n ' + 'sample_07\r\nWHERE\r\n( sample_07.salary > 100000)\r\nORDER BY sample_07.salary DESC\r\nLIMIT 1000', + 'summary': '\nOperator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem ' + 'Detail \n-------------------------------------------------------------------------------------------' + '------------------------\n02:MERGING-EXCHANGE 1 0.000ns 0.000ns 28 0 0 ' + ' 0 UNPARTITIONED \n01:TOP-N 1 0.000ns 0.000ns 28 0 80.00 KB ' + ' 0 \n00:SCAN HDFS 1 250.020ms 250.020ms 28 0 173.00 KB ' + '32.00 MB default.sample_07 ', + 'query_id': 'd424420e0c44ab9:c637ac2900000000', + 'plan': '\n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: ' + 'The following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this problem.' 
+ '\ndefault.sample_07\nWARNING: The following tables are missing relevant table and/or column statistics.\n' + 'default.sample_07\n\nF01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B ' + 'mem-reservation=0B\n|\n02:MERGING-EXCHANGE [UNPARTITIONED]\n| order by: salary DESC\n| limit: 1000\n| mem-estimate=0B ' + 'mem-reservation=0B\n| tuple-ids=1 row-size=19B cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n01:TOP-N ' + '[LIMIT=1000]\n| order by: salary DESC\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=1 row-size=19B ' + 'cardinality=0\n|\n00:SCAN HDFS [default.sample_07, RANDOM]\n partitions=1/1 files=1 size=44.98KB\n predicates: ' + '(sample_07.salary > 100000)\n table stats: 0 rows total\n column stats: unavailable\n parquet dictionary predicates: ' + '(sample_07.salary > 100000)\n mem-estimate=32.00MB mem-reservation=0B\n tuple-ids=0 row-size=19B ' + 'cardinality=0\n----------------', + }, + }, } PROFILE = { '4d497267f34ff17d:817bdfb500000000': { - u'profile': u'Query (id=1a48b5796f8f07f5:49ba9e6b00000000):\n Summary:\n Session ID: 3348564c97187569:1c17ce45bdfbf0b2\n '\ - 'Session Type: HIVESERVER2\n HiveServer2 Protocol Version: V6\n Start Time: 2017-10-26 11:19:40.420511000\n End Time: '\ - '2017-10-26 11:23:11.426921000\n Query Type: QUERY\n Query State: FINISHED\n Query Status: OK\n Impala Version: '\ - 'impalad version 2.9.0-cdh5.12.1 RELEASE (build 6dacae08a283a36bb932335ae0c046977e2474e8)\n User: admin\n Connected User: '\ - 'admin\n Delegated User: \n Network Address: 10.16.2.226:63745\n Default Db: default\n Sql Statement: select * from '\ - 'customers\n Coordinator: nightly512-unsecure-2.gce.cloudera.com:22000\n Query Options (set by configuration): '\ - 'QUERY_TIMEOUT_S=600\n Query Options (set by configuration and planner): QUERY_TIMEOUT_S=600,MT_DOP=0\n Plan: '\ - '\n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: The '\ - 'following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this problem.'\ - '\ndefault.customers\nWARNING: The following tables are missing relevant table and/or column statistics.'\ - '\ndefault.customers\n\nF01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B '\ - 'mem-reservation=0B\n|\n01:EXCHANGE [UNPARTITIONED]\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=0 row-size=19B '\ - 'cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n00:SCAN HDFS [default.customers, RANDOM]\n partitions=1/1 '\ - 'files=1 size=15.44KB\n table stats: 0 rows total\n column stats: unavailable\n mem-estimate=32.00MB mem-reservation=0B\n '\ - 'tuple-ids=0 row-size=19B cardinality=0\n----------------\n Estimated Per-Host Mem: 33554432\n Per-Host Memory Reservation: '\ - '0\n Tables Missing Stats: default.customers\n Tables With Corrupt Table Stats: default.customers\n Request Pool: '\ - 'root.admin\n Admission result: Admitted immediately\n ExecSummary: \nOperator #Hosts Avg Time Max Time #Rows '\ - 'Est. #Rows Peak Mem Est. 
Peak Mem Detail \n-----------------------------------------------------------------------'\ - '------------------------------------\n01:EXCHANGE 1 0.000ns 0.000ns 53 0 0 0 '\ - 'UNPARTITIONED \n00:SCAN HDFS 1 215.018ms 215.018ms 53 0 45.02 KB 32.00 MB default.customers '\ - '\n Errors: \n Planner Timeline: 5s043ms\n - Metadata load started: 10.215ms (10.215ms)\n - Metadata load '\ - 'finished: 4s789ms (4s779ms)\n - Analysis finished: 4s856ms (66.876ms)\n - Equivalence classes computed: 4s894ms '\ - '(38.233ms)\n - Single node plan created: 4s945ms (50.928ms)\n - Runtime filters computed: 4s947ms (2.464ms)\n '\ - '- Distributed plan created: 4s953ms (5.784ms)\n - Lineage info computed: 4s955ms (2.144ms)\n - Planning finished: '\ - '5s043ms (88.082ms)\n Query Timeline: 3m31s\n - Query submitted: 0.000ns (0.000ns)\n - Planning finished: 5s061ms '\ - '(5s061ms)\n - Submit for admission: 5s062ms (1.000ms)\n - Completed admission: 5s062ms (0.000ns)\n - Ready to '\ - 'start on 1 backends: 5s062ms (0.000ns)\n - All 1 execution backends (2 fragment instances) started: 5s064ms (2.000ms)'\ - '\n - Rows available: 5s311ms (247.021ms)\n - First row fetched: 6s565ms (1s254ms)\n - Unregister query: 3m31s '\ - '(3m24s)\n - ComputeScanRangeAssignmentTimer: 0.000ns\n ImpalaServer:\n - ClientFetchWaitTimer: 3m25s\n - '\ - 'RowMaterializationTimer: 1.000ms\n Execution Profile 1a48b5796f8f07f5:49ba9e6b00000000:(Total: 250.021ms, non-child: 0.000ns, % '\ - 'non-child: 0.00%)\n Number of filters: 0\n Filter routing table: \n ID Src. Node Tgt. Node(s) Target type Partition '\ - 'filter Pending (Expected) First arrived Completed Enabled\n----------------------------------------------------------------'\ - '---------------------------------------------------\n\n Backend startup latencies: Count: 1, min / max: 1ms / 1ms, '\ - '25th %-ile: 1ms, 50th %-ile: 1ms, 75th %-ile: 1ms, 90th %-ile: 1ms, 95th %-ile: 1ms, 99.9th %-ile: 1ms\n Per Node Peak Memory '\ - 'Usage: nightly512-unsecure-2.gce.cloudera.com:22000(71.09 KB) \n - FiltersReceived: 0 (0)\n - FinalizationTimer: 0.000ns'\ - '\n Averaged Fragment F01:(Total: 1s501ms, non-child: 1s256ms, % non-child: 83.68%)\n split sizes: min: 0, max: 0, '\ - 'avg: 0, stddev: 0\n completion times: min:1s501ms max:1s501ms mean: 1s501ms stddev:0.000ns\n execution rates: '\ - 'min:0.00 /sec max:0.00 /sec mean:0.00 /sec stddev:0.00 /sec\n num instances: 1\n - AverageThreadTokens: 0.00 \n '\ - ' - BloomFilterBytes: 0\n - PeakMemoryUsage: 12.41 KB (12712)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n - '\ - 'RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 219.018ms\n - TotalNetworkSendTime: 0.000ns\n - '\ - 'TotalStorageWaitTime: 0.000ns\n - TotalThreadsInvoluntaryContextSwitches: 0 (0)\n - TotalThreadsTotalWallClockTime: '\ - '1s473ms\n - TotalThreadsSysTime: 9.000us\n - TotalThreadsUserTime: 233.000us\n - '\ - 'TotalThreadsVoluntaryContextSwitches: 3 (3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 1s254ms\n '\ - ' - ExecTreeExecTime: 0.000ns\n - OpenTime: 219.018ms\n - ExecTreeOpenTime: 219.018ms\n - PrepareTime: '\ - '28.002ms\n - ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - '\ - 'BlocksCreated: 0 (0)\n - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB '\ - '(8388608)\n - MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0'\ - '\n - ScratchBytesWritten: 0\n - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - '\ - 'ScratchWrites: 0 (0)\n - TotalBufferWaitTime: 0.000ns\n - 
TotalEncryptionTime: 0.000ns\n - '\ - 'TotalReadBlockTime: 0.000ns\n PLAN_ROOT_SINK:\n - PeakMemoryUsage: 0\n CodeGen:(Total: 26.002ms, non-child: '\ - '26.002ms, % non-child: 100.00%)\n - CodegenTime: 0.000ns\n - CompileTime: 0.000ns\n - LoadTime: 0.000ns'\ - '\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: 0 (0)\n - NumInstructions: 0 (0)\n - '\ - 'OptimizationTime: 0.000ns\n - PeakMemoryUsage: 0\n - PrepareTime: 25.002ms\n EXCHANGE_NODE (id=1):(Total: '\ - '219.018ms, non-child: 219.018ms, % non-child: 100.00%)\n - BytesReceived: 1.54 KB (1578)\n - ConvertRowBatchTime:'\ - ' 0.000ns\n - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 219.018ms\n - '\ - 'PeakMemoryUsage: 0\n - RowsReturned: 53 (53)\n - RowsReturnedRate: 241.00 /sec\n - SendersBlockedTimer:'\ - ' 0.000ns\n - SendersBlockedTotalTimer(*): 0.000ns\n Coordinator Fragment F01:\n Instance '\ - '1a48b5796f8f07f5:49ba9e6b00000000 (host=nightly512-unsecure-2.gce.cloudera.com:22000):(Total: 1s501ms, non-child: 1s256ms, % '\ - 'non-child: 83.68%)\n MemoryUsage(500.000ms): 12.00 KB, 12.00 KB, 12.00 KB\n - AverageThreadTokens: 0.00 \n '\ - ' - BloomFilterBytes: 0\n - PeakMemoryUsage: 12.41 KB (12712)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n '\ - ' - RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 219.018ms\n - TotalNetworkSendTime: 0.000ns\n '\ - '- TotalStorageWaitTime: 0.000ns\n - TotalThreadsInvoluntaryContextSwitches: 0 (0)\n - '\ - 'TotalThreadsTotalWallClockTime: 1s473ms\n - TotalThreadsSysTime: 9.000us\n - TotalThreadsUserTime: 233.000us'\ - '\n - TotalThreadsVoluntaryContextSwitches: 3 (3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: '\ - '1s254ms\n - ExecTreeExecTime: 0.000ns\n - OpenTime: 219.018ms\n - ExecTreeOpenTime: 219.018ms'\ - '\n - PrepareTime: 28.002ms\n - ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - '\ - 'BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 (0)\n - BlocksRecycled: 0 (0)\n - BufferedPins:'\ - ' 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - MemoryLimit: 16.33 GB (17534060544)\n - '\ - 'PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - ScratchBytesWritten: 0\n - ScratchFileUsedBytes: '\ - '0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 (0)\n - TotalBufferWaitTime: 0.000ns\n - '\ - 'TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n PLAN_ROOT_SINK:\n - PeakMemoryUsage: '\ - '0\n CodeGen:(Total: 26.002ms, non-child: 26.002ms, % non-child: 100.00%)\n - CodegenTime: 0.000ns\n - '\ - 'CompileTime: 0.000ns\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - '\ - 'NumFunctions: 0 (0)\n - NumInstructions: 0 (0)\n - OptimizationTime: 0.000ns\n - PeakMemoryUsage: '\ - '0\n - PrepareTime: 25.002ms\n EXCHANGE_NODE (id=1):(Total: 219.018ms, non-child: 0.000ns, % non-child: 0.00%)\n'\ - ' BytesReceived(500.000ms): 1.54 KB, 1.54 KB, 1.54 KB\n - BytesReceived: 1.54 KB (1578)\n - '\ - 'ConvertRowBatchTime: 0.000ns\n - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 219.018ms\n'\ - ' - PeakMemoryUsage: 0\n - RowsReturned: 53 (53)\n - RowsReturnedRate: 241.00 /sec\n - '\ - 'SendersBlockedTimer: 0.000ns\n - SendersBlockedTotalTimer(*): 0.000ns\n Averaged Fragment F00:(Total: 241.020ms, '\ - 'non-child: 0.000ns, % non-child: 0.00%)\n split sizes: min: 15.44 KB, max: 15.44 KB, avg: 15.44 KB, stddev: 0\n '\ - 'completion times: min:248.021ms max:248.021ms mean: 248.021ms stddev:0.000ns\n execution rates: min:62.26 KB/sec '\ - 'max:62.26 KB/sec mean:62.26 KB/sec stddev:0.61 B/sec\n num instances: 1\n - AverageThreadTokens: 0.00 \n - '\ - 
'BloomFilterBytes: 0\n - PeakMemoryUsage: 63.09 KB (64608)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n - '\ - 'RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 0.000ns\n - '\ - 'TotalStorageWaitTime: 175.014ms\n - TotalThreadsInvoluntaryContextSwitches: 2 (2)\n - TotalThreadsTotalWallClockTime:'\ - ' 378.032ms\n - TotalThreadsSysTime: 1.998ms\n - TotalThreadsUserTime: 24.546ms\n - '\ - 'TotalThreadsVoluntaryContextSwitches: 13 (13)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 176.015ms\n'\ - ' - ExecTreeExecTime: 176.015ms\n - OpenTime: 26.002ms\n - ExecTreeOpenTime: 1.000ms\n - '\ - 'PrepareTime: 39.003ms\n - ExecTreePrepareTime: 19.001ms\n DataStreamSender (dst_id=1):\n - BytesSent: '\ - '1.54 KB (1578)\n - NetworkThroughput(*): 0.00 /sec\n - OverallThroughput: 0.00 /sec\n - PeakMemoryUsage:'\ - ' 6.09 KB (6240)\n - RowsReturned: 53 (53)\n - SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: '\ - '0.000ns\n - UncompressedRowBatchSize: 2.05 KB (2098)\n CodeGen:(Total: 43.003ms, non-child: 43.003ms, % non-child: '\ - '100.00%)\n - CodegenTime: 1.000ms\n - CompileTime: 13.001ms\n - LoadTime: 0.000ns\n - '\ - 'ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: 5 (5)\n - NumInstructions: 98 (98)\n - '\ - 'OptimizationTime: 11.000ms\n - PeakMemoryUsage: 49.00 KB (50176)\n - PrepareTime: 18.001ms\n HDFS_SCAN_NODE'\ - ' (id=0):(Total: 215.018ms, non-child: 215.018ms, % non-child: 100.00%)\n - AverageHdfsReadThreadConcurrency: 0.00 \n '\ - ' - AverageScannerThreadConcurrency: 0.00 \n - BytesRead: 16.71 KB (17111)\n - BytesReadDataNodeCache: 0\n '\ - ' - BytesReadLocal: 16.71 KB (17111)\n - BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: 16.71 KB '\ - '(17111)\n - DecompressionTime: 0.000ns\n - MaxCompressedTextFileLength: 0\n - NumColumns: 2 (2)\n '\ - ' - NumDictFilteredRowGroups: 0 (0)\n - NumDisksAccessed: 1 (1)\n - NumRowGroups: 1 (1)\n - '\ - 'NumScannerThreadsStarted: 1 (1)\n - NumScannersWithNoReads: 0 (0)\n - NumStatsFilteredRowGroups: 0 (0)\n '\ - '- PeakMemoryUsage: 45.02 KB (46101)\n - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - RemoteScanRanges: 0 (0)\n '\ - ' - RowBatchQueueGetWaitTime: 176.015ms\n - RowBatchQueuePutWaitTime: 0.000ns\n - RowsRead: 53 (53)\n '\ - ' - RowsReturned: 53 (53)\n - RowsReturnedRate: 246.00 /sec\n - ScanRangesComplete: 1 (1)\n - '\ - 'ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - ScannerThreadsTotalWallClockTime: 176.015ms\n - '\ - 'MaterializeTupleTime(*): 0.000ns\n - ScannerThreadsSysTime: 0.000ns\n - ScannerThreadsUserTime: '\ - '819.000us\n - ScannerThreadsVoluntaryContextSwitches: 9 (9)\n - TotalRawHdfsReadTime(*): 0.000ns\n - '\ - 'TotalReadThroughput: 0.00 /sec\n Fragment F00:\n Instance 1a48b5796f8f07f5:49ba9e6b00000001 '\ - '(host=nightly512-unsecure-2.gce.cloudera.com:22000):(Total: 241.020ms, non-child: 0.000ns, % non-child: 0.00%)\n Hdfs '\ - 'split stats (:<# splits>/): 0:1/15.44 KB \n - AverageThreadTokens: 0.00 \n - '\ - 'BloomFilterBytes: 0\n - PeakMemoryUsage: 63.09 KB (64608)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n '\ - '- RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 0.000ns\n - '\ - 'TotalStorageWaitTime: 175.014ms\n - TotalThreadsInvoluntaryContextSwitches: 2 (2)\n - '\ - 'TotalThreadsTotalWallClockTime: 378.032ms\n - TotalThreadsSysTime: 1.998ms\n - TotalThreadsUserTime: '\ - '24.546ms\n - TotalThreadsVoluntaryContextSwitches: 13 (13)\n Fragment Instance Lifecycle Timings:\n - '\ - 'ExecTime: 176.015ms\n - ExecTreeExecTime: 
176.015ms\n - OpenTime: 26.002ms\n - '\ - 'ExecTreeOpenTime: 1.000ms\n - PrepareTime: 39.003ms\n - ExecTreePrepareTime: 19.001ms\n '\ - 'DataStreamSender (dst_id=1):\n - BytesSent: 1.54 KB (1578)\n - NetworkThroughput(*): 0.00 /sec\n '\ - '- OverallThroughput: 0.00 /sec\n - PeakMemoryUsage: 6.09 KB (6240)\n - RowsReturned: 53 (53)\n '\ - '- SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: 0.000ns\n - UncompressedRowBatchSize: 2.05 KB (2098)'\ - '\n CodeGen:(Total: 43.003ms, non-child: 43.003ms, % non-child: 100.00%)\n - CodegenTime: 1.000ms\n '\ - '- CompileTime: 13.001ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - '\ - 'NumFunctions: 5 (5)\n - NumInstructions: 98 (98)\n - OptimizationTime: 11.000ms\n - '\ - 'PeakMemoryUsage: 49.00 KB (50176)\n - PrepareTime: 18.001ms\n HDFS_SCAN_NODE (id=0):(Total: 215.018ms, '\ - 'non-child: 215.018ms, % non-child: 100.00%)\n Hdfs split stats (:<# splits>/): 0:1/15.44 KB \n'\ - ' ExecOption: PARQUET Codegen Enabled, Codegen enabled: 1 out of 1\n Hdfs Read Thread Concurrency Bucket: 0:0% '\ - '1:0% 2:0% 3:0% 4:0% \n File Formats: PARQUET/NONE:2 \n - FooterProcessingTime: (Avg: 168.014ms ; Min: '\ - '168.014ms ; Max: 168.014ms ; Number of samples: 1)\n - AverageHdfsReadThreadConcurrency: 0.00 \n - '\ - 'AverageScannerThreadConcurrency: 0.00 \n - BytesRead: 16.71 KB (17111)\n - BytesReadDataNodeCache: 0\n'\ - ' - BytesReadLocal: 16.71 KB (17111)\n - BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: '\ - '16.71 KB (17111)\n - DecompressionTime: 0.000ns\n - MaxCompressedTextFileLength: 0\n - NumColumns:'\ - ' 2 (2)\n - NumDictFilteredRowGroups: 0 (0)\n - NumDisksAccessed: 1 (1)\n - NumRowGroups: 1 (1)\n '\ - ' - NumScannerThreadsStarted: 1 (1)\n - NumScannersWithNoReads: 0 (0)\n - NumStatsFilteredRowGroups: '\ - '0 (0)\n - PeakMemoryUsage: 45.02 KB (46101)\n - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - '\ - 'RemoteScanRanges: 0 (0)\n - RowBatchQueueGetWaitTime: 176.015ms\n - RowBatchQueuePutWaitTime: 0.000ns\n'\ - ' - RowsRead: 53 (53)\n - RowsReturned: 53 (53)\n - RowsReturnedRate: 246.00 /sec\n - '\ - 'ScanRangesComplete: 1 (1)\n - ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - '\ - 'ScannerThreadsTotalWallClockTime: 176.015ms\n - MaterializeTupleTime(*): 0.000ns\n - '\ - 'ScannerThreadsSysTime: 0.000ns\n - ScannerThreadsUserTime: 819.000us\n - '\ - 'ScannerThreadsVoluntaryContextSwitches: 9 (9)\n - TotalRawHdfsReadTime(*): 0.000ns\n - '\ - 'TotalReadThroughput: 0.00 /sec\n', - u'query_id': u'1a48b5796f8f07f5:49ba9e6b00000000', - u'__common__': { - u'navbar': [ - {u'link': u'/backends', u'title': u'/backends'}, - {u'link': u'/catalog', u'title': u'/catalog'}, - {u'link': u'/hadoop-varz', u'title': u'/hadoop-varz'}, - {u'link': u'/log_level', u'title': u'/log_level'}, - {u'link': u'/logs', u'title': u'/logs'}, - {u'link': u'/memz', u'title': u'/memz'}, - {u'link': u'/metrics', u'title': u'/metrics'}, - {u'link': u'/queries', u'title': u'/queries'}, - {u'link': u'/rpcz', u'title': u'/rpcz'}, - {u'link': u'/sessions', u'title': u'/sessions'}, - {u'link': u'/threadz', u'title': u'/threadz'}, - {u'link': u'/varz', u'title': u'/varz'} + 'profile': 'Query (id=1a48b5796f8f07f5:49ba9e6b00000000):\n Summary:\n Session ID: 3348564c97187569:1c17ce45bdfbf0b2\n ' + 'Session Type: HIVESERVER2\n HiveServer2 Protocol Version: V6\n Start Time: 2017-10-26 11:19:40.420511000\n End Time: ' + '2017-10-26 11:23:11.426921000\n Query Type: QUERY\n Query State: FINISHED\n Query Status: OK\n Impala Version: ' + 'impalad version 2.9.0-cdh5.12.1 
RELEASE (build 6dacae08a283a36bb932335ae0c046977e2474e8)\n User: admin\n Connected User: ' + 'admin\n Delegated User: \n Network Address: 10.16.2.226:63745\n Default Db: default\n Sql Statement: select * from ' + 'customers\n Coordinator: nightly512-unsecure-2.gce.cloudera.com:22000\n Query Options (set by configuration): ' + 'QUERY_TIMEOUT_S=600\n Query Options (set by configuration and planner): QUERY_TIMEOUT_S=600,MT_DOP=0\n Plan: ' + '\n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: The ' + 'following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this problem.' + '\ndefault.customers\nWARNING: The following tables are missing relevant table and/or column statistics.' + '\ndefault.customers\n\nF01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B ' + 'mem-reservation=0B\n|\n01:EXCHANGE [UNPARTITIONED]\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=0 row-size=19B ' + 'cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n00:SCAN HDFS [default.customers, RANDOM]\n partitions=1/1 ' + 'files=1 size=15.44KB\n table stats: 0 rows total\n column stats: unavailable\n mem-estimate=32.00MB mem-reservation=0B\n ' + 'tuple-ids=0 row-size=19B cardinality=0\n----------------\n Estimated Per-Host Mem: 33554432\n Per-Host Memory Reservation: ' + '0\n Tables Missing Stats: default.customers\n Tables With Corrupt Table Stats: default.customers\n Request Pool: ' + 'root.admin\n Admission result: Admitted immediately\n ExecSummary: \nOperator #Hosts Avg Time Max Time #Rows ' + 'Est. #Rows Peak Mem Est. Peak Mem Detail \n-----------------------------------------------------------------------' + '------------------------------------\n01:EXCHANGE 1 0.000ns 0.000ns 53 0 0 0 ' + 'UNPARTITIONED \n00:SCAN HDFS 1 215.018ms 215.018ms 53 0 45.02 KB 32.00 MB default.customers ' + '\n Errors: \n Planner Timeline: 5s043ms\n - Metadata load started: 10.215ms (10.215ms)\n - Metadata load ' + 'finished: 4s789ms (4s779ms)\n - Analysis finished: 4s856ms (66.876ms)\n - Equivalence classes computed: 4s894ms ' + '(38.233ms)\n - Single node plan created: 4s945ms (50.928ms)\n - Runtime filters computed: 4s947ms (2.464ms)\n ' + '- Distributed plan created: 4s953ms (5.784ms)\n - Lineage info computed: 4s955ms (2.144ms)\n - Planning finished: ' + '5s043ms (88.082ms)\n Query Timeline: 3m31s\n - Query submitted: 0.000ns (0.000ns)\n - Planning finished: 5s061ms ' + '(5s061ms)\n - Submit for admission: 5s062ms (1.000ms)\n - Completed admission: 5s062ms (0.000ns)\n - Ready to ' + 'start on 1 backends: 5s062ms (0.000ns)\n - All 1 execution backends (2 fragment instances) started: 5s064ms (2.000ms)' + '\n - Rows available: 5s311ms (247.021ms)\n - First row fetched: 6s565ms (1s254ms)\n - Unregister query: 3m31s ' + '(3m24s)\n - ComputeScanRangeAssignmentTimer: 0.000ns\n ImpalaServer:\n - ClientFetchWaitTimer: 3m25s\n - ' + 'RowMaterializationTimer: 1.000ms\n Execution Profile 1a48b5796f8f07f5:49ba9e6b00000000:(Total: 250.021ms, non-child: 0.000ns, % ' + 'non-child: 0.00%)\n Number of filters: 0\n Filter routing table: \n ID Src. Node Tgt. 
Node(s) Target type Partition ' + 'filter Pending (Expected) First arrived Completed Enabled\n----------------------------------------------------------------' + '---------------------------------------------------\n\n Backend startup latencies: Count: 1, min / max: 1ms / 1ms, ' + '25th %-ile: 1ms, 50th %-ile: 1ms, 75th %-ile: 1ms, 90th %-ile: 1ms, 95th %-ile: 1ms, 99.9th %-ile: 1ms\n Per Node Peak Memory ' + 'Usage: nightly512-unsecure-2.gce.cloudera.com:22000(71.09 KB) \n - FiltersReceived: 0 (0)\n - FinalizationTimer: 0.000ns' + '\n Averaged Fragment F01:(Total: 1s501ms, non-child: 1s256ms, % non-child: 83.68%)\n split sizes: min: 0, max: 0, ' + 'avg: 0, stddev: 0\n completion times: min:1s501ms max:1s501ms mean: 1s501ms stddev:0.000ns\n execution rates: ' + 'min:0.00 /sec max:0.00 /sec mean:0.00 /sec stddev:0.00 /sec\n num instances: 1\n - AverageThreadTokens: 0.00 \n ' + ' - BloomFilterBytes: 0\n - PeakMemoryUsage: 12.41 KB (12712)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n - ' + 'RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 219.018ms\n - TotalNetworkSendTime: 0.000ns\n - ' + 'TotalStorageWaitTime: 0.000ns\n - TotalThreadsInvoluntaryContextSwitches: 0 (0)\n - TotalThreadsTotalWallClockTime: ' + '1s473ms\n - TotalThreadsSysTime: 9.000us\n - TotalThreadsUserTime: 233.000us\n - ' + 'TotalThreadsVoluntaryContextSwitches: 3 (3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 1s254ms\n ' + ' - ExecTreeExecTime: 0.000ns\n - OpenTime: 219.018ms\n - ExecTreeOpenTime: 219.018ms\n - PrepareTime: ' + '28.002ms\n - ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - ' + 'BlocksCreated: 0 (0)\n - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB ' + '(8388608)\n - MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0' + '\n - ScratchBytesWritten: 0\n - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ' + 'ScratchWrites: 0 (0)\n - TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - ' + 'TotalReadBlockTime: 0.000ns\n PLAN_ROOT_SINK:\n - PeakMemoryUsage: 0\n CodeGen:(Total: 26.002ms, non-child: ' + '26.002ms, % non-child: 100.00%)\n - CodegenTime: 0.000ns\n - CompileTime: 0.000ns\n - LoadTime: 0.000ns' + '\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: 0 (0)\n - NumInstructions: 0 (0)\n - ' + 'OptimizationTime: 0.000ns\n - PeakMemoryUsage: 0\n - PrepareTime: 25.002ms\n EXCHANGE_NODE (id=1):(Total: ' + '219.018ms, non-child: 219.018ms, % non-child: 100.00%)\n - BytesReceived: 1.54 KB (1578)\n - ConvertRowBatchTime:' + ' 0.000ns\n - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 219.018ms\n - ' + 'PeakMemoryUsage: 0\n - RowsReturned: 53 (53)\n - RowsReturnedRate: 241.00 /sec\n - SendersBlockedTimer:' + ' 0.000ns\n - SendersBlockedTotalTimer(*): 0.000ns\n Coordinator Fragment F01:\n Instance ' + '1a48b5796f8f07f5:49ba9e6b00000000 (host=nightly512-unsecure-2.gce.cloudera.com:22000):(Total: 1s501ms, non-child: 1s256ms, % ' + 'non-child: 83.68%)\n MemoryUsage(500.000ms): 12.00 KB, 12.00 KB, 12.00 KB\n - AverageThreadTokens: 0.00 \n ' + ' - BloomFilterBytes: 0\n - PeakMemoryUsage: 12.41 KB (12712)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n ' + ' - RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 219.018ms\n - TotalNetworkSendTime: 0.000ns\n ' + '- TotalStorageWaitTime: 0.000ns\n - TotalThreadsInvoluntaryContextSwitches: 0 (0)\n - ' + 'TotalThreadsTotalWallClockTime: 1s473ms\n - TotalThreadsSysTime: 9.000us\n - TotalThreadsUserTime: 233.000us' + '\n - 
TotalThreadsVoluntaryContextSwitches: 3 (3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: ' + '1s254ms\n - ExecTreeExecTime: 0.000ns\n - OpenTime: 219.018ms\n - ExecTreeOpenTime: 219.018ms' + '\n - PrepareTime: 28.002ms\n - ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - ' + 'BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 (0)\n - BlocksRecycled: 0 (0)\n - BufferedPins:' + ' 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - MemoryLimit: 16.33 GB (17534060544)\n - ' + 'PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - ScratchBytesWritten: 0\n - ScratchFileUsedBytes: ' + '0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 (0)\n - TotalBufferWaitTime: 0.000ns\n - ' + 'TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n PLAN_ROOT_SINK:\n - PeakMemoryUsage: ' + '0\n CodeGen:(Total: 26.002ms, non-child: 26.002ms, % non-child: 100.00%)\n - CodegenTime: 0.000ns\n - ' + 'CompileTime: 0.000ns\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - ' + 'NumFunctions: 0 (0)\n - NumInstructions: 0 (0)\n - OptimizationTime: 0.000ns\n - PeakMemoryUsage: ' + '0\n - PrepareTime: 25.002ms\n EXCHANGE_NODE (id=1):(Total: 219.018ms, non-child: 0.000ns, % non-child: 0.00%)\n' + ' BytesReceived(500.000ms): 1.54 KB, 1.54 KB, 1.54 KB\n - BytesReceived: 1.54 KB (1578)\n - ' + 'ConvertRowBatchTime: 0.000ns\n - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 219.018ms\n' + ' - PeakMemoryUsage: 0\n - RowsReturned: 53 (53)\n - RowsReturnedRate: 241.00 /sec\n - ' + 'SendersBlockedTimer: 0.000ns\n - SendersBlockedTotalTimer(*): 0.000ns\n Averaged Fragment F00:(Total: 241.020ms, ' + 'non-child: 0.000ns, % non-child: 0.00%)\n split sizes: min: 15.44 KB, max: 15.44 KB, avg: 15.44 KB, stddev: 0\n ' + 'completion times: min:248.021ms max:248.021ms mean: 248.021ms stddev:0.000ns\n execution rates: min:62.26 KB/sec ' + 'max:62.26 KB/sec mean:62.26 KB/sec stddev:0.61 B/sec\n num instances: 1\n - AverageThreadTokens: 0.00 \n - ' + 'BloomFilterBytes: 0\n - PeakMemoryUsage: 63.09 KB (64608)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n - ' + 'RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 0.000ns\n - ' + 'TotalStorageWaitTime: 175.014ms\n - TotalThreadsInvoluntaryContextSwitches: 2 (2)\n - TotalThreadsTotalWallClockTime:' + ' 378.032ms\n - TotalThreadsSysTime: 1.998ms\n - TotalThreadsUserTime: 24.546ms\n - ' + 'TotalThreadsVoluntaryContextSwitches: 13 (13)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 176.015ms\n' + ' - ExecTreeExecTime: 176.015ms\n - OpenTime: 26.002ms\n - ExecTreeOpenTime: 1.000ms\n - ' + 'PrepareTime: 39.003ms\n - ExecTreePrepareTime: 19.001ms\n DataStreamSender (dst_id=1):\n - BytesSent: ' + '1.54 KB (1578)\n - NetworkThroughput(*): 0.00 /sec\n - OverallThroughput: 0.00 /sec\n - PeakMemoryUsage:' + ' 6.09 KB (6240)\n - RowsReturned: 53 (53)\n - SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: ' + '0.000ns\n - UncompressedRowBatchSize: 2.05 KB (2098)\n CodeGen:(Total: 43.003ms, non-child: 43.003ms, % non-child: ' + '100.00%)\n - CodegenTime: 1.000ms\n - CompileTime: 13.001ms\n - LoadTime: 0.000ns\n - ' + 'ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: 5 (5)\n - NumInstructions: 98 (98)\n - ' + 'OptimizationTime: 11.000ms\n - PeakMemoryUsage: 49.00 KB (50176)\n - PrepareTime: 18.001ms\n HDFS_SCAN_NODE' + ' (id=0):(Total: 215.018ms, non-child: 215.018ms, % non-child: 100.00%)\n - AverageHdfsReadThreadConcurrency: 0.00 \n ' + ' - AverageScannerThreadConcurrency: 0.00 \n - BytesRead: 16.71 KB (17111)\n - BytesReadDataNodeCache: 
0\n ' + ' - BytesReadLocal: 16.71 KB (17111)\n - BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: 16.71 KB ' + '(17111)\n - DecompressionTime: 0.000ns\n - MaxCompressedTextFileLength: 0\n - NumColumns: 2 (2)\n ' + ' - NumDictFilteredRowGroups: 0 (0)\n - NumDisksAccessed: 1 (1)\n - NumRowGroups: 1 (1)\n - ' + 'NumScannerThreadsStarted: 1 (1)\n - NumScannersWithNoReads: 0 (0)\n - NumStatsFilteredRowGroups: 0 (0)\n ' + '- PeakMemoryUsage: 45.02 KB (46101)\n - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - RemoteScanRanges: 0 (0)\n ' + ' - RowBatchQueueGetWaitTime: 176.015ms\n - RowBatchQueuePutWaitTime: 0.000ns\n - RowsRead: 53 (53)\n ' + ' - RowsReturned: 53 (53)\n - RowsReturnedRate: 246.00 /sec\n - ScanRangesComplete: 1 (1)\n - ' + 'ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - ScannerThreadsTotalWallClockTime: 176.015ms\n - ' + 'MaterializeTupleTime(*): 0.000ns\n - ScannerThreadsSysTime: 0.000ns\n - ScannerThreadsUserTime: ' + '819.000us\n - ScannerThreadsVoluntaryContextSwitches: 9 (9)\n - TotalRawHdfsReadTime(*): 0.000ns\n - ' + 'TotalReadThroughput: 0.00 /sec\n Fragment F00:\n Instance 1a48b5796f8f07f5:49ba9e6b00000001 ' + '(host=nightly512-unsecure-2.gce.cloudera.com:22000):(Total: 241.020ms, non-child: 0.000ns, % non-child: 0.00%)\n Hdfs ' + 'split stats (:<# splits>/): 0:1/15.44 KB \n - AverageThreadTokens: 0.00 \n - ' + 'BloomFilterBytes: 0\n - PeakMemoryUsage: 63.09 KB (64608)\n - PerHostPeakMemUsage: 71.09 KB (72800)\n ' + '- RowsProduced: 53 (53)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 0.000ns\n - ' + 'TotalStorageWaitTime: 175.014ms\n - TotalThreadsInvoluntaryContextSwitches: 2 (2)\n - ' + 'TotalThreadsTotalWallClockTime: 378.032ms\n - TotalThreadsSysTime: 1.998ms\n - TotalThreadsUserTime: ' + '24.546ms\n - TotalThreadsVoluntaryContextSwitches: 13 (13)\n Fragment Instance Lifecycle Timings:\n - ' + 'ExecTime: 176.015ms\n - ExecTreeExecTime: 176.015ms\n - OpenTime: 26.002ms\n - ' + 'ExecTreeOpenTime: 1.000ms\n - PrepareTime: 39.003ms\n - ExecTreePrepareTime: 19.001ms\n ' + 'DataStreamSender (dst_id=1):\n - BytesSent: 1.54 KB (1578)\n - NetworkThroughput(*): 0.00 /sec\n ' + '- OverallThroughput: 0.00 /sec\n - PeakMemoryUsage: 6.09 KB (6240)\n - RowsReturned: 53 (53)\n ' + '- SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: 0.000ns\n - UncompressedRowBatchSize: 2.05 KB (2098)' + '\n CodeGen:(Total: 43.003ms, non-child: 43.003ms, % non-child: 100.00%)\n - CodegenTime: 1.000ms\n ' + '- CompileTime: 13.001ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - ' + 'NumFunctions: 5 (5)\n - NumInstructions: 98 (98)\n - OptimizationTime: 11.000ms\n - ' + 'PeakMemoryUsage: 49.00 KB (50176)\n - PrepareTime: 18.001ms\n HDFS_SCAN_NODE (id=0):(Total: 215.018ms, ' + 'non-child: 215.018ms, % non-child: 100.00%)\n Hdfs split stats (:<# splits>/): 0:1/15.44 KB \n' + ' ExecOption: PARQUET Codegen Enabled, Codegen enabled: 1 out of 1\n Hdfs Read Thread Concurrency Bucket: 0:0% ' + '1:0% 2:0% 3:0% 4:0% \n File Formats: PARQUET/NONE:2 \n - FooterProcessingTime: (Avg: 168.014ms ; Min: ' + '168.014ms ; Max: 168.014ms ; Number of samples: 1)\n - AverageHdfsReadThreadConcurrency: 0.00 \n - ' + 'AverageScannerThreadConcurrency: 0.00 \n - BytesRead: 16.71 KB (17111)\n - BytesReadDataNodeCache: 0\n' + ' - BytesReadLocal: 16.71 KB (17111)\n - BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: ' + '16.71 KB (17111)\n - DecompressionTime: 0.000ns\n - MaxCompressedTextFileLength: 0\n - NumColumns:' + ' 2 (2)\n - NumDictFilteredRowGroups: 0 (0)\n - 
NumDisksAccessed: 1 (1)\n - NumRowGroups: 1 (1)\n ' + ' - NumScannerThreadsStarted: 1 (1)\n - NumScannersWithNoReads: 0 (0)\n - NumStatsFilteredRowGroups: ' + '0 (0)\n - PeakMemoryUsage: 45.02 KB (46101)\n - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - ' + 'RemoteScanRanges: 0 (0)\n - RowBatchQueueGetWaitTime: 176.015ms\n - RowBatchQueuePutWaitTime: 0.000ns\n' + ' - RowsRead: 53 (53)\n - RowsReturned: 53 (53)\n - RowsReturnedRate: 246.00 /sec\n - ' + 'ScanRangesComplete: 1 (1)\n - ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - ' + 'ScannerThreadsTotalWallClockTime: 176.015ms\n - MaterializeTupleTime(*): 0.000ns\n - ' + 'ScannerThreadsSysTime: 0.000ns\n - ScannerThreadsUserTime: 819.000us\n - ' + 'ScannerThreadsVoluntaryContextSwitches: 9 (9)\n - TotalRawHdfsReadTime(*): 0.000ns\n - ' + 'TotalReadThroughput: 0.00 /sec\n', + 'query_id': '1a48b5796f8f07f5:49ba9e6b00000000', + '__common__': { + 'navbar': [ + {'link': '/backends', 'title': '/backends'}, + {'link': '/catalog', 'title': '/catalog'}, + {'link': '/hadoop-varz', 'title': '/hadoop-varz'}, + {'link': '/log_level', 'title': '/log_level'}, + {'link': '/logs', 'title': '/logs'}, + {'link': '/memz', 'title': '/memz'}, + {'link': '/metrics', 'title': '/metrics'}, + {'link': '/queries', 'title': '/queries'}, + {'link': '/rpcz', 'title': '/rpcz'}, + {'link': '/sessions', 'title': '/sessions'}, + {'link': '/threadz', 'title': '/threadz'}, + {'link': '/varz', 'title': '/varz'}, ], - u'process-name': u'impalad' - } + 'process-name': 'impalad', + }, }, '8a46a8865624698f:b80b211500000000': { - u'profile': u'Query (id=d424420e0c44ab9:c637ac2900000000):\n Summary:\n Session ID: 3348564c97187569:1c17ce45bdfbf0b2\n '\ - 'Session Type: HIVESERVER2\n HiveServer2 Protocol Version: V6\n Start Time: 2017-10-26 11:20:11.971764000\n End Time: '\ - '2017-10-26 11:23:11.429110000\n Query Type: QUERY\n Query State: FINISHED\n Query Status: OK\n Impala Version: '\ - 'impalad version 2.9.0-cdh5.12.1 RELEASE (build 6dacae08a283a36bb932335ae0c046977e2474e8)\n User: admin\n Connected User: '\ - 'admin\n Delegated User: \n Network Address: 10.16.2.226:63745\n Default Db: default\n Sql Statement: SELECT '\ - 'sample_07.description, sample_07.salary\r\nFROM\r\n sample_07\r\nWHERE\r\n( sample_07.salary > 100000)\r\nORDER BY '\ - 'sample_07.salary DESC\r\nLIMIT 1000\n Coordinator: nightly512-unsecure-2.gce.cloudera.com:22000\n Query Options '\ - '(set by configuration): QUERY_TIMEOUT_S=600\n Query Options (set by configuration and planner): QUERY_TIMEOUT_S=600,MT_DOP=0'\ - '\n Plan: \n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: '\ - 'The following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this problem.'\ - '\ndefault.sample_07\nWARNING: The following tables are missing relevant table and/or column statistics.\ndefault.sample_07\n\n'\ - 'F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B mem-reservation=0B\n|\n02:MERGING-'\ - 'EXCHANGE [UNPARTITIONED]\n| order by: salary DESC\n| limit: 1000\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=1 '\ - 'row-size=19B cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n01:TOP-N [LIMIT=1000]\n| order by: salary '\ - 'DESC\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=1 row-size=19B cardinality=0\n|\n00:SCAN HDFS [default.sample_07, '\ - 'RANDOM]\n partitions=1/1 files=1 size=44.98KB\n predicates: (sample_07.salary > 100000)\n table stats: 0 rows 
total\n '\ - 'column stats: unavailable\n parquet dictionary predicates: (sample_07.salary > 100000)\n mem-estimate=32.00MB '\ - 'mem-reservation=0B\n tuple-ids=0 row-size=19B cardinality=0\n----------------\n Estimated Per-Host Mem: 33554432\n '\ - 'Per-Host Memory Reservation: 0\n Tables Missing Stats: default.sample_07\n Tables With Corrupt Table Stats: '\ - 'default.sample_07\n Request Pool: root.admin\n Admission result: Admitted immediately\n ExecSummary: \nOperator'\ - ' #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem Detail \n-------------------'\ - '------------------------------------------------------------------------------------------------\n02:MERGING-EXCHANGE 1 '\ - ' 0.000ns 0.000ns 28 0 0 0 UNPARTITIONED \n01:TOP-N 1 0.000ns '\ - '0.000ns 28 0 80.00 KB 0 \n00:SCAN HDFS 1 250.020ms 250.020ms '\ - '28 0 173.00 KB 32.00 MB default.sample_07 \n Errors: \n Planner Timeline: 3s275ms\n - Metadata '\ - 'load started: 11.586ms (11.586ms)\n - Metadata load finished: 3s248ms (3s236ms)\n - Analysis finished: 3s254ms '\ - '(6.431ms)\n - Equivalence classes computed: 3s255ms (335.173us)\n - Single node plan created: 3s267ms (12.443ms)\n'\ - ' - Runtime filters computed: 3s267ms (92.906us)\n - Distributed plan created: 3s267ms (223.487us)\n - Lineage '\ - 'info computed: 3s268ms (348.540us)\n - Planning finished: 3s275ms (7.378ms)\n Query Timeline: 2m59s\n - Query '\ - 'submitted: 0.000ns (0.000ns)\n - Planning finished: 3s278ms (3s278ms)\n - Submit for admission: 3s279ms (1.000ms)'\ - '\n - Completed admission: 3s279ms (0.000ns)\n - Ready to start on 2 backends: 3s279ms (0.000ns)\n - All 2 '\ - 'execution backends (2 fragment instances) started: 3s331ms (52.004ms)\n - Rows available: 3s781ms (450.038ms)\n - '\ - 'First row fetched: 5s232ms (1s451ms)\n - Unregister query: 2m59s (2m54s)\n - ComputeScanRangeAssignmentTimer: '\ - '0.000ns\n ImpalaServer:\n - ClientFetchWaitTimer: 2m55s\n - RowMaterializationTimer: 0.000ns\n Execution Profile '\ - 'd424420e0c44ab9:c637ac2900000000:(Total: 502.042ms, non-child: 0.000ns, % non-child: 0.00%)\n Number of filters: 0\n '\ - 'Filter routing table: \n ID Src. Node Tgt. 
Node(s) Target type Partition filter Pending (Expected) First arrived Completed'\ - ' Enabled\n-------------------------------------------------------------------------------------------------------------------\n'\ - '\n Backend startup latencies: Count: 2, min / max: 1ms / 52ms, 25th %-ile: 1ms, 50th %-ile: 1ms, 75th %-ile: 52ms, 90th %-ile:'\ - ' 52ms, 95th %-ile: 52ms, 99.9th %-ile: 52ms\n Per Node Peak Memory Usage: nightly512-unsecure-2.gce.cloudera.com:22000'\ - '(255.00 KB) nightly512-unsecure-3.gce.cloudera.com:22000(937.09 KB) \n - FiltersReceived: 0 (0)\n - FinalizationTimer: '\ - '0.000ns\n Averaged Fragment F01:(Total: 1s952ms, non-child: 1s452ms, % non-child: 74.39%)\n split sizes: min: 0, max: '\ - '0, avg: 0, stddev: 0\n completion times: min:1s952ms max:1s952ms mean: 1s952ms stddev:0.000ns\n execution rates: '\ - 'min:0.00 /sec max:0.00 /sec mean:0.00 /sec stddev:0.00 /sec\n num instances: 1\n - AverageThreadTokens: 0.00 \n'\ - ' - BloomFilterBytes: 0\n - PeakMemoryUsage: 255.00 KB (261120)\n - PerHostPeakMemUsage: 255.00 KB (261120)\n '\ - ' - RowsProduced: 28 (28)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 0.000ns\n - '\ - 'TotalStorageWaitTime: 0.000ns\n - TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - TotalThreadsTotalWallClockTime: '\ - '1s934ms\n - TotalThreadsSysTime: 980.000us\n - TotalThreadsUserTime: 28.421ms\n - '\ - 'TotalThreadsVoluntaryContextSwitches: 3 (3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 1s451ms\n '\ - ' - ExecTreeExecTime: 0.000ns\n - OpenTime: 483.041ms\n - ExecTreeOpenTime: 452.038ms\n - PrepareTime:'\ - ' 18.001ms\n - ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - '\ - 'BlocksCreated: 0 (0)\n - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB '\ - '(8388608)\n - MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n'\ - ' - ScratchBytesWritten: 0\n - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: '\ - '0 (0)\n - TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n'\ - ' PLAN_ROOT_SINK:\n - PeakMemoryUsage: 0\n CodeGen:(Total: 48.004ms, non-child: 48.004ms, % non-child: 100.00%)'\ - '\n - CodegenTime: 0.000ns\n - CompileTime: 3.000ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: '\ - '1.98 MB (2077616)\n - NumFunctions: 27 (27)\n - NumInstructions: 494 (494)\n - OptimizationTime: 26.002ms'\ - '\n - PeakMemoryUsage: 247.00 KB (252928)\n - PrepareTime: 18.001ms\n EXCHANGE_NODE (id=2):(Total: 452.038ms,'\ - ' non-child: 452.038ms, % non-child: 100.00%)\n - BytesReceived: 923.00 B (923)\n - ConvertRowBatchTime: 0.000ns\n'\ - ' - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 452.038ms\n - MergeGetNext: 0.000ns\n'\ - ' - MergeGetNextBatch: 0.000ns\n - PeakMemoryUsage: 0\n - RowsReturned: 28 (28)\n - '\ - 'RowsReturnedRate: 61.00 /sec\n - SendersBlockedTimer: 0.000ns\n - SendersBlockedTotalTimer(*): 0.000ns\n'\ - ' Coordinator Fragment F01:\n Instance d424420e0c44ab9:c637ac2900000000 (host=nightly512-unsecure-2.gce.cloudera.com:'\ - '22000):(Total: 1s952ms, non-child: 1s452ms, % non-child: 74.39%)\n MemoryUsage(500.000ms): 8.09 KB, 12.09 KB, 12.09 KB, '\ - '12.09 KB\n - AverageThreadTokens: 0.00 \n - BloomFilterBytes: 0\n - PeakMemoryUsage: 255.00 KB (261120)\n'\ - ' - PerHostPeakMemUsage: 255.00 KB (261120)\n - RowsProduced: 28 (28)\n - TotalNetworkReceiveTime: 0.000ns'\ - '\n - TotalNetworkSendTime: 0.000ns\n - TotalStorageWaitTime: 0.000ns\n - '\ - 
'TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - TotalThreadsTotalWallClockTime: 1s934ms\n - '\ - 'TotalThreadsSysTime: 980.000us\n - TotalThreadsUserTime: 28.421ms\n - TotalThreadsVoluntaryContextSwitches: 3 '\ - '(3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 1s451ms\n - ExecTreeExecTime: 0.000ns\n'\ - ' - OpenTime: 483.041ms\n - ExecTreeOpenTime: 452.038ms\n - PrepareTime: 18.001ms\n - '\ - 'ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 (0)\n'\ - ' - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - '\ - 'MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - '\ - 'ScratchBytesWritten: 0\n - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 '\ - '(0)\n - TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: '\ - '0.000ns\n PLAN_ROOT_SINK:\n - PeakMemoryUsage: 0\n CodeGen:(Total: 48.004ms, non-child: 48.004ms, % '\ - 'non-child: 100.00%)\n - CodegenTime: 0.000ns\n - CompileTime: 3.000ms\n - LoadTime: 0.000ns\n '\ - ' - ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: 27 (27)\n - NumInstructions: 494 (494)\n'\ - ' - OptimizationTime: 26.002ms\n - PeakMemoryUsage: 247.00 KB (252928)\n - PrepareTime: 18.001ms\n'\ - ' EXCHANGE_NODE (id=2):(Total: 452.038ms, non-child: 0.000ns, % non-child: 0.00%)\n ExecOption: Codegen Enabled\n'\ - ' BytesReceived(500.000ms): 0, 923.00 B, 923.00 B, 923.00 B\n - BytesReceived: 923.00 B (923)\n - '\ - 'ConvertRowBatchTime: 0.000ns\n - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 452.038ms\n'\ - ' - MergeGetNext: 0.000ns\n - MergeGetNextBatch: 0.000ns\n - PeakMemoryUsage: 0\n - '\ - 'RowsReturned: 28 (28)\n - RowsReturnedRate: 61.00 /sec\n - SendersBlockedTimer: 0.000ns\n - '\ - 'SendersBlockedTotalTimer(*): 0.000ns\n Averaged Fragment F00:(Total: 450.037ms, non-child: 55.004ms, % non-child: 12.22%)\n'\ - ' split sizes: min: 44.98 KB, max: 44.98 KB, avg: 44.98 KB, stddev: 0\n completion times: min:450.038ms max:450.038ms'\ - ' mean: 450.038ms stddev:0.000ns\n execution rates: min:99.94 KB/sec max:99.94 KB/sec mean:99.94 KB/sec stddev:0.68 '\ - 'B/sec\n num instances: 1\n - AverageThreadTokens: 2.00 \n - BloomFilterBytes: 0\n - PeakMemoryUsage: '\ - '937.09 KB (959584)\n - PerHostPeakMemUsage: 937.09 KB (959584)\n - RowsProduced: 28 (28)\n - '\ - 'TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 50.004ms\n - TotalStorageWaitTime: 180.014ms\n - '\ - 'TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - TotalThreadsTotalWallClockTime: 570.046ms\n - '\ - 'TotalThreadsSysTime: 3.300ms\n - TotalThreadsUserTime: 157.428ms\n - TotalThreadsVoluntaryContextSwitches: 9 '\ - '(9)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 51.004ms\n - ExecTreeExecTime: 0.000ns\n '\ - ' - OpenTime: 339.027ms\n - ExecTreeOpenTime: 180.014ms\n - PrepareTime: 60.004ms\n - '\ - 'ExecTreePrepareTime: 35.002ms\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 '\ - '(0)\n - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - '\ - 'MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - '\ - 'ScratchBytesWritten: 0\n - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 (0)\n'\ - ' - TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n'\ - ' DataStreamSender (dst_id=2):\n - BytesSent: 923.00 B (923)\n - NetworkThroughput(*): 0.00 /sec\n'\ 
- ' - OverallThroughput: 0.00 /sec\n - PeakMemoryUsage: 6.09 KB (6240)\n - RowsReturned: 28 (28)\n '\ - ' - SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: 0.000ns\n - UncompressedRowBatchSize: 1.30 KB '\ - '(1333)\n CodeGen:(Total: 180.014ms, non-child: 180.014ms, % non-child: 100.00%)\n - CodegenTime: 3.000ms\n '\ - '- CompileTime: 42.003ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: '\ - '94 (94)\n - NumInstructions: 1.85K (1846)\n - OptimizationTime: 116.009ms\n - PeakMemoryUsage: 923.00 KB '\ - '(945152)\n - PrepareTime: 21.001ms\n SORT_NODE (id=1):(Total: 215.017ms, non-child: 0.000ns, % non-child: 0.00%)\n '\ - ' - InsertBatchTime: 0.000ns\n - PeakMemoryUsage: 80.00 KB (81920)\n - RowsReturned: 28 (28)\n - '\ - 'RowsReturnedRate: 130.00 /sec\n HDFS_SCAN_NODE (id=0):(Total: 250.020ms, non-child: 250.020ms, % non-child: 100.00%)\n '\ - ' - AverageHdfsReadThreadConcurrency: 0.00 \n - AverageScannerThreadConcurrency: 1.00 \n - BytesRead: 44.98 '\ - 'KB (46055)\n - BytesReadDataNodeCache: 0\n - BytesReadLocal: 44.98 KB (46055)\n - '\ - 'BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: 44.98 KB (46055)\n - DecompressionTime: 0.000ns\n '\ - ' - MaxCompressedTextFileLength: 0\n - NumDisksAccessed: 1 (1)\n - NumScannerThreadsStarted: 1 (1)\n - '\ - 'PeakMemoryUsage: 173.00 KB (177152)\n - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - RemoteScanRanges: 0 (0)\n'\ - ' - RowBatchQueueGetWaitTime: 180.014ms\n - RowBatchQueuePutWaitTime: 0.000ns\n - RowsRead: 823 (823)\n '\ - ' - RowsReturned: 28 (28)\n - RowsReturnedRate: 111.00 /sec\n - ScanRangesComplete: 1 (1)\n - '\ - 'ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - ScannerThreadsTotalWallClockTime: 180.014ms\n - '\ - 'DelimiterParseTime: 0.000ns\n - MaterializeTupleTime(*): 0.000ns\n - ScannerThreadsSysTime: 324.000us\n'\ - ' - ScannerThreadsUserTime: 0.000ns\n - ScannerThreadsVoluntaryContextSwitches: 4 (4)\n - '\ - 'TotalRawHdfsReadTime(*): 0.000ns\n - TotalReadThroughput: 0.00 /sec\n Fragment F00:\n Instance '\ - 'd424420e0c44ab9:c637ac2900000001 (host=nightly512-unsecure-3.gce.cloudera.com:22000):(Total: 450.037ms, non-child: 55.004ms, '\ - '% non-child: 12.22%)\n Hdfs split stats (:<# splits>/): 0:1/44.98 KB \n '\ - 'MemoryUsage(500.000ms): 130.54 KB\n ThreadUsage(500.000ms): 2\n - AverageThreadTokens: 2.00 \n - '\ - 'BloomFilterBytes: 0\n - PeakMemoryUsage: 937.09 KB (959584)\n - PerHostPeakMemUsage: 937.09 KB (959584)\n '\ - ' - RowsProduced: 28 (28)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 50.004ms\n '\ - ' - TotalStorageWaitTime: 180.014ms\n - TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - '\ - 'TotalThreadsTotalWallClockTime: 570.046ms\n - TotalThreadsSysTime: 3.300ms\n - TotalThreadsUserTime: '\ - '157.428ms\n - TotalThreadsVoluntaryContextSwitches: 9 (9)\n Fragment Instance Lifecycle Timings:\n '\ - '- ExecTime: 51.004ms\n - ExecTreeExecTime: 0.000ns\n - OpenTime: 339.027ms\n - '\ - 'ExecTreeOpenTime: 180.014ms\n - PrepareTime: 60.004ms\n - ExecTreePrepareTime: 35.002ms\n '\ - 'BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 (0)\n - BlocksRecycled: 0 '\ - '(0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - MemoryLimit: 16.33 GB '\ - '(17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - ScratchBytesWritten: 0\n '\ - ' - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 (0)\n - '\ - 'TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n '\ - 
'DataStreamSender (dst_id=2):\n - BytesSent: 923.00 B (923)\n - NetworkThroughput(*): 0.00 /sec\n '\ - '- OverallThroughput: 0.00 /sec\n - PeakMemoryUsage: 6.09 KB (6240)\n - RowsReturned: 28 (28)\n - '\ - 'SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: 0.000ns\n - UncompressedRowBatchSize: 1.30 KB (1333)\n'\ - ' CodeGen:(Total: 180.014ms, non-child: 180.014ms, % non-child: 100.00%)\n - CodegenTime: 3.000ms\n - '\ - 'CompileTime: 42.003ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - '\ - 'NumFunctions: 94 (94)\n - NumInstructions: 1.85K (1846)\n - OptimizationTime: 116.009ms\n - '\ - 'PeakMemoryUsage: 923.00 KB (945152)\n - PrepareTime: 21.001ms\n SORT_NODE (id=1):(Total: 215.017ms, non-child: '\ - '0.000ns, % non-child: 0.00%)\n ExecOption: Codegen Enabled\n - InsertBatchTime: 0.000ns\n - '\ - 'PeakMemoryUsage: 80.00 KB (81920)\n - RowsReturned: 28 (28)\n - RowsReturnedRate: 130.00 /sec\n '\ - 'HDFS_SCAN_NODE (id=0):(Total: 250.020ms, non-child: 250.020ms, % non-child: 100.00%)\n Hdfs split stats '\ - '(:<# splits>/): 0:1/44.98 KB \n ExecOption: TEXT Codegen Enabled, Codegen enabled: 1 out '\ - 'of 1\n Hdfs Read Thread Concurrency Bucket: 0:100% 1:0% 2:0% 3:0% 4:0% \n File Formats: TEXT/NONE:1 \n '\ - ' BytesRead(500.000ms): 0\n - AverageHdfsReadThreadConcurrency: 0.00 \n - '\ - 'AverageScannerThreadConcurrency: 1.00 \n - BytesRead: 44.98 KB (46055)\n - BytesReadDataNodeCache: 0\n'\ - ' - BytesReadLocal: 44.98 KB (46055)\n - BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: '\ - '44.98 KB (46055)\n - DecompressionTime: 0.000ns\n - MaxCompressedTextFileLength: 0\n - '\ - 'NumDisksAccessed: 1 (1)\n - NumScannerThreadsStarted: 1 (1)\n - PeakMemoryUsage: 173.00 KB (177152)\n '\ - ' - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - RemoteScanRanges: 0 (0)\n - '\ - 'RowBatchQueueGetWaitTime: 180.014ms\n - RowBatchQueuePutWaitTime: 0.000ns\n - RowsRead: 823 (823)\n'\ - ' - RowsReturned: 28 (28)\n - RowsReturnedRate: 111.00 /sec\n - ScanRangesComplete: 1 (1)\n '\ - ' - ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - ScannerThreadsTotalWallClockTime: 180.014ms\n '\ - ' - DelimiterParseTime: 0.000ns\n - MaterializeTupleTime(*): 0.000ns\n - ScannerThreadsSysTime: '\ - '324.000us\n - ScannerThreadsUserTime: 0.000ns\n - ScannerThreadsVoluntaryContextSwitches: 4 (4)\n '\ - ' - TotalRawHdfsReadTime(*): 0.000ns\n - TotalReadThroughput: 0.00 /sec\n', - u'query_id': u'd424420e0c44ab9:c637ac2900000000', - u'__common__': { - u'navbar': [ - {u'link': u'/backends', u'title': u'/backends'}, - {u'link': u'/catalog', u'title': u'/catalog'}, - {u'link': u'/hadoop-varz', u'title': u'/hadoop-varz'}, - {u'link': u'/log_level', u'title': u'/log_level'}, - {u'link': u'/logs', u'title': u'/logs'}, - {u'link': u'/memz', u'title': u'/memz'}, - {u'link': u'/metrics', u'title': u'/metrics'}, - {u'link': u'/queries', u'title': u'/queries'}, - {u'link': u'/rpcz', u'title': u'/rpcz'}, - {u'link': u'/sessions', u'title': u'/sessions'}, - {u'link': u'/threadz', u'title': u'/threadz'}, - {u'link': u'/varz', u'title': u'/varz'} + 'profile': 'Query (id=d424420e0c44ab9:c637ac2900000000):\n Summary:\n Session ID: 3348564c97187569:1c17ce45bdfbf0b2\n ' + 'Session Type: HIVESERVER2\n HiveServer2 Protocol Version: V6\n Start Time: 2017-10-26 11:20:11.971764000\n End Time: ' + '2017-10-26 11:23:11.429110000\n Query Type: QUERY\n Query State: FINISHED\n Query Status: OK\n Impala Version: ' + 'impalad version 2.9.0-cdh5.12.1 RELEASE (build 6dacae08a283a36bb932335ae0c046977e2474e8)\n User: admin\n 
Connected User: ' + 'admin\n Delegated User: \n Network Address: 10.16.2.226:63745\n Default Db: default\n Sql Statement: SELECT ' + 'sample_07.description, sample_07.salary\r\nFROM\r\n sample_07\r\nWHERE\r\n( sample_07.salary > 100000)\r\nORDER BY ' + 'sample_07.salary DESC\r\nLIMIT 1000\n Coordinator: nightly512-unsecure-2.gce.cloudera.com:22000\n Query Options ' + '(set by configuration): QUERY_TIMEOUT_S=600\n Query Options (set by configuration and planner): QUERY_TIMEOUT_S=600,MT_DOP=0' + '\n Plan: \n----------------\nPer-Host Resource Reservation: Memory=0B\nPer-Host Resource Estimates: Memory=32.00MB\nWARNING: ' + 'The following tables have potentially corrupt table statistics.\nDrop and re-compute statistics to resolve this problem.' + '\ndefault.sample_07\nWARNING: The following tables are missing relevant table and/or column statistics.\ndefault.sample_07\n\n' + 'F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1\nPLAN-ROOT SINK\n| mem-estimate=0B mem-reservation=0B\n|\n02:MERGING-' + 'EXCHANGE [UNPARTITIONED]\n| order by: salary DESC\n| limit: 1000\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=1 ' + 'row-size=19B cardinality=0\n|\nF00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1\n01:TOP-N [LIMIT=1000]\n| order by: salary ' + 'DESC\n| mem-estimate=0B mem-reservation=0B\n| tuple-ids=1 row-size=19B cardinality=0\n|\n00:SCAN HDFS [default.sample_07, ' + 'RANDOM]\n partitions=1/1 files=1 size=44.98KB\n predicates: (sample_07.salary > 100000)\n table stats: 0 rows total\n ' + 'column stats: unavailable\n parquet dictionary predicates: (sample_07.salary > 100000)\n mem-estimate=32.00MB ' + 'mem-reservation=0B\n tuple-ids=0 row-size=19B cardinality=0\n----------------\n Estimated Per-Host Mem: 33554432\n ' + 'Per-Host Memory Reservation: 0\n Tables Missing Stats: default.sample_07\n Tables With Corrupt Table Stats: ' + 'default.sample_07\n Request Pool: root.admin\n Admission result: Admitted immediately\n ExecSummary: \nOperator' + ' #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. 
Peak Mem Detail \n-------------------' + '------------------------------------------------------------------------------------------------\n02:MERGING-EXCHANGE 1 ' + ' 0.000ns 0.000ns 28 0 0 0 UNPARTITIONED \n01:TOP-N 1 0.000ns ' + '0.000ns 28 0 80.00 KB 0 \n00:SCAN HDFS 1 250.020ms 250.020ms ' + '28 0 173.00 KB 32.00 MB default.sample_07 \n Errors: \n Planner Timeline: 3s275ms\n - Metadata ' + 'load started: 11.586ms (11.586ms)\n - Metadata load finished: 3s248ms (3s236ms)\n - Analysis finished: 3s254ms ' + '(6.431ms)\n - Equivalence classes computed: 3s255ms (335.173us)\n - Single node plan created: 3s267ms (12.443ms)\n' + ' - Runtime filters computed: 3s267ms (92.906us)\n - Distributed plan created: 3s267ms (223.487us)\n - Lineage ' + 'info computed: 3s268ms (348.540us)\n - Planning finished: 3s275ms (7.378ms)\n Query Timeline: 2m59s\n - Query ' + 'submitted: 0.000ns (0.000ns)\n - Planning finished: 3s278ms (3s278ms)\n - Submit for admission: 3s279ms (1.000ms)' + '\n - Completed admission: 3s279ms (0.000ns)\n - Ready to start on 2 backends: 3s279ms (0.000ns)\n - All 2 ' + 'execution backends (2 fragment instances) started: 3s331ms (52.004ms)\n - Rows available: 3s781ms (450.038ms)\n - ' + 'First row fetched: 5s232ms (1s451ms)\n - Unregister query: 2m59s (2m54s)\n - ComputeScanRangeAssignmentTimer: ' + '0.000ns\n ImpalaServer:\n - ClientFetchWaitTimer: 2m55s\n - RowMaterializationTimer: 0.000ns\n Execution Profile ' + 'd424420e0c44ab9:c637ac2900000000:(Total: 502.042ms, non-child: 0.000ns, % non-child: 0.00%)\n Number of filters: 0\n ' + 'Filter routing table: \n ID Src. Node Tgt. Node(s) Target type Partition filter Pending (Expected) First arrived Completed' + ' Enabled\n-------------------------------------------------------------------------------------------------------------------\n' + '\n Backend startup latencies: Count: 2, min / max: 1ms / 52ms, 25th %-ile: 1ms, 50th %-ile: 1ms, 75th %-ile: 52ms, 90th %-ile:' + ' 52ms, 95th %-ile: 52ms, 99.9th %-ile: 52ms\n Per Node Peak Memory Usage: nightly512-unsecure-2.gce.cloudera.com:22000' + '(255.00 KB) nightly512-unsecure-3.gce.cloudera.com:22000(937.09 KB) \n - FiltersReceived: 0 (0)\n - FinalizationTimer: ' + '0.000ns\n Averaged Fragment F01:(Total: 1s952ms, non-child: 1s452ms, % non-child: 74.39%)\n split sizes: min: 0, max: ' + '0, avg: 0, stddev: 0\n completion times: min:1s952ms max:1s952ms mean: 1s952ms stddev:0.000ns\n execution rates: ' + 'min:0.00 /sec max:0.00 /sec mean:0.00 /sec stddev:0.00 /sec\n num instances: 1\n - AverageThreadTokens: 0.00 \n' + ' - BloomFilterBytes: 0\n - PeakMemoryUsage: 255.00 KB (261120)\n - PerHostPeakMemUsage: 255.00 KB (261120)\n ' + ' - RowsProduced: 28 (28)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 0.000ns\n - ' + 'TotalStorageWaitTime: 0.000ns\n - TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - TotalThreadsTotalWallClockTime: ' + '1s934ms\n - TotalThreadsSysTime: 980.000us\n - TotalThreadsUserTime: 28.421ms\n - ' + 'TotalThreadsVoluntaryContextSwitches: 3 (3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 1s451ms\n ' + ' - ExecTreeExecTime: 0.000ns\n - OpenTime: 483.041ms\n - ExecTreeOpenTime: 452.038ms\n - PrepareTime:' + ' 18.001ms\n - ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - ' + 'BlocksCreated: 0 (0)\n - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB ' + '(8388608)\n - MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n' + ' - ScratchBytesWritten: 0\n - 
ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: ' + '0 (0)\n - TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n' + ' PLAN_ROOT_SINK:\n - PeakMemoryUsage: 0\n CodeGen:(Total: 48.004ms, non-child: 48.004ms, % non-child: 100.00%)' + '\n - CodegenTime: 0.000ns\n - CompileTime: 3.000ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: ' + '1.98 MB (2077616)\n - NumFunctions: 27 (27)\n - NumInstructions: 494 (494)\n - OptimizationTime: 26.002ms' + '\n - PeakMemoryUsage: 247.00 KB (252928)\n - PrepareTime: 18.001ms\n EXCHANGE_NODE (id=2):(Total: 452.038ms,' + ' non-child: 452.038ms, % non-child: 100.00%)\n - BytesReceived: 923.00 B (923)\n - ConvertRowBatchTime: 0.000ns\n' + ' - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 452.038ms\n - MergeGetNext: 0.000ns\n' + ' - MergeGetNextBatch: 0.000ns\n - PeakMemoryUsage: 0\n - RowsReturned: 28 (28)\n - ' + 'RowsReturnedRate: 61.00 /sec\n - SendersBlockedTimer: 0.000ns\n - SendersBlockedTotalTimer(*): 0.000ns\n' + ' Coordinator Fragment F01:\n Instance d424420e0c44ab9:c637ac2900000000 (host=nightly512-unsecure-2.gce.cloudera.com:' + '22000):(Total: 1s952ms, non-child: 1s452ms, % non-child: 74.39%)\n MemoryUsage(500.000ms): 8.09 KB, 12.09 KB, 12.09 KB, ' + '12.09 KB\n - AverageThreadTokens: 0.00 \n - BloomFilterBytes: 0\n - PeakMemoryUsage: 255.00 KB (261120)\n' + ' - PerHostPeakMemUsage: 255.00 KB (261120)\n - RowsProduced: 28 (28)\n - TotalNetworkReceiveTime: 0.000ns' + '\n - TotalNetworkSendTime: 0.000ns\n - TotalStorageWaitTime: 0.000ns\n - ' + 'TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - TotalThreadsTotalWallClockTime: 1s934ms\n - ' + 'TotalThreadsSysTime: 980.000us\n - TotalThreadsUserTime: 28.421ms\n - TotalThreadsVoluntaryContextSwitches: 3 ' + '(3)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 1s451ms\n - ExecTreeExecTime: 0.000ns\n' + ' - OpenTime: 483.041ms\n - ExecTreeOpenTime: 452.038ms\n - PrepareTime: 18.001ms\n - ' + 'ExecTreePrepareTime: 0.000ns\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 (0)\n' + ' - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - ' + 'MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - ' + 'ScratchBytesWritten: 0\n - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 ' + '(0)\n - TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: ' + '0.000ns\n PLAN_ROOT_SINK:\n - PeakMemoryUsage: 0\n CodeGen:(Total: 48.004ms, non-child: 48.004ms, % ' + 'non-child: 100.00%)\n - CodegenTime: 0.000ns\n - CompileTime: 3.000ms\n - LoadTime: 0.000ns\n ' + ' - ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: 27 (27)\n - NumInstructions: 494 (494)\n' + ' - OptimizationTime: 26.002ms\n - PeakMemoryUsage: 247.00 KB (252928)\n - PrepareTime: 18.001ms\n' + ' EXCHANGE_NODE (id=2):(Total: 452.038ms, non-child: 0.000ns, % non-child: 0.00%)\n ExecOption: Codegen Enabled\n' + ' BytesReceived(500.000ms): 0, 923.00 B, 923.00 B, 923.00 B\n - BytesReceived: 923.00 B (923)\n - ' + 'ConvertRowBatchTime: 0.000ns\n - DeserializeRowBatchTimer: 0.000ns\n - FirstBatchArrivalWaitTime: 452.038ms\n' + ' - MergeGetNext: 0.000ns\n - MergeGetNextBatch: 0.000ns\n - PeakMemoryUsage: 0\n - ' + 'RowsReturned: 28 (28)\n - RowsReturnedRate: 61.00 /sec\n - SendersBlockedTimer: 0.000ns\n - ' + 'SendersBlockedTotalTimer(*): 0.000ns\n Averaged Fragment F00:(Total: 450.037ms, non-child: 55.004ms, % non-child: 12.22%)\n' + ' split 
sizes: min: 44.98 KB, max: 44.98 KB, avg: 44.98 KB, stddev: 0\n completion times: min:450.038ms max:450.038ms' + ' mean: 450.038ms stddev:0.000ns\n execution rates: min:99.94 KB/sec max:99.94 KB/sec mean:99.94 KB/sec stddev:0.68 ' + 'B/sec\n num instances: 1\n - AverageThreadTokens: 2.00 \n - BloomFilterBytes: 0\n - PeakMemoryUsage: ' + '937.09 KB (959584)\n - PerHostPeakMemUsage: 937.09 KB (959584)\n - RowsProduced: 28 (28)\n - ' + 'TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 50.004ms\n - TotalStorageWaitTime: 180.014ms\n - ' + 'TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - TotalThreadsTotalWallClockTime: 570.046ms\n - ' + 'TotalThreadsSysTime: 3.300ms\n - TotalThreadsUserTime: 157.428ms\n - TotalThreadsVoluntaryContextSwitches: 9 ' + '(9)\n Fragment Instance Lifecycle Timings:\n - ExecTime: 51.004ms\n - ExecTreeExecTime: 0.000ns\n ' + ' - OpenTime: 339.027ms\n - ExecTreeOpenTime: 180.014ms\n - PrepareTime: 60.004ms\n - ' + 'ExecTreePrepareTime: 35.002ms\n BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 ' + '(0)\n - BlocksRecycled: 0 (0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - ' + 'MemoryLimit: 16.33 GB (17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - ' + 'ScratchBytesWritten: 0\n - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 (0)\n' + ' - TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n' + ' DataStreamSender (dst_id=2):\n - BytesSent: 923.00 B (923)\n - NetworkThroughput(*): 0.00 /sec\n' + ' - OverallThroughput: 0.00 /sec\n - PeakMemoryUsage: 6.09 KB (6240)\n - RowsReturned: 28 (28)\n ' + ' - SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: 0.000ns\n - UncompressedRowBatchSize: 1.30 KB ' + '(1333)\n CodeGen:(Total: 180.014ms, non-child: 180.014ms, % non-child: 100.00%)\n - CodegenTime: 3.000ms\n ' + '- CompileTime: 42.003ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - NumFunctions: ' + '94 (94)\n - NumInstructions: 1.85K (1846)\n - OptimizationTime: 116.009ms\n - PeakMemoryUsage: 923.00 KB ' + '(945152)\n - PrepareTime: 21.001ms\n SORT_NODE (id=1):(Total: 215.017ms, non-child: 0.000ns, % non-child: 0.00%)\n ' + ' - InsertBatchTime: 0.000ns\n - PeakMemoryUsage: 80.00 KB (81920)\n - RowsReturned: 28 (28)\n - ' + 'RowsReturnedRate: 130.00 /sec\n HDFS_SCAN_NODE (id=0):(Total: 250.020ms, non-child: 250.020ms, % non-child: 100.00%)\n ' + ' - AverageHdfsReadThreadConcurrency: 0.00 \n - AverageScannerThreadConcurrency: 1.00 \n - BytesRead: 44.98 ' + 'KB (46055)\n - BytesReadDataNodeCache: 0\n - BytesReadLocal: 44.98 KB (46055)\n - ' + 'BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: 44.98 KB (46055)\n - DecompressionTime: 0.000ns\n ' + ' - MaxCompressedTextFileLength: 0\n - NumDisksAccessed: 1 (1)\n - NumScannerThreadsStarted: 1 (1)\n - ' + 'PeakMemoryUsage: 173.00 KB (177152)\n - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - RemoteScanRanges: 0 (0)\n' + ' - RowBatchQueueGetWaitTime: 180.014ms\n - RowBatchQueuePutWaitTime: 0.000ns\n - RowsRead: 823 (823)\n ' + ' - RowsReturned: 28 (28)\n - RowsReturnedRate: 111.00 /sec\n - ScanRangesComplete: 1 (1)\n - ' + 'ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - ScannerThreadsTotalWallClockTime: 180.014ms\n - ' + 'DelimiterParseTime: 0.000ns\n - MaterializeTupleTime(*): 0.000ns\n - ScannerThreadsSysTime: 324.000us\n' + ' - ScannerThreadsUserTime: 0.000ns\n - ScannerThreadsVoluntaryContextSwitches: 4 (4)\n - ' + 'TotalRawHdfsReadTime(*): 0.000ns\n - TotalReadThroughput: 
0.00 /sec\n Fragment F00:\n Instance ' + 'd424420e0c44ab9:c637ac2900000001 (host=nightly512-unsecure-3.gce.cloudera.com:22000):(Total: 450.037ms, non-child: 55.004ms, ' + '% non-child: 12.22%)\n Hdfs split stats (:<# splits>/): 0:1/44.98 KB \n ' + 'MemoryUsage(500.000ms): 130.54 KB\n ThreadUsage(500.000ms): 2\n - AverageThreadTokens: 2.00 \n - ' + 'BloomFilterBytes: 0\n - PeakMemoryUsage: 937.09 KB (959584)\n - PerHostPeakMemUsage: 937.09 KB (959584)\n ' + ' - RowsProduced: 28 (28)\n - TotalNetworkReceiveTime: 0.000ns\n - TotalNetworkSendTime: 50.004ms\n ' + ' - TotalStorageWaitTime: 180.014ms\n - TotalThreadsInvoluntaryContextSwitches: 1 (1)\n - ' + 'TotalThreadsTotalWallClockTime: 570.046ms\n - TotalThreadsSysTime: 3.300ms\n - TotalThreadsUserTime: ' + '157.428ms\n - TotalThreadsVoluntaryContextSwitches: 9 (9)\n Fragment Instance Lifecycle Timings:\n ' + '- ExecTime: 51.004ms\n - ExecTreeExecTime: 0.000ns\n - OpenTime: 339.027ms\n - ' + 'ExecTreeOpenTime: 180.014ms\n - PrepareTime: 60.004ms\n - ExecTreePrepareTime: 35.002ms\n ' + 'BlockMgr:\n - BlockWritesOutstanding: 0 (0)\n - BlocksCreated: 0 (0)\n - BlocksRecycled: 0 ' + '(0)\n - BufferedPins: 0 (0)\n - MaxBlockSize: 8.00 MB (8388608)\n - MemoryLimit: 16.33 GB ' + '(17534060544)\n - PeakMemoryUsage: 0\n - ScratchBytesRead: 0\n - ScratchBytesWritten: 0\n ' + ' - ScratchFileUsedBytes: 0\n - ScratchReads: 0 (0)\n - ScratchWrites: 0 (0)\n - ' + 'TotalBufferWaitTime: 0.000ns\n - TotalEncryptionTime: 0.000ns\n - TotalReadBlockTime: 0.000ns\n ' + 'DataStreamSender (dst_id=2):\n - BytesSent: 923.00 B (923)\n - NetworkThroughput(*): 0.00 /sec\n ' + '- OverallThroughput: 0.00 /sec\n - PeakMemoryUsage: 6.09 KB (6240)\n - RowsReturned: 28 (28)\n - ' + 'SerializeBatchTime: 0.000ns\n - TransmitDataRPCTime: 0.000ns\n - UncompressedRowBatchSize: 1.30 KB (1333)\n' + ' CodeGen:(Total: 180.014ms, non-child: 180.014ms, % non-child: 100.00%)\n - CodegenTime: 3.000ms\n - ' + 'CompileTime: 42.003ms\n - LoadTime: 0.000ns\n - ModuleBitcodeSize: 1.98 MB (2077616)\n - ' + 'NumFunctions: 94 (94)\n - NumInstructions: 1.85K (1846)\n - OptimizationTime: 116.009ms\n - ' + 'PeakMemoryUsage: 923.00 KB (945152)\n - PrepareTime: 21.001ms\n SORT_NODE (id=1):(Total: 215.017ms, non-child: ' + '0.000ns, % non-child: 0.00%)\n ExecOption: Codegen Enabled\n - InsertBatchTime: 0.000ns\n - ' + 'PeakMemoryUsage: 80.00 KB (81920)\n - RowsReturned: 28 (28)\n - RowsReturnedRate: 130.00 /sec\n ' + 'HDFS_SCAN_NODE (id=0):(Total: 250.020ms, non-child: 250.020ms, % non-child: 100.00%)\n Hdfs split stats ' + '(:<# splits>/): 0:1/44.98 KB \n ExecOption: TEXT Codegen Enabled, Codegen enabled: 1 out ' + 'of 1\n Hdfs Read Thread Concurrency Bucket: 0:100% 1:0% 2:0% 3:0% 4:0% \n File Formats: TEXT/NONE:1 \n ' + ' BytesRead(500.000ms): 0\n - AverageHdfsReadThreadConcurrency: 0.00 \n - ' + 'AverageScannerThreadConcurrency: 1.00 \n - BytesRead: 44.98 KB (46055)\n - BytesReadDataNodeCache: 0\n' + ' - BytesReadLocal: 44.98 KB (46055)\n - BytesReadRemoteUnexpected: 0\n - BytesReadShortCircuit: ' + '44.98 KB (46055)\n - DecompressionTime: 0.000ns\n - MaxCompressedTextFileLength: 0\n - ' + 'NumDisksAccessed: 1 (1)\n - NumScannerThreadsStarted: 1 (1)\n - PeakMemoryUsage: 173.00 KB (177152)\n ' + ' - PerReadThreadRawHdfsThroughput: 0.00 /sec\n - RemoteScanRanges: 0 (0)\n - ' + 'RowBatchQueueGetWaitTime: 180.014ms\n - RowBatchQueuePutWaitTime: 0.000ns\n - RowsRead: 823 (823)\n' + ' - RowsReturned: 28 (28)\n - RowsReturnedRate: 111.00 /sec\n - ScanRangesComplete: 1 (1)\n ' + ' - 
ScannerThreadsInvoluntaryContextSwitches: 0 (0)\n - ScannerThreadsTotalWallClockTime: 180.014ms\n ' + ' - DelimiterParseTime: 0.000ns\n - MaterializeTupleTime(*): 0.000ns\n - ScannerThreadsSysTime: ' + '324.000us\n - ScannerThreadsUserTime: 0.000ns\n - ScannerThreadsVoluntaryContextSwitches: 4 (4)\n ' + ' - TotalRawHdfsReadTime(*): 0.000ns\n - TotalReadThroughput: 0.00 /sec\n', + 'query_id': 'd424420e0c44ab9:c637ac2900000000', + '__common__': { + 'navbar': [ + {'link': '/backends', 'title': '/backends'}, + {'link': '/catalog', 'title': '/catalog'}, + {'link': '/hadoop-varz', 'title': '/hadoop-varz'}, + {'link': '/log_level', 'title': '/log_level'}, + {'link': '/logs', 'title': '/logs'}, + {'link': '/memz', 'title': '/memz'}, + {'link': '/metrics', 'title': '/metrics'}, + {'link': '/queries', 'title': '/queries'}, + {'link': '/rpcz', 'title': '/rpcz'}, + {'link': '/sessions', 'title': '/sessions'}, + {'link': '/threadz', 'title': '/threadz'}, + {'link': '/varz', 'title': '/varz'}, ], - u'process-name': u'impalad' - } - } + 'process-name': 'impalad', + }, + }, } MEMORY = { '4d497267f34ff17d:817bdfb500000000': { - u'query_id': u'1a48b5796f8f07f5:49ba9e6b00000000', - u'__common__': { - u'navbar': [ - {u'link': u'/backends', u'title': u'/backends'}, - {u'link': u'/catalog', u'title': u'/catalog'}, - {u'link': u'/hadoop-varz', u'title': u'/hadoop-varz'}, - {u'link': u'/log_level', u'title': u'/log_level'}, - {u'link': u'/logs', u'title': u'/logs'}, - {u'link': u'/memz', u'title': u'/memz'}, - {u'link': u'/metrics', u'title': u'/metrics'}, - {u'link': u'/queries', u'title': u'/queries'}, - {u'link': u'/rpcz', u'title': u'/rpcz'}, - {u'link': u'/sessions', u'title': u'/sessions'}, - {u'link': u'/threadz', u'title': u'/threadz'}, - {u'link': u'/varz', u'title': u'/varz'} + 'query_id': '1a48b5796f8f07f5:49ba9e6b00000000', + '__common__': { + 'navbar': [ + {'link': '/backends', 'title': '/backends'}, + {'link': '/catalog', 'title': '/catalog'}, + {'link': '/hadoop-varz', 'title': '/hadoop-varz'}, + {'link': '/log_level', 'title': '/log_level'}, + {'link': '/logs', 'title': '/logs'}, + {'link': '/memz', 'title': '/memz'}, + {'link': '/metrics', 'title': '/metrics'}, + {'link': '/queries', 'title': '/queries'}, + {'link': '/rpcz', 'title': '/rpcz'}, + {'link': '/sessions', 'title': '/sessions'}, + {'link': '/threadz', 'title': '/threadz'}, + {'link': '/varz', 'title': '/varz'}, ], - u'process-name': u'impalad' + 'process-name': 'impalad', }, - u'mem_usage': u'The query is finished, current memory consumption is not available.' 
+ 'mem_usage': 'The query is finished, current memory consumption is not available.', }, '8a46a8865624698f:b80b211500000000': { - u'query_id': u'd424420e0c44ab9:c637ac2900000000', - u'__common__': { - u'navbar': [ - {u'link': u'/backends', u'title': u'/backends'}, - {u'link': u'/catalog', u'title': u'/catalog'}, - {u'link': u'/hadoop-varz', u'title': u'/hadoop-varz'}, - {u'link': u'/log_level', u'title': u'/log_level'}, - {u'link': u'/logs', u'title': u'/logs'}, - {u'link': u'/memz', u'title': u'/memz'}, - {u'link': u'/metrics', u'title': u'/metrics'}, - {u'link': u'/queries', u'title': u'/queries'}, - {u'link': u'/rpcz', u'title': u'/rpcz'}, - {u'link': u'/sessions', u'title': u'/sessions'}, - {u'link': u'/threadz', u'title': u'/threadz'}, - {u'link': u'/varz', u'title': u'/varz'} + 'query_id': 'd424420e0c44ab9:c637ac2900000000', + '__common__': { + 'navbar': [ + {'link': '/backends', 'title': '/backends'}, + {'link': '/catalog', 'title': '/catalog'}, + {'link': '/hadoop-varz', 'title': '/hadoop-varz'}, + {'link': '/log_level', 'title': '/log_level'}, + {'link': '/logs', 'title': '/logs'}, + {'link': '/memz', 'title': '/memz'}, + {'link': '/metrics', 'title': '/metrics'}, + {'link': '/queries', 'title': '/queries'}, + {'link': '/rpcz', 'title': '/rpcz'}, + {'link': '/sessions', 'title': '/sessions'}, + {'link': '/threadz', 'title': '/threadz'}, + {'link': '/varz', 'title': '/varz'}, ], - u'process-name': u'impalad' + 'process-name': 'impalad', }, - u'mem_usage': u'The query is finished, current memory consumption is not available.' - } + 'mem_usage': 'The query is finished, current memory consumption is not available.', + }, } + def __init__(self, url): self.url = url @@ -1459,12 +1532,12 @@ def get_queries(self, **kwargs): return { 'completed_queries': [ MockImpalaQueryApi.APPS['4d497267f34ff17d:817bdfb500000000'], - MockImpalaQueryApi.APPS['8a46a8865624698f:b80b211500000000'] + MockImpalaQueryApi.APPS['8a46a8865624698f:b80b211500000000'], ], 'in_flight_queries': [], 'num_in_flight_queries': 0, 'num_executing_queries': 0, - 'num_waiting_queries': 0 + 'num_waiting_queries': 0, } def get_query(self, query_id): @@ -1485,21 +1558,33 @@ class MockMapreduce2Api(object): MockMapreduceApi and HistoryServerApi are very similar and inherit from it. 
""" - def __init__(self, mr_url=None): pass + def __init__(self, mr_url=None): + pass def tasks(self, job_id): return { - u'tasks': { - u'task': [ + 'tasks': { + 'task': [ { - u'finishTime': 1357153330271, u'successfulAttempt': u'attempt_1356251510842_0062_m_000000_0', u'elapsedTime': 1901, - u'state': u'SUCCEEDED', u'startTime': 1357153328370, u'progress': 100.0, u'type': u'MAP', - u'id': u'task_1356251510842_0062_m_000000' + 'finishTime': 1357153330271, + 'successfulAttempt': 'attempt_1356251510842_0062_m_000000_0', + 'elapsedTime': 1901, + 'state': 'SUCCEEDED', + 'startTime': 1357153328370, + 'progress': 100.0, + 'type': 'MAP', + 'id': 'task_1356251510842_0062_m_000000', }, { - u'finishTime': 0, u'successfulAttempt': u'', u'elapsedTime': 0, u'state': u'SCHEDULED', u'startTime': 1357153326322, - u'progress': 0.0, u'type': u'REDUCE', u'id': u'task_1356251510842_0062_r_000000' - } + 'finishTime': 0, + 'successfulAttempt': '', + 'elapsedTime': 0, + 'state': 'SCHEDULED', + 'startTime': 1357153326322, + 'progress': 0.0, + 'type': 'REDUCE', + 'id': 'task_1356251510842_0062_r_000000', + }, ] } } @@ -1509,90 +1594,78 @@ def conf(self, job_id): "conf": { "path": "hdfs://host.domain.com:9000/user/user1/.staging/job_1326232085508_0004/job.xml", "property": [ - { - "name": "dfs.datanode.data.dir", - "value": "/home/hadoop/hdfs/data", - }, { - "name": "mapreduce.job.acl-modify-job", - "value": "test", - }, { - "name": "mapreduce.job.acl-view-job", - "value": "test,test2", - } - ] + { + "name": "dfs.datanode.data.dir", + "value": "/home/hadoop/hdfs/data", + }, + { + "name": "mapreduce.job.acl-modify-job", + "value": "test", + }, + { + "name": "mapreduce.job.acl-view-job", + "value": "test,test2", + }, + ], } } def job_attempts(self, job_id): return { - "jobAttempts": { - "jobAttempt": [ - { - "nodeId": "host.domain.com:8041", - "nodeHttpAddress": "host.domain.com:8042", - "startTime": 1326238773493, - "id": 1, - "logsLink": "http://host.domain.com:8042/node/containerlogs/container_1326232085508_0004_01_000001", - "containerId": "container_1326232085508_0004_01_000001" - } - ] - } + "jobAttempts": { + "jobAttempt": [ + { + "nodeId": "host.domain.com:8041", + "nodeHttpAddress": "host.domain.com:8042", + "startTime": 1326238773493, + "id": 1, + "logsLink": "http://host.domain.com:8042/node/containerlogs/container_1326232085508_0004_01_000001", + "containerId": "container_1326232085508_0004_01_000001", + } + ] + } } def task_attempts(self, job_id, task_id): return { - "taskAttempts": { - "taskAttempt": [ - { - "elapsedMergeTime": 47, - "shuffleFinishTime": 1326238780052, - "assignedContainerId": "container_1326232085508_0004_01_000003", - "progress": 100, - "elapsedTime": 0, - "state": "RUNNING", - "elapsedShuffleTime": 2592, - "mergeFinishTime": 1326238780099, - "rack": "/98.139.92.0", - "elapsedReduceTime": 0, - "nodeHttpAddress": "host.domain.com:8042", - "type": "REDUCE", - "startTime": 1326238777460, - "id": "attempt_1326232085508_4_4_r_0_0", - "finishTime": 0 - } - ] - } + "taskAttempts": { + "taskAttempt": [ + { + "elapsedMergeTime": 47, + "shuffleFinishTime": 1326238780052, + "assignedContainerId": "container_1326232085508_0004_01_000003", + "progress": 100, + "elapsedTime": 0, + "state": "RUNNING", + "elapsedShuffleTime": 2592, + "mergeFinishTime": 1326238780099, + "rack": "/98.139.92.0", + "elapsedReduceTime": 0, + "nodeHttpAddress": "host.domain.com:8042", + "type": "REDUCE", + "startTime": 1326238777460, + "id": "attempt_1326232085508_4_4_r_0_0", + "finishTime": 0, + } + ] + } } def 
counters(self, job_id): return { - "jobCounters": { - "id": "job_1326232085508_4_4", - "counterGroup": [ - { - "counterGroupName": "org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter", - "counter": [ - { - "reduceCounterValue": 0, - "mapCounterValue": 0, - "totalCounterValue": 0, - "name": "BYTES_READ" - } - ] - }, - { - "counterGroupName": "org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter", - "counter": [ - { - "reduceCounterValue": 0, - "mapCounterValue": 0, - "totalCounterValue": 0, - "name": "BYTES_WRITTEN" - } - ] - } - ] - } + "jobCounters": { + "id": "job_1326232085508_4_4", + "counterGroup": [ + { + "counterGroupName": "org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter", + "counter": [{"reduceCounterValue": 0, "mapCounterValue": 0, "totalCounterValue": 0, "name": "BYTES_READ"}], + }, + { + "counterGroupName": "org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter", + "counter": [{"reduceCounterValue": 0, "mapCounterValue": 0, "totalCounterValue": 0, "name": "BYTES_WRITTEN"}], + }, + ], + } } def kill(self, job_id): @@ -1605,110 +1678,141 @@ class MockMapreduceApi(MockMapreduce2Api): def job(self, user, job_id): if '1356251510842_0009' not in job_id: job = { - u'job': { - u'reducesCompleted': 0, u'mapsRunning': 1, u'id': u'job_1356251510842_0054', u'successfulReduceAttempts': 0, - u'successfulMapAttempts': 0, u'uberized': False, u'reducesTotal': 1, u'elapsedTime': 3426, u'mapsPending': 0, - u'state': u'RUNNING', u'failedReduceAttempts': 0, u'mapsCompleted': 0, u'killedMapAttempts': 0, u'killedReduceAttempts': 0, - u'runningReduceAttempts': 0, u'failedMapAttempts': 0, u'mapsTotal': 1, u'user': u'test', u'startTime': 1357152972886, - u'reducesPending': 1, u'reduceProgress': 0.0, u'finishTime': 0, u'name': u'select avg(salary) from sample_07(Stage-1)', - u'reducesRunning': 0, u'newMapAttempts': 0, u'diagnostics': u'', u'mapProgress': 0.0, - u'runningMapAttempts': 1, u'newReduceAttempts': 1, - # Does not seems to exist in API, we actually skip it in case. - "acls": [{ - "value": "test", - "name": "mapreduce.job.acl-modify-job" - }, { - "value": "test", - "name": "mapreduce.job.acl-view-job" - } - ], - } + 'job': { + 'reducesCompleted': 0, + 'mapsRunning': 1, + 'id': 'job_1356251510842_0054', + 'successfulReduceAttempts': 0, + 'successfulMapAttempts': 0, + 'uberized': False, + 'reducesTotal': 1, + 'elapsedTime': 3426, + 'mapsPending': 0, + 'state': 'RUNNING', + 'failedReduceAttempts': 0, + 'mapsCompleted': 0, + 'killedMapAttempts': 0, + 'killedReduceAttempts': 0, + 'runningReduceAttempts': 0, + 'failedMapAttempts': 0, + 'mapsTotal': 1, + 'user': 'test', + 'startTime': 1357152972886, + 'reducesPending': 1, + 'reduceProgress': 0.0, + 'finishTime': 0, + 'name': 'select avg(salary) from sample_07(Stage-1)', + 'reducesRunning': 0, + 'newMapAttempts': 0, + 'diagnostics': '', + 'mapProgress': 0.0, + 'runningMapAttempts': 1, + 'newReduceAttempts': 1, + # Does not seems to exist in API, we actually skip it in case. 
+ "acls": [{"value": "test", "name": "mapreduce.job.acl-modify-job"}, {"value": "test", "name": "mapreduce.job.acl-view-job"}], + } } job['job']['id'] = job_id return job + class MockSparkHistoryApi(SparkHistoryServerApi): def __init__(self): - self.APPS = [{ + self.APPS = [ + { "id": "application_1513618343677_0018", "name": "Sleep15minPySpark", - "attempts": [{ - "attemptId": "1", - "startTime": "2017-12-20T20:25:19.672GMT", - "endTime": "2017-12-20T20:40:43.768GMT", - "sparkUser": "test", - "completed": True - }] - }, { + "attempts": [ + { + "attemptId": "1", + "startTime": "2017-12-20T20:25:19.672GMT", + "endTime": "2017-12-20T20:40:43.768GMT", + "sparkUser": "test", + "completed": True, + } + ], + }, + { "id": "application_1513618343677_0020", "name": "Sleep15minPySpark", - "attempts": [{ - "attemptId": "2", - "startTime": "2017-12-24T03:19:29.993GMT", - "endTime": "1969-12-31T23:59:59.999GMT", - "sparkUser": "test", - "completed": False - }, { - "attemptId": "1", - "startTime": "2017-12-24T03:12:50.763GMT", - "endTime": "2017-12-24T03:19:22.178GMT", - "sparkUser": "test", - "completed": True - }] - }] + "attempts": [ + { + "attemptId": "2", + "startTime": "2017-12-24T03:19:29.993GMT", + "endTime": "1969-12-31T23:59:59.999GMT", + "sparkUser": "test", + "completed": False, + }, + { + "attemptId": "1", + "startTime": "2017-12-24T03:12:50.763GMT", + "endTime": "2017-12-24T03:19:22.178GMT", + "sparkUser": "test", + "completed": True, + }, + ], + }, + ] def applications(self): return self.APPS def executors(self, job): EXECUTORS_LISTS = { - u'application_1513618343677_0018/1': [{ - u'diskUsed': 0, - u'totalShuffleWrite': 0, - u'totalCores': 0, - u'executorLogs': { - u'stderr': u'http://localhost:8042/node/containerlogs/container_1513618343677_0018_01_000001/test/stderr?start=-4096', - u'stdout': u'http://localhost:8042/node/containerlogs/container_1513618343677_0018_01_000001/test/stdout?start=-4096' - }, - u'totalInputBytes': 0, - u'rddBlocks': 0, - u'maxMemory': 515553361, - u'totalShuffleRead': 0, - u'totalTasks': 0, - u'activeTasks': 0, - u'failedTasks': 0, - u'completedTasks': 0, - u'hostPort': u'172.31.122.54:43234', - u'maxTasks': 0, u'totalGCTime': 0, - u'isBlacklisted': False, - u'memoryUsed': 0, - u'id': u'driver', - u'isActive': True, - u'totalDuration': 0 - }], - u'application_1513618343677_0020/2': [{ - u'diskUsed': 0, - u'totalShuffleWrite': 0, - u'totalCores': 0, - u'executorLogs': { - u'stderr': u'http://localhost:8042/node/containerlogs/container_1513618343677_0020_01_000001/test/stderr?start=-4096', - u'stdout': u'http://localhost:8042/node/containerlogs/container_1513618343677_0020_01_000001/test/stdout?start=-4096'}, - u'totalInputBytes': 0, - u'rddBlocks': 0, - u'maxMemory': 515553361, - u'totalShuffleRead': 0, - u'totalTasks': 0, - u'activeTasks': 0, - u'failedTasks': 0, - u'completedTasks': 0, - u'hostPort': u'172.31.122.65:38210', - u'maxTasks': 0, - u'totalGCTime': 0, - u'isBlacklisted': False, - u'memoryUsed': 0, - u'id': u'driver', - u'isActive': True, - u'totalDuration': 0}] + 'application_1513618343677_0018/1': [ + { + 'diskUsed': 0, + 'totalShuffleWrite': 0, + 'totalCores': 0, + 'executorLogs': { + 'stderr': 'http://localhost:8042/node/containerlogs/container_1513618343677_0018_01_000001/test/stderr?start=-4096', + 'stdout': 'http://localhost:8042/node/containerlogs/container_1513618343677_0018_01_000001/test/stdout?start=-4096', + }, + 'totalInputBytes': 0, + 'rddBlocks': 0, + 'maxMemory': 515553361, + 'totalShuffleRead': 0, + 'totalTasks': 0, + 
'activeTasks': 0, + 'failedTasks': 0, + 'completedTasks': 0, + 'hostPort': '172.31.122.54:43234', + 'maxTasks': 0, + 'totalGCTime': 0, + 'isBlacklisted': False, + 'memoryUsed': 0, + 'id': 'driver', + 'isActive': True, + 'totalDuration': 0, + } + ], + 'application_1513618343677_0020/2': [ + { + 'diskUsed': 0, + 'totalShuffleWrite': 0, + 'totalCores': 0, + 'executorLogs': { + 'stderr': 'http://localhost:8042/node/containerlogs/container_1513618343677_0020_01_000001/test/stderr?start=-4096', + 'stdout': 'http://localhost:8042/node/containerlogs/container_1513618343677_0020_01_000001/test/stdout?start=-4096', + }, + 'totalInputBytes': 0, + 'rddBlocks': 0, + 'maxMemory': 515553361, + 'totalShuffleRead': 0, + 'totalTasks': 0, + 'activeTasks': 0, + 'failedTasks': 0, + 'completedTasks': 0, + 'hostPort': '172.31.122.65:38210', + 'maxTasks': 0, + 'totalGCTime': 0, + 'isBlacklisted': False, + 'memoryUsed': 0, + 'id': 'driver', + 'isActive': True, + 'totalDuration': 0, + } + ], } app_id = self.get_real_app_id(job) if not app_id: @@ -1725,89 +1829,128 @@ def download_executor_logs(self, user, executor, name, offset): def get_executors_loglinks(self, job): return None -class HistoryServerApi(MockMapreduce2Api): - def __init__(self, hs_url=None): pass +class HistoryServerApi(MockMapreduce2Api): + def __init__(self, hs_url=None): + pass def job(self, user, job_id): if '1356251510842_0054' == job_id: return { - u'job': { - u'reducesCompleted': 1, u'avgMapTime': 1798, u'avgMergeTime': 1479, u'id': job_id, - u'successfulReduceAttempts': 1, u'successfulMapAttempts': 2, u'uberized': False, u'reducesTotal': 1, - u'state': u'KILLED', u'failedReduceAttempts': 0, u'mapsCompleted': 2, - u'killedMapAttempts': 0, u'diagnostics': u'', u'mapsTotal': 2, u'user': u'test', - u'startTime': 1357151916268, u'avgReduceTime': 137, u'finishTime': 1357151923925, - u'name': u'oozie:action:T=map-reduce:W=MapReduce-copy:A=Sleep:ID=0000004-121223003201296-oozie-oozi-W', - u'avgShuffleTime': 1421, u'queue': u'default', u'killedReduceAttempts': 0, u'failedMapAttempts': 0 - } + 'job': { + 'reducesCompleted': 1, + 'avgMapTime': 1798, + 'avgMergeTime': 1479, + 'id': job_id, + 'successfulReduceAttempts': 1, + 'successfulMapAttempts': 2, + 'uberized': False, + 'reducesTotal': 1, + 'state': 'KILLED', + 'failedReduceAttempts': 0, + 'mapsCompleted': 2, + 'killedMapAttempts': 0, + 'diagnostics': '', + 'mapsTotal': 2, + 'user': 'test', + 'startTime': 1357151916268, + 'avgReduceTime': 137, + 'finishTime': 1357151923925, + 'name': 'oozie:action:T=map-reduce:W=MapReduce-copy:A=Sleep:ID=0000004-121223003201296-oozie-oozi-W', + 'avgShuffleTime': 1421, + 'queue': 'default', + 'killedReduceAttempts': 0, + 'failedMapAttempts': 0, + } } else: return { - u'job': { - u'reducesCompleted': 1, u'avgMapTime': 1798, u'avgMergeTime': 1479, u'id': u'job_1356251510842_0009', - u'successfulReduceAttempts': 1, u'successfulMapAttempts': 2, u'uberized': False, u'reducesTotal': 1, - u'state': u'SUCCEEDED', u'failedReduceAttempts': 0, u'mapsCompleted': 2, - u'killedMapAttempts': 0, u'diagnostics': u'', u'mapsTotal': 2, u'user': u'test', - u'startTime': 0, u'avgReduceTime': 137, u'finishTime': 1357151923925, - u'name': u'oozie:action:T=map-reduce:W=MapReduce-copy:A=Sleep:ID=0000004-121223003201296-oozie-oozi-W', - u'avgShuffleTime': 1421, u'queue': u'default', u'killedReduceAttempts': 0, u'failedMapAttempts': 0 - } + 'job': { + 'reducesCompleted': 1, + 'avgMapTime': 1798, + 'avgMergeTime': 1479, + 'id': 'job_1356251510842_0009', + 'successfulReduceAttempts': 1, + 
'successfulMapAttempts': 2, + 'uberized': False, + 'reducesTotal': 1, + 'state': 'SUCCEEDED', + 'failedReduceAttempts': 0, + 'mapsCompleted': 2, + 'killedMapAttempts': 0, + 'diagnostics': '', + 'mapsTotal': 2, + 'user': 'test', + 'startTime': 0, + 'avgReduceTime': 137, + 'finishTime': 1357151923925, + 'name': 'oozie:action:T=map-reduce:W=MapReduce-copy:A=Sleep:ID=0000004-121223003201296-oozie-oozi-W', + 'avgShuffleTime': 1421, + 'queue': 'default', + 'killedReduceAttempts': 0, + 'failedMapAttempts': 0, + } } def test_make_log_links(): """ - Unit test for models.LinkJobLogs._make_links + Unit test for models.LinkJobLogs._make_links """ # FileBrowser assert ( - """hdfs://localhost:8020/user/romain/tmp <dir>""" == - LinkJobLogs._make_links('hdfs://localhost:8020/user/romain/tmp ')) - assert ( - """hdfs://localhost:8020/user/romain/tmp<dir>""" == - LinkJobLogs._make_links('hdfs://localhost:8020/user/romain/tmp')) + """hdfs://localhost:8020/user/romain/tmp <dir>""" + == LinkJobLogs._make_links('hdfs://localhost:8020/user/romain/tmp ') + ) + assert """hdfs://localhost:8020/user/romain/tmp<dir>""" == LinkJobLogs._make_links( + 'hdfs://localhost:8020/user/romain/tmp' + ) + assert """output: /user/romain/tmp <dir>""" == LinkJobLogs._make_links( + 'output: /user/romain/tmp ' + ) assert ( - """output: /user/romain/tmp <dir>""" == - LinkJobLogs._make_links('output: /user/romain/tmp ')) + 'Successfully read 3760 records (112648 bytes) from: "' + '/user/hue/pig/examples/data/midsummer.txt"' + ) == LinkJobLogs._make_links('Successfully read 3760 records (112648 bytes) from: "/user/hue/pig/examples/data/midsummer.txt"') assert ( - ('Successfully read 3760 records (112648 bytes) from: "' - '/user/hue/pig/examples/data/midsummer.txt"') == - LinkJobLogs._make_links('Successfully read 3760 records (112648 bytes) from: "/user/hue/pig/examples/data/midsummer.txt"')) + 'data,upper_case MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff,' + == LinkJobLogs._make_links('data,upper_case MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff,') + ) assert ( - 'data,upper_case MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff,' == - LinkJobLogs._make_links('data,upper_case MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff,')) + 'MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff\n2013' + == LinkJobLogs._make_links('MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff\n2013') + ) + assert ' /jobs.tsv ' == LinkJobLogs._make_links(' /jobs.tsv ') assert ( - 'MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff\n2013' == - LinkJobLogs._make_links('MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff\n2013')) - assert ( - ' /jobs.tsv ' == - LinkJobLogs._make_links(' /jobs.tsv ')) - assert ( - 'hdfs://localhost:8020/user/romain/job_pos_2012.tsv' == - LinkJobLogs._make_links('hdfs://localhost:8020/user/romain/job_pos_2012.tsv')) + 'hdfs://localhost:8020/user/romain/job_pos_2012.tsv' + == LinkJobLogs._make_links('hdfs://localhost:8020/user/romain/job_pos_2012.tsv') + ) # JobBrowser + assert """job_201306261521_0058""" == LinkJobLogs._make_links( + 'job_201306261521_0058' + ) assert ( - """job_201306261521_0058""" == - LinkJobLogs._make_links('job_201306261521_0058')) - assert ( - """Hadoop Job IDs executed by Pig: job_201306261521_0058""" == - LinkJobLogs._make_links('Hadoop Job IDs executed by Pig: job_201306261521_0058')) - assert ( - """MapReduceLauncher - HadoopJobId: job_201306261521_0058""" == - LinkJobLogs._make_links('MapReduceLauncher - HadoopJobId: job_201306261521_0058')) + """Hadoop Job IDs executed by Pig: 
job_201306261521_0058""" + == LinkJobLogs._make_links('Hadoop Job IDs executed by Pig: job_201306261521_0058') + ) assert ( - ('- More information at: http://localhost:50030/jobdetails.jsp?jobid=' - 'job_201306261521_0058') == - LinkJobLogs._make_links('- More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201306261521_0058')) + """MapReduceLauncher - HadoopJobId: job_201306261521_0058""" + == LinkJobLogs._make_links('MapReduceLauncher - HadoopJobId: job_201306261521_0058') + ) assert ( - ' Logging error messages to: job_201307091553_0028/attempt_201307091553_002' == - LinkJobLogs._make_links(' Logging error messages to: job_201307091553_0028/attempt_201307091553_002')) + '- More information at: http://localhost:50030/jobdetails.jsp?jobid=' + 'job_201306261521_0058' + ) == LinkJobLogs._make_links('- More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201306261521_0058') assert ( - """ pig-job_201307091553_0028.log""" == - LinkJobLogs._make_links(' pig-job_201307091553_0028.log')) + ' Logging error messages to: job_201307091553_0028/attempt_201307091553_002' + == LinkJobLogs._make_links(' Logging error messages to: job_201307091553_0028/attempt_201307091553_002') + ) + assert """ pig-job_201307091553_0028.log""" == LinkJobLogs._make_links( + ' pig-job_201307091553_0028.log' + ) assert ( - 'MapReduceLauncher - HadoopJobId: job_201306261521_0058. Look at the UI' == - LinkJobLogs._make_links('MapReduceLauncher - HadoopJobId: job_201306261521_0058. Look at the UI')) + 'MapReduceLauncher - HadoopJobId: job_201306261521_0058. Look at the UI' + == LinkJobLogs._make_links('MapReduceLauncher - HadoopJobId: job_201306261521_0058. Look at the UI') + ) diff --git a/apps/jobbrowser/src/jobbrowser/urls.py b/apps/jobbrowser/src/jobbrowser/urls.py index 5b348e4d2bd..21d030cbd86 100644 --- a/apps/jobbrowser/src/jobbrowser/urls.py +++ b/apps/jobbrowser/src/jobbrowser/urls.py @@ -17,13 +17,9 @@ import sys -from jobbrowser import views as jobbrowser_views -from jobbrowser import api2 as jobbrowser_api2 +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from jobbrowser import api2 as jobbrowser_api2, views as jobbrowser_views urlpatterns = [ # "Default" diff --git a/apps/jobbrowser/src/jobbrowser/views.py b/apps/jobbrowser/src/jobbrowser/views.py index 02cb70718ca..1a7e2de2d98 100644 --- a/apps/jobbrowser/src/jobbrowser/views.py +++ b/apps/jobbrowser/src/jobbrowser/views.py @@ -15,29 +15,27 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from past.builtins import cmp -from future import standard_library -standard_library.install_aliases() -from builtins import filter -from builtins import str -import functools -import logging import re -import string import sys import time -import urllib.request, urllib.error, urllib.parse +import string +import logging +import functools +import urllib.error import urllib.parse - -from lxml import html +import urllib.request +from builtins import filter, str from urllib.parse import quote_plus from django.http import HttpResponseRedirect -from django.utils.functional import wraps from django.urls import reverse +from django.utils.functional import wraps +from django.utils.translation import gettext as _ +from lxml import html +from past.builtins import cmp from desktop.auth.backend import is_admin -from desktop.lib.django_util import JsonResponse, render_json, render, copy_query_dict +from desktop.lib.django_util import JsonResponse, copy_query_dict, render, render_json from desktop.lib.exceptions import MessageException from desktop.lib.exceptions_renderable import PopupException from desktop.lib.json_utils import JSONEncoderForHTML @@ -45,30 +43,23 @@ from desktop.lib.rest.resource import Resource from desktop.log.access import access_log_level from desktop.views import register_status_bar_view - from hadoop import cluster -from hadoop.yarn.clients import get_log_client from hadoop.yarn import resource_manager_api as resource_manager_api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from hadoop.yarn.clients import get_log_client +from jobbrowser.api import ApplicationNotRunning, JobExpired, get_api +from jobbrowser.conf import LOG_OFFSET, SHARE_JOBS +from jobbrowser.models import LinkJobLogs, can_kill_job, can_view_job +from jobbrowser.yarn_models import Application LOG = logging.getLogger() try: from beeswax.hive_site import hiveserver2_impersonation_enabled -except: +except Exception: LOG.warning('Hive is not enabled') def hiveserver2_impersonation_enabled(): return True -from jobbrowser.conf import LOG_OFFSET, SHARE_JOBS -from jobbrowser.api import get_api, ApplicationNotRunning, JobExpired -from jobbrowser.models import can_view_job, can_kill_job, LinkJobLogs -from jobbrowser.yarn_models import Application - LOG_OFFSET_BYTES = LOG_OFFSET.get() @@ -259,6 +250,7 @@ def single_spark_job(request, job): 'job': job }) + @check_job_permission def single_job(request, job): def cmp_exec_time(task1, task2): @@ -270,12 +262,8 @@ def cmp_exec_time(task1, task2): failed_tasks = job.filter_tasks(task_states=('failed',)) recent_tasks = job.filter_tasks(task_states=('running', 'succeeded',)) - if sys.version_info[0] > 2: - failed_tasks.sort(key=lambda task: task.execStartTimeMs) - recent_tasks.sort(key=lambda task: task.execStartTimeMs, reverse=True) - else: - failed_tasks.sort(cmp_exec_time) - recent_tasks.sort(cmp_exec_time, reverse=True) + failed_tasks.sort(key=lambda task: task.execStartTimeMs) + recent_tasks.sort(key=lambda task: task.execStartTimeMs, reverse=True) if request.GET.get('format') == 'json': json_failed_tasks = [massage_task_for_json(task) for task in failed_tasks] @@ -341,6 +329,7 @@ def kill_job(request, job): raise Exception(_("Job did not appear as killed within 15 seconds.")) + @check_job_permission def job_executor_logs(request, job, attempt_index=0, name='syslog', offset=LOG_OFFSET_BYTES): response = {'status': -1} @@ -421,7 +410,7 @@ def job_attempt_logs_json(request, job, 
attempt_index=0, name='syslog', offset=L debug_info += '\nHTML Response: %s' % response response['debug'] = debug_info LOG.error(debug_info) - except: + except Exception: LOG.exception('failed to create debug info') return JsonResponse(response) @@ -441,23 +430,19 @@ def cmp_exec_time(task1, task2): task = None failed_tasks = job.filter_tasks(task_states=('failed',)) - if sys.version_info[0] > 2: - failed_tasks.sort(key=functools.cmp_to_key(cmp_exec_time)) - else: - failed_tasks.sort(cmp_exec_time) + failed_tasks.sort(key=functools.cmp_to_key(cmp_exec_time)) + if failed_tasks: task = failed_tasks[0] - if not task.taskAttemptIds and len(failed_tasks) > 1: # In some cases the last task ends up without any attempt + if not task.taskAttemptIds and len(failed_tasks) > 1: # In some cases the last task ends up without any attempt task = failed_tasks[1] else: task_states = ['running', 'succeeded'] if job.is_mr2: task_states.append('scheduled') recent_tasks = job.filter_tasks(task_states=task_states, task_types=('map', 'reduce',)) - if sys.version_info[0] > 2: - recent_tasks.sort(key=functools.cmp_to_key(cmp_exec_time), reverse=True) - else: - recent_tasks.sort(cmp_exec_time, reverse=True) + recent_tasks.sort(key=functools.cmp_to_key(cmp_exec_time), reverse=True) + if recent_tasks: task = recent_tasks[0] @@ -528,6 +513,7 @@ def single_task(request, job, taskid): 'joblnk': job_link }) + @check_job_permission def single_task_attempt(request, job, taskid, attemptid): jt = get_api(request.user, request.jt) @@ -547,6 +533,7 @@ def single_task_attempt(request, job, taskid, attemptid): "task": task }) + @check_job_permission def single_task_attempt_logs(request, job, taskid, attemptid, offset=LOG_OFFSET_BYTES): jt = get_api(request.user, request.jt) @@ -603,6 +590,7 @@ def single_task_attempt_logs(request, job, taskid, attemptid, offset=LOG_OFFSET_ else: return render("attempt_logs.mako", request, context) + @check_job_permission def task_attempt_counters(request, job, taskid, attemptid): """ @@ -617,6 +605,7 @@ def task_attempt_counters(request, job, taskid, attemptid): counters = attempt.counters return render("counters.html", request, {'counters': counters}) + @access_log_level(logging.WARN) def kill_task_attempt(request, attemptid): """ @@ -626,6 +615,7 @@ def kill_task_attempt(request, attemptid): ret = request.jt.kill_task_attempt(request.jt.thriftattemptid_from_string(attemptid)) return render_json({}) + def trackers(request): """ We get here from /trackers @@ -634,6 +624,7 @@ def trackers(request): return render("tasktrackers.mako", request, {'trackers': trackers}) + def single_tracker(request, trackerid): jt = get_api(request.user, request.jt) @@ -643,6 +634,7 @@ def single_tracker(request, trackerid): raise PopupException(_('The tracker could not be contacted.'), detail=e) return render("tasktracker.mako", request, {'tracker': tracker}) + def container(request, node_manager_http_address, containerid): jt = get_api(request.user, request.jt) @@ -660,12 +652,14 @@ def clusterstatus(request): """ return render("clusterstatus.html", request, Cluster(request.jt)) + def queues(request): """ We get here from /queues """ return render("queues.html", request, {"queuelist": request.jt.queues()}) + @check_job_permission def set_job_priority(request, job): """ @@ -676,8 +670,10 @@ def set_job_priority(request, job): request.jt.set_job_priority(jid, ThriftJobPriority._NAMES_TO_VALUES[priority]) return render_json({}) + CONF_VARIABLE_REGEX = r"\$\{(.+)\}" + def make_substitutions(conf): """ Substitute 
occurences of ${foo} with conf[foo], recursively, in all the values @@ -687,6 +683,7 @@ def make_substitutions(conf): this code does not have. """ r = re.compile(CONF_VARIABLE_REGEX) + def sub(s, depth=0): # Malformed / malicious confs could make this loop infinitely if depth > 100: @@ -696,7 +693,7 @@ def sub(s, depth=0): if m: for g in [g for g in m.groups() if g in conf]: substr = "${%s}" % g - s = s.replace(substr, sub(conf[g], depth+1)) + s = s.replace(substr, sub(conf[g], depth + 1)) return s for k, v in list(conf.items()): @@ -704,7 +701,8 @@ def sub(s, depth=0): return conf ################################## -## Helper functions +# Helper functions + def get_shorter_id(hadoop_job_id): return "_".join(hadoop_job_id.split("_")[-2:]) @@ -746,7 +744,7 @@ def get_state_link(request, option=None, val='', VALID_OPTIONS=("state", "user", return "&".join(["%s=%s" % (key, quote_plus(value)) for key, value in states.items()]) -## All Unused below +# All Unused below # DEAD? def dock_jobs(request): @@ -755,6 +753,8 @@ def dock_jobs(request): return render("jobs_dock_info.mako", request, { 'jobs': matching_jobs }, force_template=True) + + register_status_bar_view(dock_jobs) @@ -802,7 +802,7 @@ def check_job_state(state): return lambda job: job.status == state status = request.jt.cluster_status() - alljobs = [] #get_matching_jobs(request) + alljobs = [] # get_matching_jobs(request) runningjobs = list(filter(check_job_state('RUNNING'), alljobs)) completedjobs = list(filter(check_job_state('COMPLETED'), alljobs)) failedjobs = list(filter(check_job_state('FAILED'), alljobs)) diff --git a/apps/jobbrowser/src/jobbrowser/yarn_models.py b/apps/jobbrowser/src/jobbrowser/yarn_models.py index 2487930be10..b23e5c1aa65 100644 --- a/apps/jobbrowser/src/jobbrowser/yarn_models.py +++ b/apps/jobbrowser/src/jobbrowser/yarn_models.py @@ -15,36 +15,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import division -from future import standard_library -standard_library.install_aliases() -from builtins import str -from builtins import object -import logging -import math import os import re import sys +import math import time +import logging import urllib.parse +from builtins import object, str +from django.utils.translation import gettext as _ from lxml import html from desktop.lib.exceptions_renderable import PopupException from desktop.lib.rest.http_client import HttpClient from desktop.lib.rest.resource import Resource from desktop.lib.view_util import big_filesizeformat, format_duration_in_millis - from hadoop import cluster from hadoop.yarn.clients import get_log_client - from jobbrowser.models import format_unixtime_ms -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -257,7 +247,6 @@ def __init__(self, api, attrs): else: self.progress = self.reduces_percent_complete - def _fixup(self): jobid = self.id @@ -343,6 +332,7 @@ def job_attempts(self): self._job_attempts = self.api.job_attempts(self.id)['jobAttempts'] return self._job_attempts + class YarnV2Job(Job): def __init__(self, api, attrs): self.api = api @@ -418,6 +408,7 @@ def job_attempts(self): return self._job_attempts + # There's are tasks for Oozie workflow so we create a dummy one. 
class YarnTask(object): def __init__(self, job): @@ -427,6 +418,7 @@ def get_attempt(self, attempt_id): json = self.job.api.appattempts_attempt(self.job.id, attempt_id) return YarnV2Attempt(self, json) + class KilledJob(Job): def __init__(self, api, attrs): @@ -629,7 +621,8 @@ def get_task_log(self, offset=0): for name in ('stdout', 'stderr', 'syslog'): link = '/%s/' % name - if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED': # Yarn currently dumps with 500 error with doas in running state + # Yarn currently dumps with 500 error with doas in running state + if self.type == 'Oozie Launcher' and not self.task.job.status == 'FINISHED': params = {} else: params = { @@ -654,13 +647,14 @@ def get_task_log(self, offset=0): if response: debug_info += '\nHTML Response: %s' % response LOG.error(debug_info) - except: + except Exception: LOG.exception('failed to build debug info') logs.append(log) return logs + [''] * (3 - len(logs)) + class YarnV2Attempt(Attempt): def __init__(self, task, attrs): self.task = task @@ -691,6 +685,7 @@ def _fixup(self): setattr(self, 'status', 'RUNNING' if self.finishedTime == 0 else 'SUCCEEDED') setattr(self, 'properties', {}) + class Container(object): def __init__(self, attrs): diff --git a/apps/jobsub/src/jobsub/conf.py b/apps/jobsub/src/jobsub/conf.py index 4af7f476c54..95a1ade4eb2 100644 --- a/apps/jobsub/src/jobsub/conf.py +++ b/apps/jobsub/src/jobsub/conf.py @@ -16,16 +16,11 @@ # limitations under the License. import os.path -import sys -from desktop.lib.conf import Config -from desktop.lib import paths - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import gettext_lazy as _ +from desktop.lib import paths +from desktop.lib.conf import Config LOCAL_DATA_DIR = Config( key="local_data_dir", diff --git a/apps/jobsub/src/jobsub/forms.py b/apps/jobsub/src/jobsub/forms.py index f3bb9f3c680..5e44a1392ad 100644 --- a/apps/jobsub/src/jobsub/forms.py +++ b/apps/jobsub/src/jobsub/forms.py @@ -15,24 +15,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import logging import sys +import logging +from builtins import object from django import forms +from django.utils.translation import gettext as _ from desktop.lib.django_forms import MultiForm from jobsub import models -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() # This aligns with what Oozie accepts as a workflow name -_OOZIE_WORKFLOW_NAME_REGEX = '^([a-zA-Z_]([\-_a-zA-Z0-9])*){1,39}$' +_OOZIE_WORKFLOW_NAME_REGEX = r'^([a-zA-Z_]([\-_a-zA-Z0-9])*){1,39}$' + class WorkflowDesignForm(forms.ModelForm): """Used for specifying a design""" @@ -112,6 +109,7 @@ def design_form_by_type(action_type): cls = _ACTION_TYPE_TO_FORM_CLS[action_type] return MultiForm(wf=WorkflowDesignForm, action=cls) + def design_form_by_instance(design_obj, data=None): action_obj = design_obj.get_root_action() cls = _ACTION_TYPE_TO_FORM_CLS[action_obj.action_type] diff --git a/apps/jobsub/src/jobsub/models.py b/apps/jobsub/src/jobsub/models.py index 315d10e0ed0..d5b1d1201b7 100644 --- a/apps/jobsub/src/jobsub/models.py +++ b/apps/jobsub/src/jobsub/models.py @@ -15,21 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import str -import logging import sys +import logging +from builtins import str from django.db import models from django.urls import reverse +from django.utils.translation import gettext_lazy as _ -from desktop.lib.parameterization import find_parameters, bind_parameters +from desktop.lib.parameterization import bind_parameters, find_parameters from useradmin.models import User -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ - LOG = logging.getLogger() @@ -63,7 +59,7 @@ def submit_url(self): return reverse("jobsub.views.submit_design", kwargs=dict(id=self.id)) def clone(self): - clone_kwargs = dict([(field.name, getattr(self, field.name)) for field in self._meta.fields if field.name != 'id']); + clone_kwargs = dict([(field.name, getattr(self, field.name)) for field in self._meta.fields if field.name != 'id']) return self.__class__.objects.create(**clone_kwargs) def to_jsonable(self): @@ -75,6 +71,7 @@ def to_jsonable(self): 'data': repr(self.data) } + class CheckForSetup(models.Model): """ A model which should have at most one row, indicating @@ -86,7 +83,7 @@ class CheckForSetup(models.Model): setup_level = models.IntegerField(default=0) -################################## New Models ################################ +# New Models ################################ PATH_MAX = 512 diff --git a/apps/jobsub/src/jobsub/old_migrations/0005_unify_with_oozie.py b/apps/jobsub/src/jobsub/old_migrations/0005_unify_with_oozie.py index 74cac217217..125653df5f8 100644 --- a/apps/jobsub/src/jobsub/old_migrations/0005_unify_with_oozie.py +++ b/apps/jobsub/src/jobsub/old_migrations/0005_unify_with_oozie.py @@ -9,10 +9,7 @@ from oozie.importlib.jobdesigner import convert_jobsub_design from oozie.models import Workflow, Kill, Start, End -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ class Migration(DataMigration): diff --git a/apps/jobsub/src/jobsub/old_migrations/0006_chg_varchars_to_textfields.py b/apps/jobsub/src/jobsub/old_migrations/0006_chg_varchars_to_textfields.py index 2830c6beb77..50af48541da 100644 --- a/apps/jobsub/src/jobsub/old_migrations/0006_chg_varchars_to_textfields.py +++ b/apps/jobsub/src/jobsub/old_migrations/0006_chg_varchars_to_textfields.py @@ -6,10 +6,7 @@ from south.v2 import SchemaMigration from django.db import models -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ class Migration(SchemaMigration): diff --git a/apps/jobsub/src/jobsub/urls.py b/apps/jobsub/src/jobsub/urls.py index bbeaca7f838..2a381c50cd0 100644 --- a/apps/jobsub/src/jobsub/urls.py +++ b/apps/jobsub/src/jobsub/urls.py @@ -17,12 +17,9 @@ import sys -from jobsub import views as jobsub_views +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from jobsub import views as jobsub_views urlpatterns = [ # The base view is the "list" view, which we alias as / diff --git a/apps/jobsub/src/jobsub/views.py b/apps/jobsub/src/jobsub/views.py index 24f13cf2e8d..4b5ceaadb8a 100644 --- a/apps/jobsub/src/jobsub/views.py +++ b/apps/jobsub/src/jobsub/views.py @@ -25,30 +25,24 @@ is a "job submission". Submissions can be "watched". 
""" -from builtins import str -import logging import sys import time as py_time +import logging +from builtins import str + +from django.utils.translation import gettext as _ from desktop import appmanager +from desktop.auth.backend import is_admin from desktop.lib.django_util import render, render_json from desktop.lib.exceptions import StructuredException from desktop.lib.exceptions_renderable import PopupException from desktop.lib.view_util import is_ajax from desktop.log.access import access_warn from desktop.models import Document - -from oozie.models import Workflow from oozie.forms import design_form_by_type -from oozie.utils import model_to_dict, format_dict_field_values,\ - sanitize_node_dict - -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from oozie.models import Workflow +from oozie.utils import format_dict_field_values, model_to_dict, sanitize_node_dict LOG = logging.getLogger() MAX_DESIGNS = 250 @@ -115,9 +109,11 @@ def list_designs(request): 'apps': appmanager.get_apps_dict() }) + def not_available(request): return render("not_available.mako", request, {}) + def _get_design(user, design_id): """Raise PopupException if design doesn't exist""" try: @@ -206,7 +202,7 @@ def save_design(request, design_id): data = format_dict_field_values(request.POST.copy()) _save_design(request.user, design_id, data) - return get_design(request, design_id); + return get_design(request, design_id) def _save_design(user, design_id, data): diff --git a/apps/metastore/src/metastore/forms.py b/apps/metastore/src/metastore/forms.py index 384945e004e..2f8196b6853 100644 --- a/apps/metastore/src/metastore/forms.py +++ b/apps/metastore/src/metastore/forms.py @@ -15,22 +15,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - from django import forms +from django.utils.translation import gettext as _, gettext_lazy as _t -from desktop.lib.django_forms import simple_formset_factory, DependencyAwareForm -from desktop.lib.django_forms import ChoiceOrOtherField, MultiForm, SubmitButton from filebrowser.forms import PathField -from beeswax import common -from beeswax.models import SavedQuery - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t - class DbForm(forms.Form): """For 'show tables'""" @@ -63,6 +52,6 @@ def __init__(self, table_obj, *args, **kwargs): # We give these numeric names because column names # may be unpleasantly arbitrary. 
name = "partition_%d" % i - char_field = forms.CharField(required=True, label=_t("%(column_name)s (partition key with type %(column_type)s)") % {'column_name': column.name, 'column_type': column.type}) + char_field = forms.CharField(required=True, label=_t("%(column_name)s (partition key with type %(column_type)s)") % {'column_name': column.name, 'column_type': column.type}) # noqa: E501 self.fields[name] = char_field self.partition_columns[name] = column.name diff --git a/apps/metastore/src/metastore/templates/metastore.mako b/apps/metastore/src/metastore/templates/metastore.mako index 8f33971101d..9f9d0fc50bf 100644 --- a/apps/metastore/src/metastore/templates/metastore.mako +++ b/apps/metastore/src/metastore/templates/metastore.mako @@ -23,7 +23,6 @@ else: from desktop import conf from desktop.conf import USE_NEW_EDITOR -from desktop.lib.i18n import smart_unicode from desktop.views import commonheader, commonfooter, _ko from desktop.webpack_utils import get_hue_bundles from metastore.conf import SHOW_TABLE_ERD diff --git a/apps/metastore/src/metastore/tests.py b/apps/metastore/src/metastore/tests.py index a70fb658c10..c52888e1258 100644 --- a/apps/metastore/src/metastore/tests.py +++ b/apps/metastore/src/metastore/tests.py @@ -16,37 +16,36 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library - -standard_library.install_aliases() -from builtins import object +import sys import json import logging -import pytest -import sys -import urllib.request, urllib.parse, urllib.error +import urllib.error +import urllib.parse +import urllib.request +from builtins import object +from unittest.mock import Mock, patch -from django.utils.encoding import smart_str +import pytest from django.urls import reverse - -from desktop.lib.django_test_util import make_logged_in_client, assert_equal_mod_whitespace -from desktop.lib.test_utils import add_permission, grant_access -from hadoop.pseudo_hdfs4 import is_live_cluster -from metastore import parser -from useradmin.models import HuePermission, GroupPermission, User, Group +from django.utils.encoding import smart_str from beeswax.conf import LIST_PARTITIONS_LIMIT -from beeswax.views import collapse_whitespace -from beeswax.test_base import make_query, wait_for_query_to_finish, verify_history, get_query_server_config, \ - fetch_query_result_data from beeswax.models import QueryHistory from beeswax.server import dbms -from beeswax.test_base import BeeswaxSampleProvider - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock +from beeswax.test_base import ( + BeeswaxSampleProvider, + fetch_query_result_data, + get_query_server_config, + make_query, + verify_history, + wait_for_query_to_finish, +) +from beeswax.views import collapse_whitespace +from desktop.lib.django_test_util import assert_equal_mod_whitespace, make_logged_in_client +from desktop.lib.test_utils import add_permission, grant_access +from hadoop.pseudo_hdfs4 import is_live_cluster +from metastore import parser +from useradmin.models import Group, GroupPermission, HuePermission, User LOG = logging.getLogger() @@ -59,21 +58,22 @@ def _make_query(client, query, submission_type="Execute", res = make_query(client, query, submission_type, udfs, settings, resources, wait, name, desc, local, is_parameterized, max, database, email_notify, **kwargs) - + # Should be in the history if it's submitted. 
if submission_type == 'Execute': fragment = collapse_whitespace(smart_str(query[:20])) verify_history(client, fragment=fragment) - + return res + @pytest.mark.django_db class TestApi(): def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) - + self.user = User.objects.get(username="test") - + def test_show_tables(self): grant_access("test", "default", "metastore") with patch('beeswax.server.dbms.get') as get: @@ -89,11 +89,11 @@ def test_show_tables(self): ), server_name='hive' ) - + response = self.client.post('/metastore/tables/sfdc?format=json') - + get.assert_called() - + assert response.status_code == 200 data = json.loads(response.content) assert data['status'] == 0 @@ -125,52 +125,53 @@ def test_show_tables_hs2(self): ) response = self.client.post('/metastore/tables/sfdc?format=json') get.assert_called() - + assert response.status_code == 200 data = json.loads(response.content) assert data['status'] == 0 assert data['table_names'] == ['customer', 'opportunities'] assert data['tables'] == [{'name': 'customer'}, {'name': 'opportunities'}] + @pytest.mark.django_db @pytest.mark.integration @pytest.mark.requires_hadoop class TestMetastoreWithHadoop(BeeswaxSampleProvider): - + def setup_method(self): user = User.objects.get(username='test') self.db = dbms.get(user, get_query_server_config()) - + add_permission("test", "test", "write", "metastore") - + def test_basic_flow(self): # Default database should exist response = self.client.get("/metastore/databases") assert self.db_name in response.context[0]["databases"] - + # Table should have been created response = self.client.get("/metastore/tables/") assert 200 == response.status_code - + # Switch databases response = self.client.get("/metastore/tables/%s?format=json" % self.db_name) data = json.loads(response.content) assert 'name' in data["tables"][0] assert "test" in data["table_names"] - + # Should default to "default" database response = self.client.get("/metastore/tables/not_there") assert 200 == response.status_code - + # And have detail response = self.client.post("/metastore/table/%s/test/?format=json" % self.db_name, {'format': 'json'}) data = json.loads(response.content) assert "foo" in [col['name'] for col in data['cols']] assert "SerDe Library:" in [prop['col_name'] for prop in data['properties']], data - + # Remember the number of history items. Use a generic fragment 'test' to pass verification. history_cnt = verify_history(self.client, fragment='test') - + # Show table data. response = self.client.get("/metastore/table/%s/test/read" % self.db_name, follow=True) response = self.client.get( @@ -181,7 +182,7 @@ def test_basic_flow(self): assert len(results['results']) > 0 # This should NOT go into the query history. 
assert verify_history(self.client, fragment='test') == history_cnt, 'Implicit queries should not be saved in the history' - + def test_show_tables(self): hql = """ CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables'; @@ -190,7 +191,7 @@ def test_show_tables(self): """ resp = _make_query(self.client, hql, database=self.db_name) resp = wait_for_query_to_finish(self.client, resp, max=30.0) - + # Table should have been created response = self.client.get("/metastore/tables/%s?filter=show_tables&format=json" % self.db_name) assert 200 == response.status_code @@ -199,14 +200,14 @@ def test_show_tables(self): assert 'name' in data["tables"][0] assert 'comment' in data["tables"][0] assert 'type' in data["tables"][0] - + hql = """ CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables'; CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables'; """ resp = _make_query(self.client, hql, database=self.db_name) resp = wait_for_query_to_finish(self.client, resp, max=30.0) - + # Table should have been created response = self.client.get("/metastore/tables/%s?filter=show_tables&format=json" % self.db_name) assert 200 == response.status_code @@ -215,30 +216,30 @@ def test_show_tables(self): assert 'name' in data["tables"][0] assert 'comment' in data["tables"][0] assert 'type' in data["tables"][0] - + hql = """ CREATE INDEX test_index ON TABLE test_show_tables_1 (a) AS 'COMPACT' WITH DEFERRED REBUILD; """ resp = _make_query(self.client, hql, wait=True, local=False, max=30.0, database=self.db_name) - + # By default, index table should not appear in show tables view response = self.client.get("/metastore/tables/%s?format=json" % self.db_name) assert 200 == response.status_code data = json.loads(response.content) - assert not 'test_index' in data['tables'] - + assert 'test_index' not in data['tables'] + def test_describe_view(self): resp = self.client.post('/metastore/table/%s/myview' % self.db_name, data={'format': 'json'}) assert 200 == resp.status_code, resp.content data = json.loads(resp.content) assert data['is_view'] assert "myview" == data['name'] - + def test_describe_partitions(self): response = self.client.post("/metastore/table/%s/test_partitions" % self.db_name, data={'format': 'json'}) data = json.loads(response.content) assert 2 == len(data['partition_keys']), data - + response = self.client.post("/metastore/table/%s/test_partitions/partitions" % self.db_name, data={'format': 'json'}, follow=True) data = json.loads(response.content) @@ -247,11 +248,11 @@ def test_describe_partitions(self): assert '12345' in partition_columns, partition_columns assert "baz_foo" in partition_columns assert '67890' in partition_columns - + # Not partitioned response = self.client.get("/metastore/table/%s/test/partitions" % self.db_name, follow=True) assert "is not partitioned." 
in response.content - + def test_describe_partitioned_table_with_limit(self): # We have 2 partitions in the test table finish = LIST_PARTITIONS_LIMIT.set_for_testing("1") @@ -261,7 +262,7 @@ def test_describe_partitioned_table_with_limit(self): assert 1 == len(partition_values_json) finally: finish() - + finish = LIST_PARTITIONS_LIMIT.set_for_testing("3") try: response = self.client.get("/metastore/table/%s/test_partitions/partitions" % self.db_name) @@ -269,11 +270,11 @@ def test_describe_partitioned_table_with_limit(self): assert 2 == len(partition_values_json) finally: finish() - + def test_read_partitions(self): if not is_live_cluster(): raise SkipTest - + partition_spec = "baz='baz_one',boom=12345" response = self.client.get( "/metastore/table/%s/test_partitions/partitions/%s/read" % (self.db_name, partition_spec), follow=True) @@ -282,7 +283,7 @@ def test_read_partitions(self): response = wait_for_query_to_finish(self.client, response, max=30.0) results = fetch_query_result_data(self.client, response) assert len(results['results']) > 0, results - + def test_browse_partition(self): partition_spec = "baz='baz_one',boom=12345" response = self.client.get( @@ -293,19 +294,19 @@ def test_browse_partition(self): path = '/user/hive/warehouse/test_partitions/baz=baz_one/boom=12345' filebrowser_path = urllib.parse.unquote(reverse("filebrowser:filebrowser.views.view", kwargs={'path': path})) assert response.request['PATH_INFO'] == filebrowser_path - + def test_drop_partition(self): # Create partition first partition_spec = "baz='baz_drop',boom=54321" hql = 'ALTER TABLE `%s`.`test_partitions` ADD IF NOT EXISTS PARTITION (%s);' % (self.db_name, partition_spec) resp = _make_query(self.client, hql, database=self.db_name) wait_for_query_to_finish(self.client, resp, max=30.0) - + # Assert partition exists response = self.client.get("/metastore/table/%s/test_partitions/partitions" % self.db_name, {'format': 'json'}) data = json.loads(response.content) assert "baz_drop" in [part['columns'][0] for part in data['partition_values_json']], data - + # Drop partition self.client.post( "/metastore/table/%s/test_partitions/partitions/drop" % self.db_name, @@ -319,8 +320,8 @@ def test_drop_partition(self): ) response = self.client.get("/metastore/table/%s/test_partitions/partitions" % self.db_name, {'format': 'json'}) data = json.loads(response.content) - assert not "baz_drop" in [part['columns'][0] for part in data['partition_values_json']], data - + assert "baz_drop" not in [part['columns'][0] for part in data['partition_values_json']], data + def test_drop_multi_tables(self): hql = """ CREATE TABLE test_drop_1 (a int); @@ -329,7 +330,7 @@ def test_drop_multi_tables(self): """ resp = _make_query(self.client, hql, database=self.db_name) resp = wait_for_query_to_finish(self.client, resp, max=30.0) - + # Drop them resp = self.client.get('/metastore/tables/drop/%s' % self.db_name, follow=True) assert 'want to delete' in resp.content, resp.content @@ -338,7 +339,7 @@ def test_drop_multi_tables(self): {u'table_selection': [u'test_drop_1', u'test_drop_2', u'test_drop_3'], 'is_embeddable': True} ) assert resp.status_code == 302 - + def test_drop_multi_tables_with_skip_trash(self): hql = """ CREATE TABLE test_drop_multi_tables_with_skip_trash_1 (a int); @@ -347,7 +348,7 @@ def test_drop_multi_tables_with_skip_trash(self): """ resp = _make_query(self.client, hql, database=self.db_name) resp = wait_for_query_to_finish(self.client, resp, max=30.0) - + # Drop them resp = self.client.get('/metastore/tables/drop/%s' % 
self.db_name, follow=True) assert 'want to delete' in resp.content, resp.content @@ -361,19 +362,19 @@ def test_drop_multi_tables_with_skip_trash(self): } ) assert resp.status_code == 302 - + response = self.client.get("/metastore/tables/%s?format=json" % self.db_name) assert 200 == response.status_code data = json.loads(response.content) - assert not 'test_drop_multi_tables_with_skip_trash_1' in data['tables'] - assert not 'test_drop_multi_tables_with_skip_trash_2' in data['tables'] - assert not 'test_drop_multi_tables_with_skip_trash_3' in data['tables'] - + assert 'test_drop_multi_tables_with_skip_trash_1' not in data['tables'] + assert 'test_drop_multi_tables_with_skip_trash_2' not in data['tables'] + assert 'test_drop_multi_tables_with_skip_trash_3' not in data['tables'] + def test_drop_multi_databases(self): db1 = '%s_test_drop_1' % self.db_name db2 = '%s_test_drop_2' % self.db_name db3 = '%s_test_drop_3' % self.db_name - + try: hql = """ CREATE DATABASE %(db1)s; @@ -382,13 +383,13 @@ def test_drop_multi_databases(self): """ % {'db1': db1, 'db2': db2, 'db3': db3} resp = _make_query(self.client, hql) resp = wait_for_query_to_finish(self.client, resp, max=30.0) - + # Add a table to db1 hql = "CREATE TABLE " + "`" + db1 + "`." + "`test_drop_1` (a int);" resp = _make_query(self.client, hql, database=db1) resp = wait_for_query_to_finish(self.client, resp, max=30.0) assert resp.status_code == 200 - + # Drop them resp = self.client.get('/metastore/databases/drop', follow=True) assert 'want to delete' in resp.content, resp.content @@ -399,34 +400,34 @@ def test_drop_multi_databases(self): make_query(self.client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db1}, wait=True) make_query(self.client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db2}, wait=True) make_query(self.client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db3}, wait=True) - + def test_load_data(self): """ Test load data queries. These require Hadoop, because they ask the metastore about whether a table is partitioned. 
""" - + # Check that view works resp = self.client.get("/metastore/table/%s/test/load" % self.db_name, follow=True) assert 'Path' in resp.content - + data_dir = '%(prefix)s/tmp' % {'prefix': self.cluster.fs_prefix} data_path = data_dir + '/foo' self.cluster.fs.mkdir(data_dir) self.cluster.fs.create(data_path, data='123') - + # Try the submission response = self.client.post("/metastore/table/%s/test/load" % self.db_name, {'path': data_path, 'overwrite': True}, follow=True) data = json.loads(response.content) query = QueryHistory.objects.get(id=data['query_history_id']) - + assert_equal_mod_whitespace( "LOAD DATA INPATH '%(data_path)s' OVERWRITE INTO TABLE `%(db)s`.`test`" % {'data_path': data_path, 'db': self.db_name}, query.query ) - + resp = self.client.post("/metastore/table/%s/test/load" % self.db_name, {'path': data_path, 'overwrite': False}, follow=True) query = QueryHistory.objects.latest('id') @@ -434,7 +435,7 @@ def test_load_data(self): "LOAD DATA INPATH '%(data_path)s' INTO TABLE `%(db)s`.`test`" % {'data_path': data_path, 'db': self.db_name}, query.query ) - + # Try it with partitions resp = self.client.post( "/metastore/table/%s/test_partitions/load" % self.db_name, @@ -448,21 +449,21 @@ def test_load_data(self): 'data_path': data_path, 'db': self.db_name } ) - + def test_has_write_access_frontend(self): client = make_logged_in_client(username='write_access_frontend', groupname='write_access_frontend', is_superuser=False) grant_access("write_access_frontend", "write_access_frontend", "metastore") user = User.objects.get(username='write_access_frontend') - + response = client.get("/metastore/databases") - assert not "Drop" in response.content, response.content - assert not "Create a new database" in response.content, response.content - + assert "Drop" not in response.content, response.content + assert "Create a new database" not in response.content, response.content + response = client.get("/metastore/tables/") - assert not "Drop" in response.content, response.content - assert not "Create a new table" in response.content, response.content - + assert "Drop" not in response.content, response.content + assert "Create a new table" not in response.content, response.content + # Add access group, created = Group.objects.get_or_create(name='write_access_frontend') perm, created = HuePermission.objects.get_or_create(app='metastore', action='write') @@ -471,75 +472,75 @@ def test_has_write_access_frontend(self): response = client.get("/metastore/databases") assert "Drop" in response.content, response.content assert "Create a new database" in response.content, response.content - + response = client.get("/metastore/tables/") assert "Drop" in response.content, response.content assert "Create a new table" in response.content, response.content - + def test_has_write_access_backend(self): client = make_logged_in_client(username='write_access_backend', groupname='write_access_backend', is_superuser=False) grant_access("write_access_backend", "write_access_backend", "metastore") grant_access("write_access_backend", "write_access_backend", "beeswax") user = User.objects.get(username='write_access_backend') - + # Only fails if we were using Sentry and won't allow SELECT to user resp = _make_query(client, 'CREATE TABLE test_perm_1 (a int);', database=self.db_name) resp = wait_for_query_to_finish(client, resp, max=30.0) - + def check(client, http_codes): resp = client.get('/metastore/tables/drop/%s' % self.db_name) assert resp.status_code in http_codes, resp.content - + resp = 
client.post('/metastore/tables/drop/%s' % self.db_name, {u'table_selection': [u'test_perm_1']}) assert resp.status_code in http_codes, resp.content - + check(client, [301]) # Denied - + # Add access group, created = Group.objects.get_or_create(name='write_access_backend') perm, created = HuePermission.objects.get_or_create(app='metastore', action='write') GroupPermission.objects.get_or_create(group=group, hue_permission=perm) - + check(client, [200, 302]) # Ok - + def test_alter_database(self): resp = self.client.post(reverse("metastore:get_database_metadata", kwargs={'database': self.db_name})) json_resp = json.loads(resp.content) assert 'data' in json_resp, json_resp assert 'parameters' in json_resp['data'], json_resp - assert not 'message=After Alter' in json_resp['data']['parameters'], json_resp - + assert 'message=After Alter' not in json_resp['data']['parameters'], json_resp + # Alter message resp = self.client.post(reverse("metastore:alter_database", kwargs={'database': self.db_name}), {'properties': json.dumps({'message': 'After Alter'})}) json_resp = json.loads(resp.content) assert 0 == json_resp['status'], json_resp assert '{message=After Alter}' == json_resp['data']['parameters'], json_resp - + def test_alter_table(self): resp = _make_query(self.client, "CREATE TABLE test_alter_table (a int) COMMENT 'Before Alter';", database=self.db_name) resp = wait_for_query_to_finish(self.client, resp, max=30.0) - + resp = self.client.get('/metastore/table/%s/test_alter_table' % self.db_name) assert 'test_alter_table', resp.content assert 'Before Alter', resp.content - + # Alter name resp = self.client.post(reverse("metastore:alter_table", kwargs={'database': self.db_name, 'table': 'test_alter_table'}), {'new_table_name': 'table_altered'}) json_resp = json.loads(resp.content) assert 'table_altered' == json_resp['data']['name'], json_resp - + # Alter comment resp = self.client.post(reverse("metastore:alter_table", kwargs={'database': self.db_name, 'table': 'table_altered'}), {'comment': 'After Alter'}) json_resp = json.loads(resp.content) assert 'After Alter' == json_resp['data']['comment'], json_resp - + # Invalid table name returns error response resp = self.client.post(reverse("metastore:alter_table", kwargs={'database': self.db_name, 'table': 'table_altered'}), @@ -547,15 +548,15 @@ def test_alter_table(self): json_resp = json.loads(resp.content) assert 1 == json_resp['status'], json_resp assert 'Failed to alter table' in json_resp['data'], json_resp - + def test_alter_column(self): resp = _make_query(self.client, 'CREATE TABLE test_alter_column (before_alter int);', database=self.db_name) resp = wait_for_query_to_finish(self.client, resp, max=30.0) - + resp = self.client.get('/metastore/table/%s/test_alter_column' % self.db_name) assert 'before_alter', resp.content assert 'int', resp.content - + # Alter name, type and comment resp = self.client.post( reverse("metastore:alter_column", kwargs={'database': self.db_name, 'table': 'test_alter_column'}), @@ -566,7 +567,7 @@ def test_alter_column(self): assert 'after_alter' == json_resp['data']['name'], json_resp assert 'string' == json_resp['data']['type'], json_resp assert 'alter comment' == json_resp['data']['comment'], json_resp - + # Invalid column type returns error response resp = self.client.post(reverse("metastore:alter_column", kwargs={'database': self.db_name, 'table': 'test_alter_column'}), @@ -584,7 +585,7 @@ def test_parse_simple(self): column = {'name': name, 'type': type, 'comment': comment} parse_tree = 
parser.parse_column(name, type, comment) assert parse_tree == column - + def test_parse_varchar(self): name = 'varchar' type = 'varchar(1000)' @@ -592,7 +593,7 @@ def test_parse_varchar(self): column = {'name': name, 'type': type, 'comment': comment} parse_tree = parser.parse_column(name, type, comment) assert parse_tree == column - + def test_parse_decimal(self): name = 'simple' type = 'decimal(12,2)' @@ -600,7 +601,7 @@ def test_parse_decimal(self): column = {'name': name, 'type': type, 'comment': comment} parse_tree = parser.parse_column(name, type, comment) assert parse_tree == column - + def test_parse_array(self): name = 'array' type = 'array' @@ -608,7 +609,7 @@ def test_parse_array(self): column = {'name': name, 'type': 'array', 'comment': comment, 'item': {'type': 'string'}} parse_tree = parser.parse_column(name, type, comment) assert parse_tree == column - + def test_parse_map(self): name = 'map' type = 'map' @@ -616,7 +617,7 @@ def test_parse_map(self): column = {'name': name, 'type': 'map', 'comment': comment, 'key': {'type': 'string'}, 'value': {'type': 'int'}} parse_tree = parser.parse_column(name, type, comment) assert parse_tree == column - + def test_parse_struct(self): name = 'struct' type = 'struct' @@ -627,7 +628,7 @@ def test_parse_struct(self): } parse_tree = parser.parse_column(name, type, comment) assert parse_tree == column - + def test_parse_nested(self): name = 'nested' type = 'array>' @@ -638,7 +639,7 @@ def test_parse_nested(self): } parse_tree = parser.parse_column(name, type, comment) assert parse_tree == column - + def test_parse_nested_with_array(self): name = 'nested' type = ('struct,' diff --git a/apps/metastore/src/metastore/urls.py b/apps/metastore/src/metastore/urls.py index 119356d2e00..7fb39b78d59 100644 --- a/apps/metastore/src/metastore/urls.py +++ b/apps/metastore/src/metastore/urls.py @@ -17,12 +17,9 @@ import sys -from metastore import views as metastore_views +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from metastore import views as metastore_views urlpatterns = [ re_path(r'^$', metastore_views.index, name='index'), diff --git a/apps/metastore/src/metastore/views.py b/apps/metastore/src/metastore/views.py index 0a591a488eb..2b6e8e7d52b 100644 --- a/apps/metastore/src/metastore/views.py +++ b/apps/metastore/src/metastore/views.py @@ -15,47 +15,39 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from future import standard_library -standard_library.install_aliases() -from builtins import str +import sys import json import logging -import sys -import urllib.request, urllib.parse, urllib.error +import urllib.error +import urllib.parse +import urllib.request +from builtins import str from django.db.models import Q -from django.urls import reverse from django.shortcuts import redirect +from django.urls import reverse from django.utils.functional import wraps +from django.utils.translation import gettext as _ from django.views.decorators.http import require_http_methods -from desktop.conf import has_connectors -from desktop.context_processors import get_app_name -from desktop.lib.django_util import JsonResponse, render -from desktop.lib.exceptions_renderable import PopupException -from desktop.models import Document2, get_cluster_config, _get_apps - -from beeswax.design import hql_query from beeswax.common import find_compute +from beeswax.design import hql_query from beeswax.models import SavedQuery from beeswax.server import dbms from beeswax.server.dbms import get_query_server_config +from desktop.auth.backend import is_admin +from desktop.conf import has_connectors +from desktop.context_processors import get_app_name +from desktop.lib.django_util import JsonResponse, render +from desktop.lib.exceptions_renderable import PopupException from desktop.lib.view_util import location_to_url -from metadata.conf import has_optimizer, has_catalog, get_optimizer_url, get_catalog_url -from notebook.connectors.base import Notebook, QueryError -from notebook.models import make_notebook - +from desktop.models import Document2, _get_apps, get_cluster_config +from metadata.conf import get_catalog_url, get_optimizer_url, has_catalog, has_optimizer from metastore.conf import FORCE_HS2_METADATA -from metastore.forms import LoadDataForm, DbForm +from metastore.forms import DbForm, LoadDataForm from metastore.settings import DJANGO_APPS - -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.connectors.base import Notebook, QueryError +from notebook.models import make_notebook LOG = logging.getLogger() SAVE_RESULTS_CTAS_TIMEOUT = 300 # seconds @@ -84,6 +76,7 @@ def index(request): Database Views """ + def databases(request): search_filter = request.GET.get('filter', '') @@ -235,13 +228,15 @@ def table_queries(request, database, table): """ Table Views """ + + def show_tables(request, database=None): cluster = _find_cluster(request) db = _get_db(user=request.user, cluster=cluster) if database is None: - database = 'default' # Assume always 'default' + database = 'default' # Assume always 'default' if request.GET.get("format", "html") == "json": try: @@ -259,7 +254,7 @@ def show_tables(request, database=None): search_filter = request.GET.get('filter', '') - tables = db.get_tables_meta(database=database, table_names=search_filter) # SparkSql returns [] + tables = db.get_tables_meta(database=database, table_names=search_filter) # SparkSql returns [] table_names = [table['name'] for table in tables] except Exception as e: raise PopupException(_('Failed to retrieve tables for database: %s' % database), detail=e) @@ -304,7 +299,7 @@ def get_table_metadata(request, database, table): 'hdfs_link': table_metadata.hdfs_link, 'is_view': table_metadata.is_view } - except: + except Exception: msg = "Cannot get metadata for table: `%s`.`%s`" LOG.exception(msg) % 
(database, table) response['status'] = 1 @@ -347,7 +342,7 @@ def describe_table(request, database, table): if app_name != 'impala' and table.partition_keys: try: partitions = [_massage_partition(database, table, partition) for partition in db.get_partitions(database, table)] - except: + except Exception: LOG.exception('Table partitions could not be retrieved') return render(renderable, request, { @@ -522,6 +517,7 @@ def read_table(request, database, table): except Exception as e: raise PopupException(_('Cannot read table'), detail=e) + @check_has_write_access_permission def load_table(request, database, table): response = {'status': -1, 'data': 'None'} @@ -552,10 +548,7 @@ def load_table(request, database, table): } query_history = db.load_data(database, table.name, form_data, design, generate_ddl_only=generate_ddl_only) if generate_ddl_only: - if sys.version_info[0] > 2: - last_executed = json.loads(request.POST.get('start_time')) - else: - last_executed = json.loads(request.POST.get('start_time'), '-1') + last_executed = json.loads(request.POST.get('start_time')) job = make_notebook( name=_('Load data in %s.%s') % (database, table.name), editor_type=source_type, @@ -592,10 +585,8 @@ def load_table(request, database, table): 'database': database, 'app_name': 'beeswax' }, force_template=True).content - if sys.version_info[0] > 2: - response['data'] = popup.decode() - else: - response['data'] = popup + + response['data'] = popup.decode() return JsonResponse(response) @@ -622,7 +613,7 @@ def describe_partitions(request, database, table): try: partitions = db.get_partitions(database, table_obj, partition_spec, reverse_sort=reverse_sort) - except: + except Exception: LOG.exception('Table partitions could not be retrieved') partitions = [] massaged_partitions = [_massage_partition(database, table_obj, partition) for partition in partitions] @@ -763,7 +754,6 @@ def has_write_access(user): return is_admin(user) or user.has_hue_permission(action="write", app=DJANGO_APPS[0]) - def _get_db(user, source_type=None, cluster=None): if source_type is None: cluster_config = get_cluster_config(user) @@ -786,6 +776,7 @@ def _find_cluster(request): cluster = find_compute(cluster=cluster, user=request.user, namespace_id=namespace_id, dialect=source_type) return cluster + def _get_servername(db): if has_connectors(): return db.client.query_server['server_name'] diff --git a/apps/oozie/src/oozie/conf.py b/apps/oozie/src/oozie/conf.py index e001861aabf..ef15c28f7d1 100644 --- a/apps/oozie/src/oozie/conf.py +++ b/apps/oozie/src/oozie/conf.py @@ -15,21 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os.path import sys +import os.path + +from django.utils.translation import gettext as _, gettext_lazy as _t -from desktop.lib.conf import Config, coerce_bool from desktop.lib import paths +from desktop.lib.conf import Config, coerce_bool from liboozie.conf import get_oozie_status - from oozie.settings import NICE_NAME -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t - - DEFINITION_XSLT_DIR = Config( key="definition_xslt_dir", default=os.path.join(os.path.dirname(__file__), "importlib", "xslt"), @@ -61,7 +56,9 @@ REMOTE_SAMPLE_DIR = Config( key="remote_data_dir", default="/user/hue/oozie/workspaces", - help=_t("Location on HDFS where the Oozie workflows are stored. Parameters are $TIME and $USER, e.g. 
/user/$USER/hue/workspaces/workflow-$TIME") + help=_t( + "Location on HDFS where the Oozie workflows are stored. Parameters are $TIME and $USER, e.g. /user/$USER/hue/workspaces/workflow-$TIME" + ), ) @@ -69,6 +66,7 @@ def get_oozie_job_count(): '''Returns the maximum of jobs fetched by the API depending on the Hue version''' return 100 + OOZIE_JOBS_COUNT = Config( key='oozie_jobs_count', dynamic_default=get_oozie_job_count, @@ -76,14 +74,14 @@ def get_oozie_job_count(): help=_t('Maximum number of Oozie workflows or coodinators or bundles to retrieve in one API call.') ) -ENABLE_V2 = Config( # Until Hue 4 +ENABLE_V2 = Config( # Until Hue 4 key='enable_v2', default=True, type=coerce_bool, help=_t('Use version 2 of Editor.') ) -ENABLE_CRON_SCHEDULING = Config( # Until Hue 3.8 +ENABLE_CRON_SCHEDULING = Config( # Until Hue 3.8 key='enable_cron_scheduling', default=True, type=coerce_bool, diff --git a/apps/oozie/src/oozie/decorators.py b/apps/oozie/src/oozie/decorators.py index b9f574abc2f..43ab42f1979 100644 --- a/apps/oozie/src/oozie/decorators.py +++ b/apps/oozie/src/oozie/decorators.py @@ -15,25 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import json import logging -import sys from django.utils.functional import wraps +from django.utils.translation import gettext as _ +from desktop.auth.backend import is_admin from desktop.conf import USE_NEW_EDITOR from desktop.lib.exceptions_renderable import PopupException from desktop.models import Document, Document2 - -from oozie.models import Job, Node, Dataset - -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from oozie.models import Dataset, Job, Node LOG = logging.getLogger() @@ -121,7 +114,7 @@ def decorate(request, *args, **kwargs): return wraps(view_func)(decorate) -## Oozie v1 below +# Oozie v1 below def check_job_access_permission(exception_class=PopupException): diff --git a/apps/oozie/src/oozie/forms.py b/apps/oozie/src/oozie/forms.py index cc47656e6a3..ea8d515a0d4 100644 --- a/apps/oozie/src/oozie/forms.py +++ b/apps/oozie/src/oozie/forms.py @@ -15,30 +15,45 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object +import sys import logging +from builtins import object from datetime import datetime, timedelta -import sys +from functools import partial from time import mktime, struct_time from django import forms from django.core.exceptions import ValidationError from django.forms.widgets import TextInput -from functools import partial +from django.utils.translation import gettext_lazy as _t from desktop.lib.django_forms import MultiForm, SplitDateTimeWidget from desktop.models import Document - from oozie.conf import ENABLE_CRON_SCHEDULING -from oozie.models import Workflow, Node, Java, Mapreduce, Streaming, Coordinator,\ - Dataset, DataInput, DataOutput, Pig, Link, Hive, Sqoop, Ssh, Shell, DistCp, Fs,\ - Email, SubWorkflow, Generic, Bundle, BundledCoordinator - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - +from oozie.models import ( + Bundle, + BundledCoordinator, + Coordinator, + DataInput, + DataOutput, + Dataset, + DistCp, + Email, + Fs, + Generic, + Hive, + Java, + Link, + Mapreduce, + Node, + Pig, + Shell, + Sqoop, + Ssh, + Streaming, + SubWorkflow, + Workflow, +) LOG = logging.getLogger() @@ -78,6 +93,7 @@ def get_non_parameters(conf_dict): params = filter(lambda key: key in ParameterForm.NON_PARAMETERS, conf_dict.keys()) return [{'name': name, 'value': conf_dict[name]} for name in params] + class WorkflowForm(forms.ModelForm): class Meta(object): model = Workflow @@ -97,6 +113,7 @@ def __init__(self, *args, **kwargs): SCHEMA_VERSION_CHOICES = ['0.4'] + class ImportWorkflowForm(WorkflowForm): definition_file = forms.FileField(label=_t("Local workflow.xml file")) resource_archive = forms.FileField(label=_t("Workflow resource archive (zip)"), required=False) @@ -122,6 +139,7 @@ class Meta(object): model = Node exclude = ALWAYS_HIDE + ('name', 'description') + class JavaForm(forms.ModelForm): class Meta(object): model = Java @@ -285,6 +303,7 @@ class Meta(object): 'body': forms.Textarea(attrs={'class': 'span8'}), } + class SubWorkflowForm(forms.ModelForm): def __init__(self, *args, **kwargs): @@ -304,7 +323,7 @@ class Meta(object): def clean_sub_workflow(self): try: return Workflow.objects.get(id=int(self.cleaned_data.get('sub_workflow'))) - except: + except Exception: LOG.exception('The sub-workflow could not be found.') return None @@ -565,6 +584,7 @@ class Meta(object): 'schema_version': forms.widgets.HiddenInput(), } + class UpdateCoordinatorForm(forms.Form): endTime = forms.SplitDateTimeField( label='End Time', input_time_formats=[TIME_FORMAT], required=False, initial=datetime.today() + timedelta(days=3), @@ -586,12 +606,11 @@ def __init__(self, *args, **kwargs): super(UpdateCoordinatorForm, self).__init__(*args, **kwargs) self.fields['endTime'].initial = datetime.fromtimestamp(mktime(oozie_coordinator.endTime)) - if type(oozie_coordinator.pauseTime) == struct_time: + if type(oozie_coordinator.pauseTime) is struct_time: self.fields['pauseTime'].initial = datetime.fromtimestamp(mktime(oozie_coordinator.pauseTime)) self.fields['concurrency'].initial = oozie_coordinator.concurrency - def design_form_by_type(node_type, user, workflow): klass_form = _node_type_TO_FORM_CLS[node_type] diff --git a/apps/oozie/src/oozie/importlib/coordinators.py b/apps/oozie/src/oozie/importlib/coordinators.py index 8642a0b8f16..9e1e1df2847 100644 --- a/apps/oozie/src/oozie/importlib/coordinators.py +++ b/apps/oozie/src/oozie/importlib/coordinators.py @@ -15,24 +15,19 @@ # See 
the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
-import logging
 import os
-from lxml import etree
 import sys
+import json
+import logging
 
 from django.core import serializers
+from django.utils.translation import gettext as _
+from lxml import etree
 
 from oozie import conf
-from oozie.models import Workflow, Dataset, DataInput, DataOutput
+from oozie.models import DataInput, DataOutput, Dataset, Workflow
 from oozie.utils import oozie_to_django_datetime, oozie_to_hue_frequency
 
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
-
-
 LOG = logging.getLogger()
 
 OOZIE_NAMESPACES = ['uri:oozie:coordinator:0.1', 'uri:oozie:coordinator:0.2', 'uri:oozie:coordinator:0.3', 'uri:oozie:coordinator:0.4']
@@ -127,6 +122,7 @@ def _reconcile_datasets(coordinator, objects, root, namespace):
       dataoutput.save()
 
 # @TODO(abe): reconcile instance times
 
+
 def _set_properties(coordinator, root, namespace):
   namespaces = {
     'n': namespace
diff --git a/apps/oozie/src/oozie/importlib/workflows.py b/apps/oozie/src/oozie/importlib/workflows.py
index 2a323b60338..02f643a30c7 100644
--- a/apps/oozie/src/oozie/importlib/workflows.py
+++ b/apps/oozie/src/oozie/importlib/workflows.py
@@ -31,29 +31,22 @@
 Every action extension will have its own version via /xslt//extensions/..xslt
 """
 
-from builtins import str
-from past.builtins import basestring
-import json
-import logging
-from lxml import etree
 import os
 import re
 import sys
+import json
+import logging
+from builtins import str
 
 from django.core import serializers
 from django.utils.encoding import smart_str
+from django.utils.translation import gettext as _
+from lxml import etree
+from past.builtins import basestring
 
 from desktop.models import Document
-
-from oozie.conf import DEFINITION_XSLT_DIR, DEFINITION_XSLT2_DIR
-from oozie.models import Workflow, Node, Link, Start, End,\
-  Decision, DecisionEnd, Fork, Join,\
-  Kill
-
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
+from oozie.conf import DEFINITION_XSLT2_DIR, DEFINITION_XSLT_DIR
+from oozie.models import Decision, DecisionEnd, End, Fork, Join, Kill, Link, Node, Start, Workflow
 
 LOG = logging.getLogger()
@@ -552,7 +545,7 @@ def _preprocess_nodes(workflow, transformed_root, workflow_definition_root, node
     if 'cred' in action_el.attrib:
       for full_node in nodes:
         if full_node.name == action_el.attrib['name']:
-          full_node.credentials = [{"name": cred, "value": True} for cred in action_el.attrib['cred'].split(',')];
+          full_node.credentials = [{"name": cred, "value": True} for cred in action_el.attrib['cred'].split(',')]
 
   for full_node in nodes:
     if full_node.node_type == 'start':
@@ -703,13 +696,12 @@ def import_workflow_root(workflow, workflow_definition_root, metadata=None, fs=N
 
 def import_workflow(workflow, workflow_definition, metadata=None, fs=None):
   # Parse Workflow Definition
-  if sys.version_info[0] > 2:
-    # In Py3 anything like at the beginning
-    # of a workflow XML cannot be parsed via etree.fromstring(), since the
-    # workflow_definition string needs to be encoded.
-    workflow_definition_root = etree.XML(workflow_definition.encode())
-  else:
-    workflow_definition_root = etree.fromstring(workflow_definition)
+
+  # In Py3 anything like at the beginning
+  # of a workflow XML cannot be parsed via etree.fromstring(), since the
+  # workflow_definition string needs to be encoded.
+  workflow_definition_root = etree.XML(workflow_definition.encode())
+
   if workflow_definition_root is None:
     raise RuntimeError(
       _("Could not find any nodes in Workflow definition. Maybe it's malformed?"))
@@ -719,13 +711,12 @@ def import_workflow(workflow, workflow_definition, metadata=None, fs=None):
 
 def generate_v2_graph_nodes(workflow_definition):
   # Parse Workflow Definition
-  if sys.version_info[0] > 2:
-    # In Py3 anything like at the beginning
-    # of a workflow XML cannot be parsed via etree.fromstring(), since the
-    # workflow_definition string needs to be encoded.
-    workflow_definition_root = etree.XML(workflow_definition.encode())
-  else:
-    workflow_definition_root = etree.fromstring(workflow_definition)
+
+  # In Py3 anything like at the beginning
+  # of a workflow XML cannot be parsed via etree.fromstring(), since the
+  # workflow_definition string needs to be encoded.
+  workflow_definition_root = etree.XML(workflow_definition.encode())
+
   if workflow_definition_root is None:
     raise MalformedWfDefException()
@@ -748,13 +739,12 @@ def generate_v2_graph_nodes(workflow_definition):
   # Transform XML using XSLT
   transformed_root = transform(workflow_definition_root)
 
-  node_list = re.sub('[\s]', '', str(transformed_root))
+  node_list = re.sub(r'[\s]', '', str(transformed_root))
   node_list = json.loads(node_list)
 
   return [node for node in node_list if node]
 
-
 class MalformedWfDefException(Exception):
   pass
diff --git a/apps/oozie/src/oozie/management/commands/oozie_setup.py b/apps/oozie/src/oozie/management/commands/oozie_setup.py
index 9e3f47dde5e..4831611e726 100644
--- a/apps/oozie/src/oozie/management/commands/oozie_setup.py
+++ b/apps/oozie/src/oozie/management/commands/oozie_setup.py
@@ -15,35 +15,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json -import logging import os -from lxml import etree import sys +import json +import logging from django.core import management from django.core.management.base import BaseCommand from django.db import transaction +from django.utils.translation import gettext as _ +from lxml import etree from desktop.conf import USE_NEW_EDITOR from desktop.models import Directory, Document, Document2, Document2Permission from hadoop import cluster from liboozie.submittion import create_directories from notebook.models import make_notebook - -from useradmin.models import get_default_user_group, install_sample_user - -from oozie.conf import LOCAL_SAMPLE_DATA_DIR, LOCAL_SAMPLE_DIR, REMOTE_SAMPLE_DIR, ENABLE_V2 -from oozie.models import Workflow, Coordinator, Bundle -from oozie.importlib.workflows import import_workflow_root -from oozie.importlib.coordinators import import_coordinator_root +from oozie.conf import ENABLE_V2, LOCAL_SAMPLE_DATA_DIR, LOCAL_SAMPLE_DIR, REMOTE_SAMPLE_DIR from oozie.importlib.bundles import import_bundle_root - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from oozie.importlib.coordinators import import_coordinator_root +from oozie.importlib.workflows import import_workflow_root +from oozie.models import Bundle, Coordinator, Workflow +from useradmin.models import get_default_user_group, install_sample_user LOG = logging.getLogger() @@ -71,8 +64,7 @@ def _import_workflows(self, directory, managed=True): workflow.save() Workflow.objects.initialize(workflow) import_workflow_root(workflow=workflow, workflow_definition_root=workflow_root, metadata=metadata, fs=self.fs) - workflow.doc.all().delete() # Delete doc as it messes up the example sharing - + workflow.doc.all().delete() # Delete doc as it messes up the example sharing def _import_coordinators(self, directory): @@ -93,7 +85,6 @@ def _import_coordinators(self, directory): coordinator.save() import_coordinator_root(coordinator=coordinator, coordinator_definition_root=coordinator_root, metadata=metadata) - def _import_bundles(self, directory): for example_directory_name in os.listdir(directory): @@ -114,7 +105,6 @@ def _import_bundles(self, directory): bundle.save() import_bundle_root(bundle=bundle, bundle_definition_root=bundle_root, metadata=metadata) - def _install_mapreduce_example(self): doc2 = None name = _('MapReduce Sleep Job') @@ -275,7 +265,6 @@ def _install_spark_example(self): return doc2 - def _install_pyspark_example(self): doc2 = None name = _('PySpark Pi Estimator Job') @@ -332,7 +321,6 @@ def install_examples(self): unmanaged_dir = os.path.join(data_dir, 'unmanaged') self._import_workflows(unmanaged_dir, managed=False) - def handle(self, *args, **options): self.user = install_sample_user() self.fs = cluster.get_hdfs() @@ -369,10 +357,7 @@ def handle(self, *args, **options): if ENABLE_V2.get(): with transaction.atomic(): - if sys.version_info[0] > 2: - management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2) - else: - management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2, commit=False) + management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2) # Install editor oozie examples without doc1 link LOG.info("Using Hue 4, will install oozie editor samples.") diff --git a/apps/oozie/src/oozie/models.py b/apps/oozie/src/oozie/models.py index da76490b2ab..c32733b91a9 100644 --- a/apps/oozie/src/oozie/models.py +++ b/apps/oozie/src/oozie/models.py @@ 
-15,63 +15,49 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import range -from past.builtins import basestring -from builtins import object -import json -import copy -import logging import re import sys +import copy +import json import time +import logging import zipfile - +from builtins import object, range from datetime import datetime, timedelta -from string import Template +from io import BytesIO as string_io from itertools import chain +from string import Template -from django.db import models, transaction -from django.db.models import Q -from django.urls import reverse -from django.core.validators import RegexValidator +import django.utils.timezone as dtz from django.contrib.contenttypes.fields import GenericRelation from django.contrib.contenttypes.models import ContentType +from django.core.validators import RegexValidator +from django.db import models, transaction +from django.db.models import Q from django.forms.models import inlineformset_factory -from django.utils.encoding import smart_str -import django.utils.timezone as dtz +from django.urls import reverse +from django.utils.encoding import force_str, smart_str +from django.utils.translation import gettext as _, gettext_lazy as _t +from past.builtins import basestring from desktop.auth.backend import is_admin -from desktop.log.access import access_warn from desktop.lib import django_mako from desktop.lib.exceptions_renderable import PopupException from desktop.lib.json_utils import JSONEncoderForHTML +from desktop.log.access import access_warn from desktop.models import Document from hadoop.fs.exceptions import WebHdfsException from hadoop.fs.hadoopfs import Hdfs -from liboozie.submittion import Submission -from liboozie.submittion import create_directories -from useradmin.models import User - +from liboozie.submittion import Submission, create_directories from oozie.conf import REMOTE_SAMPLE_DIR -from oozie.utils import utc_datetime_format from oozie.timezones import TIMEZONES - -if sys.version_info[0] > 2: - from io import BytesIO as string_io - from django.utils.encoding import force_str - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from cStringIO import StringIO as string_io - from django.utils.encoding import force_unicode as force_str - from django.utils.translation import ugettext as _, ugettext_lazy as _t - +from oozie.utils import utc_datetime_format +from useradmin.models import User LOG = logging.getLogger() PATH_MAX = 512 -name_validator = RegexValidator(regex='^[a-zA-Z_][\-_a-zA-Z0-9]{1,39}$', +name_validator = RegexValidator(regex=r'^[a-zA-Z_][\-_a-zA-Z0-9]{1,39}$', message=_('Enter a valid value: combination of 2 - 40 letters and digits starting by a letter')) # To sync in worklow.models.js DEFAULT_SLA = [ @@ -127,10 +113,10 @@ class Job(models.Model): db_index=True, verbose_name=_t('Owner'), help_text=_t('Person who can modify the job.') - ) # Deprecated - name = models.CharField(max_length=255, blank=False, validators=[name_validator], # Deprecated + ) # Deprecated + name = models.CharField(max_length=255, blank=False, validators=[name_validator], # Deprecated help_text=_t('Name of the job, which must be unique per user.'), verbose_name=_t('Name')) - description = models.CharField(max_length=1024, blank=True, verbose_name=_t('Description'), # Deprecated + description = models.CharField(max_length=1024, blank=True, 
verbose_name=_t('Description'), # Deprecated help_text=_t('The purpose of the job.')) last_modified = models.DateTimeField(auto_now=True, db_index=True, verbose_name=_t('Last modified')) schema_version = models.CharField(max_length=128, verbose_name=_t('Schema version'), @@ -138,11 +124,11 @@ class Job(models.Model): deployment_dir = models.CharField(max_length=1024, blank=True, verbose_name=_t('HDFS deployment directory'), help_text=_t('The path on the HDFS where all the workflows and ' 'dependencies must be uploaded.')) - is_shared = models.BooleanField(default=False, db_index=True, verbose_name=_t('Is shared'), # Deprecated + is_shared = models.BooleanField(default=False, db_index=True, verbose_name=_t('Is shared'), # Deprecated help_text=_t('Enable other users to have access to this job.')) parameters = models.TextField(default='[{"name":"oozie.use.system.libpath","value":"true"}]', verbose_name=_t('Oozie parameters'), help_text=_t('Parameters used at the submission time (e.g. market=US, oozie.use.system.libpath=true).')) - is_trashed = models.BooleanField(default=False, db_index=True, verbose_name=_t('Is trashed'), blank=True, # Deprecated + is_trashed = models.BooleanField(default=False, db_index=True, verbose_name=_t('Is trashed'), blank=True, # Deprecated help_text=_t('If this job is trashed.')) doc = GenericRelation(Document, related_query_name='oozie_doc') data = models.TextField(blank=True, default=json.dumps({})) # e.g. data=json.dumps({'sla': [python data], ...}) @@ -231,7 +217,7 @@ def find_all_parameters(self): for param in self.get_parameters(): params[param['name'].strip()] = param['value'] - return [{'name': name, 'value': value} for name, value in params.items()] + return [{'name': name, 'value': value} for name, value in params.items()] def can_read(self, user): try: @@ -271,7 +257,7 @@ def sla(self, sla): @property def sla_enabled(self): - return self.sla[0]['value'] # #1 is enabled + return self.sla[0]['value'] # #1 is enabled class WorkflowManager(models.Manager): @@ -331,8 +317,8 @@ def check_workspace(self, workflow, fs): def destroy(self, workflow, fs): Submission(workflow.owner, workflow, fs, None, {}).remove_deployment_dir() try: - workflow.coordinator_set.update(workflow=None) # In Django 1.3 could do ON DELETE set NULL - except: + workflow.coordinator_set.update(workflow=None) # In Django 1.3 could do ON DELETE set NULL + except Exception: LOG.exception('failed to destroy workflow') workflow.save() @@ -371,7 +357,7 @@ def get_properties(self): return json.loads(self.job_properties) def clone(self, fs, new_owner=None): - source_deployment_dir = self.deployment_dir # Needed + source_deployment_dir = self.deployment_dir # Needed nodes = self.node_set.all() links = Link.objects.filter(parent__workflow=self) @@ -458,7 +444,7 @@ def has_cycle(self): graph_edges = set([edge for node in self.node_set.all() for edge in node.get_children_links()]) - return len(graph_edges - removed_edges) > 0 # Graph does not have unseen edges + return len(graph_edges - removed_edges) > 0 # Graph does not have unseen edges def find_parameters(self): params = set() @@ -482,7 +468,7 @@ def node_list(self): """Return a flatten node list ordered by the hierarchy of the nodes in the workflow""" def flatten(nodes): flat = [] - if type(nodes) == list: + if type(nodes) is list: for node in nodes: flat.extend(flatten(node)) else: @@ -512,7 +498,7 @@ def get_absolute_url(self): return reverse('oozie:edit_workflow', kwargs={'workflow': self.id}) + '#editWorkflow' def get_hierarchy(self): - node = 
Start.objects.get(workflow=self) # Uncached version of start. + node = Start.objects.get(workflow=self) # Uncached version of start. kill = Kill.objects.get(workflow=node.workflow) # Special case: manage error email actions separately try: @@ -531,7 +517,7 @@ def get_hierarchy_rec(self, node=None): parents = node.get_parents() if isinstance(node, End): - return [] # Not returning the end node + return [] # Not returning the end node elif isinstance(node, Decision): children = node.get_children('start') return [[node] + [[self.get_hierarchy_rec(node=child) for child in children], @@ -567,7 +553,7 @@ def gen_status_graph(self, oozie_workflow): @classmethod def gen_status_graph_from_xml(cls, user, oozie_workflow): - from oozie.importlib.workflows import import_workflow # Circular dependency + from oozie.importlib.workflows import import_workflow # Circular dependency try: with transaction.atomic(): @@ -588,7 +574,7 @@ def to_xml(self, mapping=None): if mapping is None: mapping = {} tmpl = 'editor/gen/workflow.xml.mako' - xml = re.sub(re.compile('\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, {'workflow': self, 'mapping': mapping})) + xml = re.sub(re.compile('\\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, {'workflow': self, 'mapping': mapping})) return force_str(xml) def compress(self, mapping=None, fp=string_io()): @@ -632,7 +618,7 @@ def sla_workflow_enabled(self): @property def credentials(self): sub_lists = [node.credentials for node in self.node_list if hasattr(node, 'credentials')] - return set([item['name'] for l in sub_lists for item in l if item['value']]) + return set([item['name'] for sub in sub_lists for item in sub if item['value']]) class Link(models.Model): @@ -835,7 +821,7 @@ def sla(self, sla): @property def sla_enabled(self): - return self.sla[0]['value'] # #1 is enabled + return self.sla[0]['value'] # #1 is enabled @property def credentials(self): @@ -1033,7 +1019,7 @@ class Hive(Action): params = models.TextField( default="[]", verbose_name=_t('Parameters'), - help_text=_t('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': node_type.title()} + help_text=_t('The %(type)s parameters of the script. E.g. 
N=5, INPUT=${inputDir}') % {'type': node_type.title()} ) files = models.TextField(default="[]", verbose_name=_t('Files'), help_text=_t('List of names or paths of files to be added to the distributed cache and the task running directory.')) @@ -1126,7 +1112,7 @@ class Ssh(Action): command = models.CharField(max_length=256, verbose_name=_t('%(type)s command') % {'type': node_type.title()}, help_text=_t('The command that will be executed.')) params = models.TextField(default="[]", verbose_name=_t('Arguments'), - help_text=_t('The arguments of the %(type)s command.') % {'type': node_type.title()}) + help_text=_t('The arguments of the %(type)s command.') % {'type': node_type.title()}) capture_output = models.BooleanField( default=False, verbose_name=_t('Capture output'), @@ -1206,7 +1192,6 @@ class DistCp(Action): 'Properties specified in the Job Properties element override properties specified in the ' 'files specified in the Job XML element.')) - def get_properties(self): return json.loads(self.job_properties) @@ -1239,7 +1224,6 @@ class Fs(Action): touchzs = models.TextField(default="[]", verbose_name=_t('Create or touch a file'), blank=True, help_text=_t('Creates a zero length file in the specified path if none exists or touch it.')) - def get_deletes(self): return json.loads(self.deletes) @@ -1467,9 +1451,9 @@ def to_xml(self, mapping): class Coordinator(Job): frequency_number = models.SmallIntegerField(default=1, choices=FREQUENCY_NUMBERS, verbose_name=_t('Frequency number'), help_text=_t('The number of units of the rate at which ' - 'data is periodically created.')) # unused + 'data is periodically created.')) # unused frequency_unit = models.CharField(max_length=20, choices=FREQUENCY_UNITS, default='days', verbose_name=_t('Frequency unit'), - help_text=_t('The unit of the rate at which data is periodically created.')) # unused + help_text=_t('The unit of the rate at which data is periodically created.')) # unused timezone = models.CharField( max_length=32, choices=TIMEZONES, @@ -1532,7 +1516,7 @@ def to_xml(self, mapping=None): if mapping is None: mapping = {} tmpl = "editor/gen/coordinator.xml.mako" - return re.sub(re.compile('\s*\n+', re.MULTILINE), '\n', + return re.sub(re.compile('\\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, {'coord': self, 'mapping': mapping})).encode('utf-8', 'xmlcharrefreplace') def clone(self, new_owner=None): @@ -1601,7 +1585,7 @@ def get_properties(self): index = [prop['name'] for prop in props] for prop in self.coordinatorworkflow.get_parameters(): - if not prop['name'] in index: + if prop['name'] not in index: props.append(prop) index.append(prop['name']) @@ -1726,6 +1710,7 @@ class Meta(object): if sys.version_info[0] < 3: manager_inheritance_from_future = True + class DatasetManager(models.Manager): def can_read_or_exception(self, request, dataset_id): if dataset_id is None: @@ -1897,7 +1882,7 @@ def to_xml(self, mapping=None): tmpl = "editor/gen/bundle.xml.mako" return force_str( - re.sub(re.compile('\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, { + re.sub(re.compile('\\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, { 'bundle': self, 'mapping': mapping }))) @@ -1992,6 +1977,7 @@ class Meta(object): if sys.version_info[0] < 3: manager_inheritance_from_future = True + class HistoryManager(models.Manager): def create_from_submission(self, submission): History.objects.create(submitter=submission.user, @@ -2099,6 +2085,7 @@ def find_parameters(instance, fields=None): return params + def 
find_json_parameters(fields): # To make smarter # Input is list of json dict diff --git a/apps/oozie/src/oozie/models2.py b/apps/oozie/src/oozie/models2.py index a976575ca84..ab7694558ef 100644 --- a/apps/oozie/src/oozie/models2.py +++ b/apps/oozie/src/oozie/models2.py @@ -16,54 +16,43 @@ # limitations under the License. from __future__ import division -from builtins import str -from past.builtins import basestring -from builtins import object -import json -import logging -import math + import os import re import sys +import json +import math import time import uuid - +import logging +from builtins import object, str from datetime import datetime, timedelta -from dateutil.parser import parse from string import Template from xml.sax.saxutils import escape -from django.urls import reverse +from dateutil.parser import parse from django.db.models import Q +from django.urls import reverse +from django.utils.encoding import force_str +from django.utils.translation import gettext as _ +from past.builtins import basestring from azure.abfs.__init__ import abfspath - from desktop.conf import USE_DEFAULT_CONFIGURATION from desktop.lib import django_mako from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import smart_str from desktop.lib.json_utils import JSONEncoderForHTML -from desktop.models import DefaultConfiguration, Document2, Document - -from hadoop.fs.hadoopfs import Hdfs +from desktop.models import DefaultConfiguration, Document, Document2 from hadoop.fs.exceptions import WebHdfsException - +from hadoop.fs.hadoopfs import Hdfs from liboozie.conf import SECURITY_ENABLED from liboozie.oozie_api import get_oozie -from liboozie.submission2 import Submission -from liboozie.submission2 import create_directories +from liboozie.submission2 import Submission, create_directories from notebook.models import Notebook - from oozie.conf import REMOTE_SAMPLE_DIR -from oozie.utils import utc_datetime_format, UTC_TIME_FORMAT, convert_to_server_timezone -from oozie.importlib.workflows import generate_v2_graph_nodes, MalformedWfDefException, InvalidTagWithNamespaceException - -if sys.version_info[0] > 2: - from django.utils.encoding import force_str - from django.utils.translation import gettext as _ -else: - from django.utils.encoding import force_unicode as force_str - from django.utils.translation import ugettext as _ +from oozie.importlib.workflows import InvalidTagWithNamespaceException, MalformedWfDefException, generate_v2_graph_nodes +from oozie.utils import UTC_TIME_FORMAT, convert_to_server_timezone, utc_datetime_format WORKFLOW_DEPTH_LIMIT = 24 LOG = logging.getLogger() @@ -80,7 +69,7 @@ def find_all_parameters(self, with_lib_path=True): if params.get('nominal_time') == '': params['nominal_time'] = datetime.today().strftime(UTC_TIME_FORMAT) - return [{'name': name, 'value': value} for name, value in params.items() if with_lib_path or name != 'oozie.use.system.libpath'] + return [{'name': name, 'value': value} for name, value in params.items() if with_lib_path or name != 'oozie.use.system.libpath'] @classmethod def get_workspace(cls, user): @@ -296,7 +285,7 @@ def gen_workflow_data_from_xml(cls, user, oozie_workflow): try: _get_hierarchy_from_adj_list(adj_list, adj_list['start']['ok_to'], node_hierarchy) except WorkflowDepthReached: - LOG.warning("The Workflow: %s with id: %s, has reached the maximum allowed depth for Graph display " \ + LOG.warning("The Workflow: %s with id: %s, has reached the maximum allowed depth for Graph display " % (oozie_workflow.appName, 
oozie_workflow.id)) # Hide graph same as when total nodes > 30 return {} @@ -492,7 +481,7 @@ def to_xml(self, mapping=None): [(workflow.uuid, Workflow(document=workflow, user=self.user)) for workflow in Document2.objects.filter(uuid__in=sub_wfs_ids)] ) - xml = re.sub(re.compile('>\s*\n+', re.MULTILINE), '>\n', django_mako.render_to_string(tmpl, { + xml = re.sub(re.compile('>\\s*\n+', re.MULTILINE), '>\n', django_mako.render_to_string(tmpl, { 'wf': self, 'workflow': data['workflow'], 'nodes': nodes, @@ -555,7 +544,6 @@ def _get_node(rows, node_id): elif row['widgets'][0]['id'] == node_id: return row - # Create wf data with above nodes return json.dumps({ 'layout': [{ @@ -626,7 +614,7 @@ def _update_adj_list(adj_list): def _dig_nodes(nodes, adj_list, user, wf_nodes, nodes_uuid_set): for node in nodes: - if type(node) != list: + if type(node) is not list: node = adj_list[node] if node['uuid'] not in nodes_uuid_set: properties = {} @@ -698,9 +686,9 @@ def _dig_nodes(nodes, adj_list, user, wf_nodes, nodes_uuid_set): def _create_workflow_layout(nodes, adj_list, nodes_uuid_set, size=12): wf_rows = [] for node in nodes: - if type(node) == list and len(node) == 1: + if type(node) is list and len(node) == 1: node = node[0] - if type(node) != list: + if type(node) is not list: _append_to_wf_rows( wf_rows, nodes_uuid_set, row_id=adj_list[node]['uuid'], row={ @@ -813,6 +801,7 @@ def _get_hierarchy_from_adj_list_helper(adj_list, curr_node, node_hierarchy, wor node_hierarchy.append(curr_node) return _get_hierarchy_from_adj_list_helper(adj_list, adj_list[curr_node]['ok_to'], node_hierarchy, workflow_depth - 1) + def _create_graph_adjaceny_list(nodes): start_node = [node for node in nodes if node.get('node_type') == 'start'][0] adj_list = {'start': start_node} @@ -846,11 +835,13 @@ def to_xml(self, mapping=None, node_mapping=None, workflow_mapping=None): if self.data['type'] == 'fork': links = [link for link in self.data['children'] if link['to'] in node_mapping] if len(links) != len(self.data['children']): - LOG.warning('Fork has some children links that do not exist, ignoring them: links %s, existing links %s, links %s, existing links %s' \ - % (len(links), len(self.data['children']), links, self.data['children'])) + LOG.warning( + 'Fork has some children links that do not exist, ignoring them: links %s, existing links %s, links %s, existing links %s' + % (len(links), len(self.data['children']), links, self.data['children']) + ) self.data['children'] = links - if self.data['type'] == AltusAction.TYPE or (('altus' in mapping.get('cluster', '') and (self.data['type'] == SparkDocumentAction.TYPE \ + if self.data['type'] == AltusAction.TYPE or (('altus' in mapping.get('cluster', '') and (self.data['type'] == SparkDocumentAction.TYPE or self.data['type'] == 'spark-document'))) or mapping.get('auto-cluster'): shell_command_name = self.data['name'] + '.sh' self.data['properties']['shell_command'] = shell_command_name @@ -866,14 +857,14 @@ def to_xml(self, mapping=None, node_mapping=None, workflow_mapping=None): properties = notebook.get_data()['snippets'][0]['properties'] self.data['properties']['main_class'] = properties['class'] - self.data['properties']['app_jar'] = properties['app_jar'] # Not used here + self.data['properties']['app_jar'] = properties['app_jar'] # Not used here self.data['properties']['files'] = [{'value': f['path']} for f in properties['files']] self.data['properties']['arguments'] = [{'value': prop} for prop in properties['arguments']] elif self.data['type'] == SparkDocumentAction.TYPE or 
self.data['type'] == 'spark-document': notebook = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=self.data['properties']['uuid'])) properties = notebook.get_data()['snippets'][0]['properties'] - if self.data['type'] == 'spark-document': # Oozie Document Action + if self.data['type'] == 'spark-document': # Oozie Document Action self.data['properties']['app_name'] = properties['app_name'] self.data['properties']['class'] = properties['class'] @@ -893,7 +884,7 @@ def to_xml(self, mapping=None, node_mapping=None, workflow_mapping=None): self.data['properties']['parameters'] = [] for param in action['variables']: self.data['properties']['parameters'].insert(0, {'value': '%(name)s=%(value)s' % param}) - self.data['properties']['arguments'] = [] # Not Picked yet + self.data['properties']['arguments'] = [] # Not Picked yet job_properties = [] for prop in action['properties']['hadoopProperties']: @@ -944,7 +935,6 @@ def to_xml(self, mapping=None, node_mapping=None, workflow_mapping=None): self.data['properties']['files'].append({'value': shell_command}) self.data['properties']['shell_command'] = Hdfs.basename(shell_command) - elif self.data['type'] == MapReduceDocumentAction.TYPE: notebook = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=self.data['properties']['uuid'])) action = notebook.get_data()['snippets'][0] @@ -1090,9 +1080,9 @@ def get_template_name(self, mapping=None): node_type = ShellAction.TYPE elif self.data['type'] == AltusAction.TYPE: node_type = ShellAction.TYPE - elif mapping.get('cluster') and 'document' in node_type: # Workflow + elif mapping.get('cluster') and 'document' in node_type: # Workflow node_type = ShellAction.TYPE - elif mapping.get('auto-cluster') and 'document' in node_type: # Scheduled workflow + elif mapping.get('auto-cluster') and 'document' in node_type: # Scheduled workflow node_type = ShellAction.TYPE return 'editor2/gen/workflow-%s.xml.mako' % node_type @@ -1347,7 +1337,7 @@ class HiveAction(Action): 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, # Common @@ -1432,7 +1422,7 @@ class HiveServer2Action(Action): 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. 
N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, 'arguments': { @@ -1529,7 +1519,7 @@ def _get_impala_url(): class ImpalaAction(HiveServer2Action): # Executed as shell action until Oozie supports an Impala Action TYPE = 'impala' - DEFAULT_CREDENTIALS = '' # None at this time, need to upload user keytab + DEFAULT_CREDENTIALS = '' # None at this time, need to upload user keytab FIELDS = HiveServer2Action.FIELDS.copy() del FIELDS['jdbc_url'] @@ -2258,7 +2248,6 @@ def get_mandatory_fields(cls): return [cls.FIELDS['files'], cls.FIELDS['jars']] - class AltusAction(Action): TYPE = 'altus' FIELDS = { @@ -2441,7 +2430,7 @@ def get_mandatory_fields(cls): class ImpalaDocumentAction(HiveDocumentAction): TYPE = 'impala-document' - DEFAULT_CREDENTIALS = '' # None at this time, need to upload user keytab + DEFAULT_CREDENTIALS = '' # None at this time, need to upload user keytab FIELDS = HiveServer2Action.FIELDS.copy() del FIELDS['jdbc_url'] @@ -2606,11 +2595,11 @@ class SparkDocumentAction(Action): 'value': [], 'help_text': _('Arguments, one by one, e.g. 1000, /path/a.') }, - 'parameters': { # For Oozie Action Document + 'parameters': { # For Oozie Action Document 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, # Common @@ -2668,7 +2657,7 @@ class PigDocumentAction(Action): 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, # Common @@ -2742,7 +2731,7 @@ class SqoopDocumentAction(Action): 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, # Common @@ -2816,7 +2805,7 @@ class DistCpDocumentAction(Action): 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, # Common @@ -2873,7 +2862,7 @@ class ShellDocumentAction(Action): 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, # Common @@ -2947,7 +2936,7 @@ class MapReduceDocumentAction(Action): 'name': 'parameters', 'label': _('Parameters'), 'value': [], - 'help_text': _('The %(type)s parameters of the script. E.g. N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, + 'help_text': _('The %(type)s parameters of the script. E.g. 
N=5, INPUT=${inputDir}') % {'type': TYPE.title()}, 'type': '' }, # Common @@ -3057,7 +3046,6 @@ def get_mandatory_fields(cls): WORKFLOW_NODE_PROPERTIES.update(node.FIELDS) - def find_parameters(instance, fields=None): """Find parameters in the given fields""" if fields is None: @@ -3078,6 +3066,7 @@ def find_parameters(instance, fields=None): return params + def find_json_parameters(fields): # Input is list of json dict params = [] @@ -3092,13 +3081,15 @@ def find_json_parameters(fields): return params + def find_dollar_variables(text): - return re.findall('[^\n\\\\]\$([^\{ \'\"\-;\(\)]+)', text, re.MULTILINE) + return re.findall('[^\n\\\\]\\$([^\\{ \'\"\\-;\\(\\)]+)', text, re.MULTILINE) + def find_dollar_braced_variables(text): vars = set() - for var in re.findall('\$\{([A-Za-z0-9:_-]+)\}', text, re.MULTILINE): + for var in re.findall(r'\$\{([A-Za-z0-9:_-]+)\}', text, re.MULTILINE): if ':' in var: var = var.split(':', 1)[1] vars.add(var) @@ -3162,9 +3153,9 @@ def _create_layout(nodes, size=12): wf_rows = [] for node in nodes: - if type(node) == list and len(node) == 1: + if type(node) is list and len(node) == 1: node = node[0] - if type(node) != list: + if type(node) is not list: wf_rows.append({ "widgets": [{ "size": size, "name": node.name.title(), "id": uuids[node.id], "widgetType": "%s-widget" % node.node_type, @@ -3193,7 +3184,7 @@ def _create_layout(nodes, size=12): "widgets": c['widgets'], "columns": [] } - for c in col] if type(col) == list else [{ + for c in col] if type(col) is list else [{ "id": str(uuid.uuid4()), "widgets": col['widgets'], "columns": [] @@ -3222,11 +3213,10 @@ def _create_layout(nodes, size=12): if wf_rows: data['layout'][0]['rows'] = [data['layout'][0]['rows'][0]] + wf_rows + [data['layout'][0]['rows'][-1]] - # Content def _dig_nodes(nodes): for node in nodes: - if type(node) != list: + if type(node) is not list: properties = {} if '%s-widget' % node.node_type in NODES: properties = dict(NODES['%s-widget' % node.node_type].get_fields()) @@ -3368,7 +3358,6 @@ def _dig_nodes(nodes): return Workflow(data=json.dumps(data)) - class Coordinator(Job): XML_FILE_NAME = 'coordinator.xml' PROPERTY_APP_PATH = 'oozie.coord.application.path' @@ -3388,7 +3377,7 @@ def __init__(self, data=None, json_data=None, document=None): 'id': None, 'uuid': None, 'name': 'My Schedule', - 'variables': [], # Aka workflow parameters + 'variables': [], # Aka workflow parameters 'properties': { 'description': '', 'deployment_dir': '', @@ -3429,11 +3418,11 @@ def get_data_for_json(self): _data = self.data.copy() start_date = [a for a in self._data['properties']['parameters'] if a['name'] == 'start_date'] - if start_date and type(start_date[0]['value']) == datetime: + if start_date and type(start_date[0]['value']) is datetime: start_date[0]['value'] = start_date[0]['value'].strftime('%Y-%m-%dT%H:%M:%S') end_date = [a for a in self._data['properties']['parameters'] if a['name'] == 'end_date'] - if end_date and type(end_date[0]['value']) == datetime: + if end_date and type(end_date[0]['value']) is datetime: end_date[0]['value'] = end_date[0]['value'].strftime('%Y-%m-%dT%H:%M:%S') return _data @@ -3446,10 +3435,10 @@ def to_json_for_html(self): @property def data(self): - if type(self._data['properties']['start']) != datetime and not '$' in self._data['properties']['start']: + if type(self._data['properties']['start']) is not datetime and '$' not in self._data['properties']['start']: self._data['properties']['start'] = parse(self._data['properties']['start']) - if 
type(self._data['properties']['end']) != datetime and not '$' in self._data['properties']['end']: + if type(self._data['properties']['end']) is not datetime and '$' not in self._data['properties']['end']: self._data['properties']['end'] = parse(self._data['properties']['end']) if self.document is not None: @@ -3462,7 +3451,7 @@ def data(self): @property def name(self): - from notebook.connectors.oozie_batch import OozieApi # Import dependency + from notebook.connectors.oozie_batch import OozieApi # Import dependency if self.data['properties']['document']: return _("%s for %s") % (OozieApi.SCHEDULE_JOB_PREFIX, self.data['name'] or self.data['type']) @@ -3495,7 +3484,7 @@ def find_parameters(self): # Get missed params from workflow for prop in self.workflow.find_parameters(): - if not prop in params: + if prop not in params: params.add(prop) # Remove the ones filled up by coordinator @@ -3566,7 +3555,7 @@ def to_xml(self, mapping=None): tmpl = "editor2/gen/coordinator.xml.mako" return re.sub( - re.compile('\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, {'coord': self, 'mapping': mapping}) + re.compile('\\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, {'coord': self, 'mapping': mapping}) ).encode('utf-8', 'xmlcharrefreplace') def clear_workflow_params(self): @@ -3575,7 +3564,7 @@ def clear_workflow_params(self): @property def properties(self): - props = [{'name': dataset['workflow_variable'], 'value': dataset['dataset_variable']} \ + props = [{'name': dataset['workflow_variable'], 'value': dataset['dataset_variable']} for dataset in self.data['variables'] if dataset['dataset_type'] == 'parameter'] props += self.data['properties']['parameters'] return props @@ -3681,7 +3670,6 @@ def is_advanced_end_instance(self): return not self.is_int(self.data['advanced_end_instance']) - class Bundle(Job): XML_FILE_NAME = 'bundle.xml' PROPERTY_APP_PATH = 'oozie.bundle.application.path' @@ -3735,7 +3723,7 @@ def to_json_for_html(self): @property def data(self): - if type(self._data['properties']['kickoff']) == str and sys.version_info[2] == 2: + if type(self._data['properties']['kickoff']) is str and sys.version_info[2] == 2: self._data['properties']['kickoff'] = parse(self._data['properties']['kickoff']) if self.document is not None: @@ -3750,7 +3738,7 @@ def to_xml(self, mapping=None): mapping.update(dict(list(self.get_coordinator_docs().values('uuid', 'name')))) tmpl = "editor2/gen/bundle.xml.mako" return force_str( - re.sub(re.compile('\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, { + re.sub(re.compile('\\s*\n+', re.MULTILINE), '\n', django_mako.render_to_string(tmpl, { 'bundle': self, 'mapping': mapping }))) @@ -3794,7 +3782,7 @@ def find_parameters(self): params.add(param) # Remove the ones filled up by bundle - removable_names = [p['name'] for coord in self.data['coordinators'] for p in coord['properties']] + removable_names = [p['name'] for coord in self.data['coordinators'] for p in coord['properties']] return dict([(param, '') for param in list(params) if param not in removable_names]) @@ -3864,7 +3852,7 @@ def _save_workflow(workflow, layout, user, fs=None): dependency_docs = Document2.objects.filter(uuid__in=dependencies) workflow_doc.dependencies.add(*dependency_docs) - if workflow['properties'].get('imported'): # We convert from and old workflow format (3.8 <) to the latest + if workflow['properties'].get('imported'): # We convert from and old workflow format (3.8 <) to the latest workflow['properties']['imported'] = False workflow_instance 
= Workflow(workflow=workflow, user=user) _import_workspace(fs, user, workflow_instance) @@ -3922,7 +3910,6 @@ def create_workflow(self, user, document=None, name=None, managed=False): return workflow_doc - def create_notebook_workflow(self, user, notebook=None, name=None, managed=False): nodes = [] @@ -3945,7 +3932,7 @@ def create_notebook_workflow(self, user, notebook=None, name=None, managed=False nodes.append(node) - workflow_doc = self.get_workflow(nodes, name, notebook['uuid'], user, managed=managed) # TODO optionally save + workflow_doc = self.get_workflow(nodes, name, notebook['uuid'], user, managed=managed) # TODO optionally save return workflow_doc @@ -3963,7 +3950,7 @@ def _get_hive_node(self, node_id, user, is_document_node=False): return { u'id': node_id, u'name': u'hive-%s' % node_id[:4], - u"type": u"hive-document-widget", # if is_document_node else u"hive2-widget", + u"type": u"hive-document-widget", # if is_document_node else u"hive2-widget", u'properties': { u'files': [], u'job_xml': u'', @@ -4113,7 +4100,7 @@ def get_spark_snippet_node(self, snippet, user): node = self._get_spark_node(node_id, user) node['properties']['class'] = snippet['properties']['class'] - node['properties']['jars'] = snippet['properties']['app_jar'] # Not used, submission add it to oozie.libpath instead + node['properties']['jars'] = snippet['properties']['app_jar'] # Not used, submission add it to oozie.libpath instead node['properties']['files'] = [{'value': f['path']} for f in snippet['properties']['files']] node['properties']['spark_opts'] = snippet['properties']['spark_opts'] node['properties']['spark_arguments'] = [{'value': f} for f in snippet['properties']['arguments']] @@ -4427,7 +4414,7 @@ def get_java_snippet_node(self, snippet): node = self._get_java_node(node_id, credentials) node['properties']['main_class'] = snippet['properties']['class'] - node['properties']['app_jar'] = snippet['properties']['app_jar'] # Not used, submission add it to oozie.libpath instead + node['properties']['app_jar'] = snippet['properties']['app_jar'] # Not used, submission add it to oozie.libpath instead node['properties']['files'] = [{'value': f['path']} for f in snippet['properties']['files']] node['properties']['arguments'] = [{'value': f} for f in snippet['properties']['arguments']] @@ -4520,7 +4507,7 @@ def get_workflow(self, nodes, name, doc_uuid, user, managed=False): for node in nodes: data['workflow']['nodes'].append(node) - _prev_node['children'][0]['to'] = node['id'] # We link nodes + _prev_node['children'][0]['to'] = node['id'] # We link nodes _prev_node = node workflow_doc = _save_workflow(data['workflow'], {}, user) diff --git a/apps/oozie/src/oozie/models2_tests.py b/apps/oozie/src/oozie/models2_tests.py index a550f1a4e27..76ba72ca4f6 100644 --- a/apps/oozie/src/oozie/models2_tests.py +++ b/apps/oozie/src/oozie/models2_tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -## -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- # Licensed to Cloudera, Inc. under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,36 +16,43 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import str, object -import json -import logging -import pytest import re import sys +import json +import logging +from builtins import object, str -from django.urls import reverse +import pytest from django.db.models import Q +from django.urls import reverse from desktop.conf import USE_DEFAULT_CONFIGURATION, USE_NEW_EDITOR from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.test_utils import add_permission, add_to_group, grant_access, remove_from_group from desktop.models import DefaultConfiguration, Document, Document2 -from notebook.models import make_notebook, make_notebook2 from notebook.api import _save_notebook -from useradmin.models import User - +from notebook.models import make_notebook, make_notebook2 from oozie.conf import ENABLE_V2 from oozie.importlib.workflows import generate_v2_graph_nodes -from oozie.models2 import Node, Workflow, WorkflowConfiguration, find_dollar_variables, find_dollar_braced_variables, \ - _create_graph_adjaceny_list, _get_hierarchy_from_adj_list, WorkflowBuilder, WorkflowDepthReached -from oozie.tests import OozieMockBase, save_temp_workflow, MockOozieApi - +from oozie.models2 import ( + Node, + Workflow, + WorkflowBuilder, + WorkflowConfiguration, + WorkflowDepthReached, + _create_graph_adjaceny_list, + _get_hierarchy_from_adj_list, + find_dollar_braced_variables, + find_dollar_variables, +) +from oozie.tests import MockOozieApi, OozieMockBase, save_temp_workflow +from useradmin.models import User LOG = logging.getLogger() + @pytest.mark.django_db class TestEditor(OozieMockBase): - def setup_method(self): super(TestEditor, self).setup_method() self.wf = Workflow() @@ -53,23 +60,19 @@ def setup_method(self): self.client_not_me = make_logged_in_client(username="not_perm_user", groupname="default", recreate=True, is_superuser=False) self.user_not_me = User.objects.get(username="not_perm_user") - @pytest.mark.integration def test_create_new_workflow(self): response = self.c.get(reverse('oozie:new_workflow')) assert 200 == response.status_code - def test_create_new_coordinator(self): response = self.c.get(reverse('oozie:new_coordinator')) assert 200 == response.status_code - def test_create_new_bundle(self): response = self.c.get(reverse('oozie:new_bundle')) assert 200 == response.status_code - def test_parsing(self): assert ['input', 'LIMIT', 'out'] == find_dollar_variables(""" data = '$input'; @@ -86,143 +89,256 @@ def test_parsing(self): ORDER BY sample_07.salary DESC LIMIT $limit""") - def test_hive_script_parsing(self): - assert sorted(['field', 'tablename', 'LIMIT']) == sorted(find_dollar_braced_variables(""" + assert sorted(['field', 'tablename', 'LIMIT']) == sorted( + find_dollar_braced_variables(""" SELECT ${field} FROM ${hivevar:tablename} LIMIT ${hiveconf:LIMIT} - """)) - - assert sorted(['field', 'tablename', 'LIMIT']) == sorted(find_dollar_braced_variables("SELECT ${field} FROM ${hivevar:tablename} LIMIT ${hiveconf:LIMIT}")) + """) + ) + assert sorted(['field', 'tablename', 'LIMIT']) == sorted( + find_dollar_braced_variables("SELECT ${field} FROM ${hivevar:tablename} LIMIT ${hiveconf:LIMIT}") + ) def test_workflow_gen_xml(self): - assert ([ - u'', u'', u'', u'Action', u'failed,', - u'error', u'message[${wf:errorMessage(wf:lastErrorNode())}]', u'', u'', u''] == - self.wf.to_xml({'output': '/path'}).split()) + assert [ + '', + '', + '', + 'Action', + 'failed,', + 'error', + 'message[${wf:errorMessage(wf:lastErrorNode())}]', + '', + '', + '', + ] == self.wf.to_xml({'output': '/path'}).split() def 
test_workflow_map_reduce_gen_xml(self): - wf = Workflow(data="{\"layout\": [{\"oozieRows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}], \"rows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}], \"oozieEndRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, \"oozieKillRow\": {\"enableOozieDropOnBefore\": true, 
\"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}, \"enableOozieDropOnAfter\": true, \"oozieStartRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, \"klass\": \"card card-home card-column span12\", \"enableOozieDropOnBefore\": true, \"drops\": [\"temp\"], \"id\": \"0c1908e7-0096-46e7-a16b-b17b1142a730\", \"size\": 12}], \"workflow\": {\"properties\": {\"job_xml\": \"\", \"description\": \"\", \"wf1_id\": null, \"sla_enabled\": false, \"deployment_dir\": \"/user/hue/oozie/workspaces/hue-oozie-1430228904.58\", \"schema_version\": \"uri:oozie:workflow:0.5\", \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", \"value\": \"\"}], \"show_arrows\": true, \"parameters\": [{\"name\": \"oozie.use.system.libpath\", \"value\": true}], \"properties\": []}, \"name\": \"My Workflow\", \"versions\": [\"uri:oozie:workflow:0.4\", \"uri:oozie:workflow:0.4.5\", \"uri:oozie:workflow:0.5\"], \"isDirty\": true, \"movedNode\": null, \"linkMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": [\"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"], \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": [], \"3f107997-04cc-8733-60a9-a4bb62cebffc\": [\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"17c9c895-5a16-7443-bb81-f34b30b21548\": []}, \"nodeIds\": [\"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"nodes\": [{\"properties\": {}, \"name\": \"Start\", \"children\": [{\"to\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"}], \"actionParametersFetched\": false, \"type\": \"start-widget\", \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"actionParameters\": []}, {\"properties\": {}, \"name\": \"End\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"end-widget\", \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"actionParameters\": []}, {\"properties\": {\"message\": \"Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\"}, \"name\": \"Kill\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"kill-widget\", \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"actionParameters\": []}, {\"properties\": {\"retry_max\": [{\"value\": 
\"5\"}], \"files\": [], \"job_xml\": \"\", \"jar_path\": \"my_jar\", \"job_properties\": [{\"name\": \"prop_1_name\", \"value\": \"prop_1_value\"}], \"archives\": [], \"prepares\": [], \"credentials\": [], \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", \"value\": \"\"}]}, \"name\": \"mapreduce-0cf2\", \"children\": [{\"to\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"}, {\"error\": \"17c9c895-5a16-7443-bb81-f34b30b21548\"}], \"actionParametersFetched\": false, \"type\": \"mapreduce-widget\", \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"actionParameters\": []}], \"id\": 50019, \"nodeNamesMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": \"mapreduce-0cf2\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": \"End\", \"3f107997-04cc-8733-60a9-a4bb62cebffc\": \"Start\", \"17c9c895-5a16-7443-bb81-f34b30b21548\": \"Kill\"}, \"uuid\": \"084f4d4c-00f1-62d2-e27e-e153c1f9acfb\"}}") - - assert ([ - u'', - u'', - u'', u'Action', u'failed,', u'error', u'message[${wf:errorMessage(wf:lastErrorNode())}]', u'', - u'', - u'', - u'${jobTracker}', - u'${nameNode}', - u'', - u'', - u'prop_1_name', - u'prop_1_value', - u'', - u'', - u'', - u'', - u'', - u'', - u'', - u'' - ] == - wf.to_xml({'output': '/path'}).split()) + wf = Workflow( + data="{\"layout\": [{\"oozieRows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}], \"rows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", 
\"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}], \"oozieEndRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, \"oozieKillRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}, \"enableOozieDropOnAfter\": true, \"oozieStartRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, \"klass\": \"card card-home card-column span12\", \"enableOozieDropOnBefore\": true, \"drops\": [\"temp\"], \"id\": \"0c1908e7-0096-46e7-a16b-b17b1142a730\", \"size\": 12}], \"workflow\": {\"properties\": {\"job_xml\": \"\", \"description\": \"\", \"wf1_id\": null, \"sla_enabled\": false, \"deployment_dir\": \"/user/hue/oozie/workspaces/hue-oozie-1430228904.58\", \"schema_version\": \"uri:oozie:workflow:0.5\", \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", 
\"value\": \"\"}], \"show_arrows\": true, \"parameters\": [{\"name\": \"oozie.use.system.libpath\", \"value\": true}], \"properties\": []}, \"name\": \"My Workflow\", \"versions\": [\"uri:oozie:workflow:0.4\", \"uri:oozie:workflow:0.4.5\", \"uri:oozie:workflow:0.5\"], \"isDirty\": true, \"movedNode\": null, \"linkMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": [\"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"], \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": [], \"3f107997-04cc-8733-60a9-a4bb62cebffc\": [\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"17c9c895-5a16-7443-bb81-f34b30b21548\": []}, \"nodeIds\": [\"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"nodes\": [{\"properties\": {}, \"name\": \"Start\", \"children\": [{\"to\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"}], \"actionParametersFetched\": false, \"type\": \"start-widget\", \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"actionParameters\": []}, {\"properties\": {}, \"name\": \"End\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"end-widget\", \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"actionParameters\": []}, {\"properties\": {\"message\": \"Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\"}, \"name\": \"Kill\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"kill-widget\", \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"actionParameters\": []}, {\"properties\": {\"retry_max\": [{\"value\": \"5\"}], \"files\": [], \"job_xml\": \"\", \"jar_path\": \"my_jar\", \"job_properties\": [{\"name\": \"prop_1_name\", \"value\": \"prop_1_value\"}], \"archives\": [], \"prepares\": [], \"credentials\": [], \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", \"value\": \"\"}]}, \"name\": \"mapreduce-0cf2\", \"children\": [{\"to\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"}, {\"error\": \"17c9c895-5a16-7443-bb81-f34b30b21548\"}], \"actionParametersFetched\": false, \"type\": \"mapreduce-widget\", \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"actionParameters\": []}], \"id\": 50019, \"nodeNamesMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": \"mapreduce-0cf2\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": \"End\", \"3f107997-04cc-8733-60a9-a4bb62cebffc\": \"Start\", \"17c9c895-5a16-7443-bb81-f34b30b21548\": \"Kill\"}, \"uuid\": \"084f4d4c-00f1-62d2-e27e-e153c1f9acfb\"}}" # noqa: E501 + ) + + assert [ + '', + '', + '', + 'Action', + 'failed,', + 'error', + 'message[${wf:errorMessage(wf:lastErrorNode())}]', + '', + '', + '', + '${jobTracker}', + '${nameNode}', + '', + '', + 'prop_1_name', + 'prop_1_value', + '', + '', + '', + '', + '', + '', + '', + '', + ] == wf.to_xml({'output': '/path'}).split() def test_workflow_java_gen_xml(self): - wf = Workflow(data="{\"layout\": [{\"oozieRows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Java program\", \"widgetType\": \"java-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": true, \"properties\": {}, 
\"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\", \"size\": 12}], \"id\": \"badb3c81-78d6-8099-38fc-87a9904ba78c\", \"columns\": []}], \"rows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"adc3fe69-36eb-20f8-09ac-38fada1582b2\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Java program\", \"widgetType\": \"java-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": true, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\", \"size\": 12}], \"id\": \"badb3c81-78d6-8099-38fc-87a9904ba78c\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"107bdacf-a37a-d69e-98dd-5801407cb57e\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"81e1869c-a2c3-66d2-c703-719335ea45cb\", \"columns\": []}], \"oozieEndRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"107bdacf-a37a-d69e-98dd-5801407cb57e\", \"columns\": []}, \"oozieKillRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": 
\"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"81e1869c-a2c3-66d2-c703-719335ea45cb\", \"columns\": []}, \"enableOozieDropOnAfter\": true, \"oozieStartRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"adc3fe69-36eb-20f8-09ac-38fada1582b2\", \"columns\": []}, \"klass\": \"card card-home card-column span12\", \"enableOozieDropOnBefore\": true, \"drops\": [\"temp\"], \"id\": \"8e0f37a5-2dfb-7329-be44-78e60b2cf62b\", \"size\": 12}], \"workflow\": {\"properties\": {\"job_xml\": \"\", \"description\": \"\", \"wf1_id\": null, \"sla_enabled\": false, \"deployment_dir\": \"/user/hue/oozie/workspaces/hue-oozie-1449080135.8\", \"schema_version\": \"uri:oozie:workflow:0.5\", \"properties\": [], \"show_arrows\": true, \"parameters\": [{\"name\": \"oozie.use.system.libpath\", \"value\": true}], \"sla\": [{\"value\": false, \"key\": \"enabled\"}, {\"value\": \"${nominal_time}\", \"key\": \"nominal-time\"}, {\"value\": \"\", \"key\": \"should-start\"}, {\"value\": \"${30 * MINUTES}\", \"key\": \"should-end\"}, {\"value\": \"\", \"key\": \"max-duration\"}, {\"value\": \"\", \"key\": \"alert-events\"}, {\"value\": \"\", \"key\": \"alert-contact\"}, {\"value\": \"\", \"key\": \"notification-msg\"}, {\"value\": \"\", \"key\": \"upstream-apps\"}]}, \"name\": \"My Workflow\", \"versions\": [\"uri:oozie:workflow:0.4\", \"uri:oozie:workflow:0.4.5\", \"uri:oozie:workflow:0.5\"], \"isDirty\": false, \"movedNode\": null, \"linkMapping\": {\"6ddafdc4-c070-95f0-4211-328e9f31daf6\": [\"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"], \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": [], \"3f107997-04cc-8733-60a9-a4bb62cebffc\": [\"6ddafdc4-c070-95f0-4211-328e9f31daf6\"], \"17c9c895-5a16-7443-bb81-f34b30b21548\": []}, \"nodeIds\": [\"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"6ddafdc4-c070-95f0-4211-328e9f31daf6\"], \"nodes\": [{\"properties\": {}, \"name\": \"Start\", \"children\": [{\"to\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\"}], \"actionParametersFetched\": false, \"type\": \"start-widget\", \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"actionParameters\": []}, {\"properties\": {}, \"name\": \"End\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"end-widget\", \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"actionParameters\": []}, {\"properties\": {\"body\": \"\", \"cc\": \"\", \"to\": \"\", \"enableMail\": false, \"message\": \"Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\", \"subject\": \"\"}, \"name\": \"Kill\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"kill-widget\", \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"actionParameters\": []}, {\"properties\": {\"files\": [{\"value\": \"/my_file\"}], \"job_xml\": [], \"jar_path\": \"/my/jar\", \"java_opts\": [{\"value\": \"-Dsun.security.jgss.debug=true\"}], \"retry_max\": [], \"retry_interval\": [], \"job_properties\": [], \"capture_output\": false, \"main_class\": \"MyClass\", \"arguments\": [{\"value\": \"my_arg\"}], \"prepares\": [], 
\"credentials\": [], \"sla\": [{\"value\": false, \"key\": \"enabled\"}, {\"value\": \"${nominal_time}\", \"key\": \"nominal-time\"}, {\"value\": \"\", \"key\": \"should-start\"}, {\"value\": \"${30 * MINUTES}\", \"key\": \"should-end\"}, {\"value\": \"\", \"key\": \"max-duration\"}, {\"value\": \"\", \"key\": \"alert-events\"}, {\"value\": \"\", \"key\": \"alert-contact\"}, {\"value\": \"\", \"key\": \"notification-msg\"}, {\"value\": \"\", \"key\": \"upstream-apps\"}], \"archives\": []}, \"name\": \"java-6dda\", \"children\": [{\"to\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"}, {\"error\": \"17c9c895-5a16-7443-bb81-f34b30b21548\"}], \"actionParametersFetched\": false, \"type\": \"java-widget\", \"id\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\", \"actionParameters\": []}], \"id\": 50247, \"nodeNamesMapping\": {\"6ddafdc4-c070-95f0-4211-328e9f31daf6\": \"java-6dda\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": \"End\", \"3f107997-04cc-8733-60a9-a4bb62cebffc\": \"Start\", \"17c9c895-5a16-7443-bb81-f34b30b21548\": \"Kill\"}, \"uuid\": \"2667d60e-d894-c27b-6e6f-0333704c0989\"}}") - - assert ([ - u'', - u'', - u'', - u'Action', u'failed,', - u'error', u'message[${wf:errorMessage(wf:lastErrorNode())}]', - u'', - u'', - u'', - u'${jobTracker}', - u'${nameNode}', - u'MyClass', - u'-Dsun.security.jgss.debug=true', - u'my_arg', - u'/my_file#my_file', - u'', - u'', - u'', - u'', - u'', - u'' - ] == - wf.to_xml({'output': '/path'}).split()) + wf = Workflow( + data="{\"layout\": [{\"oozieRows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Java program\", \"widgetType\": \"java-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": true, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\", \"size\": 12}], \"id\": \"badb3c81-78d6-8099-38fc-87a9904ba78c\", \"columns\": []}], \"rows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"adc3fe69-36eb-20f8-09ac-38fada1582b2\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Java program\", \"widgetType\": \"java-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": true, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\", \"size\": 12}], \"id\": \"badb3c81-78d6-8099-38fc-87a9904ba78c\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", 
\"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"107bdacf-a37a-d69e-98dd-5801407cb57e\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"81e1869c-a2c3-66d2-c703-719335ea45cb\", \"columns\": []}], \"oozieEndRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"107bdacf-a37a-d69e-98dd-5801407cb57e\", \"columns\": []}, \"oozieKillRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"81e1869c-a2c3-66d2-c703-719335ea45cb\", \"columns\": []}, \"enableOozieDropOnAfter\": true, \"oozieStartRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"adc3fe69-36eb-20f8-09ac-38fada1582b2\", \"columns\": []}, \"klass\": \"card card-home card-column span12\", \"enableOozieDropOnBefore\": true, \"drops\": [\"temp\"], \"id\": \"8e0f37a5-2dfb-7329-be44-78e60b2cf62b\", \"size\": 12}], \"workflow\": {\"properties\": {\"job_xml\": \"\", \"description\": \"\", \"wf1_id\": null, \"sla_enabled\": false, \"deployment_dir\": \"/user/hue/oozie/workspaces/hue-oozie-1449080135.8\", \"schema_version\": \"uri:oozie:workflow:0.5\", \"properties\": [], \"show_arrows\": true, \"parameters\": [{\"name\": \"oozie.use.system.libpath\", \"value\": true}], \"sla\": [{\"value\": false, \"key\": \"enabled\"}, {\"value\": \"${nominal_time}\", \"key\": \"nominal-time\"}, {\"value\": \"\", \"key\": \"should-start\"}, {\"value\": \"${30 * MINUTES}\", \"key\": \"should-end\"}, {\"value\": \"\", \"key\": \"max-duration\"}, {\"value\": \"\", \"key\": \"alert-events\"}, {\"value\": \"\", \"key\": \"alert-contact\"}, {\"value\": \"\", \"key\": \"notification-msg\"}, {\"value\": \"\", \"key\": \"upstream-apps\"}]}, \"name\": \"My Workflow\", 
\"versions\": [\"uri:oozie:workflow:0.4\", \"uri:oozie:workflow:0.4.5\", \"uri:oozie:workflow:0.5\"], \"isDirty\": false, \"movedNode\": null, \"linkMapping\": {\"6ddafdc4-c070-95f0-4211-328e9f31daf6\": [\"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"], \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": [], \"3f107997-04cc-8733-60a9-a4bb62cebffc\": [\"6ddafdc4-c070-95f0-4211-328e9f31daf6\"], \"17c9c895-5a16-7443-bb81-f34b30b21548\": []}, \"nodeIds\": [\"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"6ddafdc4-c070-95f0-4211-328e9f31daf6\"], \"nodes\": [{\"properties\": {}, \"name\": \"Start\", \"children\": [{\"to\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\"}], \"actionParametersFetched\": false, \"type\": \"start-widget\", \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"actionParameters\": []}, {\"properties\": {}, \"name\": \"End\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"end-widget\", \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"actionParameters\": []}, {\"properties\": {\"body\": \"\", \"cc\": \"\", \"to\": \"\", \"enableMail\": false, \"message\": \"Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\", \"subject\": \"\"}, \"name\": \"Kill\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"kill-widget\", \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"actionParameters\": []}, {\"properties\": {\"files\": [{\"value\": \"/my_file\"}], \"job_xml\": [], \"jar_path\": \"/my/jar\", \"java_opts\": [{\"value\": \"-Dsun.security.jgss.debug=true\"}], \"retry_max\": [], \"retry_interval\": [], \"job_properties\": [], \"capture_output\": false, \"main_class\": \"MyClass\", \"arguments\": [{\"value\": \"my_arg\"}], \"prepares\": [], \"credentials\": [], \"sla\": [{\"value\": false, \"key\": \"enabled\"}, {\"value\": \"${nominal_time}\", \"key\": \"nominal-time\"}, {\"value\": \"\", \"key\": \"should-start\"}, {\"value\": \"${30 * MINUTES}\", \"key\": \"should-end\"}, {\"value\": \"\", \"key\": \"max-duration\"}, {\"value\": \"\", \"key\": \"alert-events\"}, {\"value\": \"\", \"key\": \"alert-contact\"}, {\"value\": \"\", \"key\": \"notification-msg\"}, {\"value\": \"\", \"key\": \"upstream-apps\"}], \"archives\": []}, \"name\": \"java-6dda\", \"children\": [{\"to\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"}, {\"error\": \"17c9c895-5a16-7443-bb81-f34b30b21548\"}], \"actionParametersFetched\": false, \"type\": \"java-widget\", \"id\": \"6ddafdc4-c070-95f0-4211-328e9f31daf6\", \"actionParameters\": []}], \"id\": 50247, \"nodeNamesMapping\": {\"6ddafdc4-c070-95f0-4211-328e9f31daf6\": \"java-6dda\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": \"End\", \"3f107997-04cc-8733-60a9-a4bb62cebffc\": \"Start\", \"17c9c895-5a16-7443-bb81-f34b30b21548\": \"Kill\"}, \"uuid\": \"2667d60e-d894-c27b-6e6f-0333704c0989\"}}" # noqa: E501 + ) + + assert [ + '', + '', + '', + 'Action', + 'failed,', + 'error', + 'message[${wf:errorMessage(wf:lastErrorNode())}]', + '', + '', + '', + '${jobTracker}', + '${nameNode}', + 'MyClass', + '-Dsun.security.jgss.debug=true', + 'my_arg', + '/my_file#my_file', + '', + '', + '', + '', + '', + '', + ] == wf.to_xml({'output': '/path'}).split() def test_workflow_generic_gen_xml(self): - workflow = """{"layout": [{"oozieRows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Generic", "widgetType": "generic-widget", "oozieMovable": true, "ooziePropertiesExpanded": 
true, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "e96bb09b-84d1-6864-5782-42942bab97cb", "size": 12}], "id": "ed10631a-f264-9a3b-aa09-b04cb76f5c32", "columns": []}], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "68d83128-2c08-28f6-e9d1-a912d20f8af5", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Generic", "widgetType": "generic-widget", "oozieMovable": true, "ooziePropertiesExpanded": true, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "e96bb09b-84d1-6864-5782-42942bab97cb", "size": 12}], "id": "ed10631a-f264-9a3b-aa09-b04cb76f5c32", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "7bf3cdc7-f79b-ff36-b152-e37217c40ccb", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "07c4f1bd-8f58-ea51-fc3d-50acf74d6747", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "7bf3cdc7-f79b-ff36-b152-e37217c40ccb", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "07c4f1bd-8f58-ea51-fc3d-50acf74d6747", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": 
false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "68d83128-2c08-28f6-e9d1-a912d20f8af5", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "0e8b5e24-4f78-0f76-fe91-0c8e7f0d290a", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1446487280.19", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My Workflow 3", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": [], "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["e96bb09b-84d1-6864-5782-42942bab97cb"], "e96bb09b-84d1-6864-5782-42942bab97cb": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"]}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548", "e96bb09b-84d1-6864-5782-42942bab97cb"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "e96bb09b-84d1-6864-5782-42942bab97cb"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}, {"properties": {"xml": "\\n", "credentials": [], "retry_max": [], "sla": [{"key": "enabled", "value": false}, {"key": "nominal-time", "value": "${nominal_time}"}, {"key": "should-start", "value": ""}, {"key": "should-end", "value": "${30 * MINUTES}"}, {"key": "max-duration", "value": ""}, {"key": "alert-events", "value": ""}, {"key": "alert-contact", "value": ""}, {"key": "notification-msg", "value": ""}, {"key": "upstream-apps", "value": ""}], "retry_interval": []}, "name": "generic-e96b", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}, {"error": "17c9c895-5a16-7443-bb81-f34b30b21548"}], "actionParametersFetched": false, "type": "generic-widget", "id": "e96bb09b-84d1-6864-5782-42942bab97cb", "actionParameters": []}], "id": 50027, "nodeNamesMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": "Kill", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start", "e96bb09b-84d1-6864-5782-42942bab97cb": "generic-e96b"}, "uuid": "83fb9dc4-8687-e369-9220-c8501a93d446"}}""" + workflow = """{"layout": [{"oozieRows": [{"enableOozieDropOnBefore": true, 
"enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Generic", "widgetType": "generic-widget", "oozieMovable": true, "ooziePropertiesExpanded": true, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "e96bb09b-84d1-6864-5782-42942bab97cb", "size": 12}], "id": "ed10631a-f264-9a3b-aa09-b04cb76f5c32", "columns": []}], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "68d83128-2c08-28f6-e9d1-a912d20f8af5", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Generic", "widgetType": "generic-widget", "oozieMovable": true, "ooziePropertiesExpanded": true, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "e96bb09b-84d1-6864-5782-42942bab97cb", "size": 12}], "id": "ed10631a-f264-9a3b-aa09-b04cb76f5c32", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "7bf3cdc7-f79b-ff36-b152-e37217c40ccb", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "07c4f1bd-8f58-ea51-fc3d-50acf74d6747", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "7bf3cdc7-f79b-ff36-b152-e37217c40ccb", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "07c4f1bd-8f58-ea51-fc3d-50acf74d6747", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": 
{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "68d83128-2c08-28f6-e9d1-a912d20f8af5", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "0e8b5e24-4f78-0f76-fe91-0c8e7f0d290a", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1446487280.19", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My Workflow 3", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": [], "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["e96bb09b-84d1-6864-5782-42942bab97cb"], "e96bb09b-84d1-6864-5782-42942bab97cb": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"]}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548", "e96bb09b-84d1-6864-5782-42942bab97cb"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "e96bb09b-84d1-6864-5782-42942bab97cb"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}, {"properties": {"xml": "\\n", "credentials": [], "retry_max": [], "sla": [{"key": "enabled", "value": false}, {"key": "nominal-time", "value": "${nominal_time}"}, {"key": "should-start", "value": ""}, {"key": "should-end", "value": "${30 * MINUTES}"}, {"key": "max-duration", "value": ""}, {"key": "alert-events", "value": ""}, {"key": "alert-contact", "value": ""}, {"key": "notification-msg", "value": ""}, {"key": "upstream-apps", "value": ""}], "retry_interval": []}, "name": "generic-e96b", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}, {"error": "17c9c895-5a16-7443-bb81-f34b30b21548"}], "actionParametersFetched": false, "type": "generic-widget", "id": "e96bb09b-84d1-6864-5782-42942bab97cb", "actionParameters": []}], "id": 50027, "nodeNamesMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": "Kill", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": 
"Start", "e96bb09b-84d1-6864-5782-42942bab97cb": "generic-e96b"}, "uuid": "83fb9dc4-8687-e369-9220-c8501a93d446"}}""" # noqa: E501 wf = Workflow(data=workflow) - assert ([ - u'', - u'', - u'', u'Action', u'failed,', u'error', u'message[${wf:errorMessage(wf:lastErrorNode())}]', u'', - u'', u'', u'', - u'', u'', - u'', - u'', - u''] == - wf.to_xml({'output': '/path'}).split()) + assert [ + '', + '', + '', + 'Action', + 'failed,', + 'error', + 'message[${wf:errorMessage(wf:lastErrorNode())}]', + '', + '', + '', + '', + '', + '', + '', + '', + '', + ] == wf.to_xml({'output': '/path'}).split() def test_workflow_email_on_kill_node_xml(self): - workflow = """{"history": {"oozie_id": "0000013-151015155856463-oozie-oozi-W", "properties": {"oozie.use.system.libpath": "True", "security_enabled": false, "dryrun": false, "jobTracker": "localhost:8032", "oozie.wf.application.path": "hdfs://localhost:8020/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "hue-id-w": 6, "nameNode": "hdfs://localhost:8020"}}, "layout": [{"oozieRows": [], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", 
"columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "1920900a-a735-7e66-61d4-23de384e8f62", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My real Workflow 1", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "17c9c895-5a16-7443-bb81-f34b30b21548": []}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "", "cc": "", "to": "hue@gethue.com", "enableMail": true, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": "Error on workflow"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}], "id": 50020, "nodeNamesMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start", "17c9c895-5a16-7443-bb81-f34b30b21548": "Kill"}, "uuid": "330c70c8-33fb-16e1-68fb-c42582c7d178"}}""" + workflow = """{"history": {"oozie_id": "0000013-151015155856463-oozie-oozi-W", "properties": {"oozie.use.system.libpath": "True", "security_enabled": false, "dryrun": false, "jobTracker": "localhost:8032", "oozie.wf.application.path": "hdfs://localhost:8020/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "hue-id-w": 6, "nameNode": "hdfs://localhost:8020"}}, "layout": [{"oozieRows": [], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": 
false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "1920900a-a735-7e66-61d4-23de384e8f62", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, 
{"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My real Workflow 1", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "17c9c895-5a16-7443-bb81-f34b30b21548": []}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "", "cc": "", "to": "hue@gethue.com", "enableMail": true, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": "Error on workflow"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}], "id": 50020, "nodeNamesMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start", "17c9c895-5a16-7443-bb81-f34b30b21548": "Kill"}, "uuid": "330c70c8-33fb-16e1-68fb-c42582c7d178"}}""" # noqa: E501 wf = Workflow(data=workflow) - assert ([ - u'', - u'', - u'', - u'', u'hue@gethue.com', u'Error', u'on', u'workflow', u'', u'', - u'', u'', - u'', - u'', - u'Action', u'failed,', u'error', u'message[${wf:errorMessage(wf:lastErrorNode())}]', - u'', - u'', - u''] == - wf.to_xml({'output': '/path'}).split()) - + assert [ + '', + '', + '', + '', + 'hue@gethue.com', + 'Error', + 'on', + 'workflow', + '', + '', + '', + '', + '', + '', + 'Action', + 'failed,', + 'error', + 'message[${wf:errorMessage(wf:lastErrorNode())}]', + '', + '', + '', + ] == wf.to_xml({'output': '/path'}).split() def test_workflow_submission_on_email_notification(self): - workflow = """{"history": {"oozie_id": "0000013-151015155856463-oozie-oozi-W", "properties": {"oozie.use.system.libpath": "True", "security_enabled": false, "dryrun": false, "jobTracker": "localhost:8032", "oozie.wf.application.path": "hdfs://localhost:8020/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "email_checkbox": "True", "hue-id-w": 6, "nameNode": "hdfs://localhost:8020"}}, "layout": [{"oozieRows": [], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, 
"klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "1920900a-a735-7e66-61d4-23de384e8f62", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My real Workflow 1", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "17c9c895-5a16-7443-bb81-f34b30b21548": []}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", 
"33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "", "cc": "", "to": "hue@gethue.com", "enableMail": true, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": "Error on workflow"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}], "id": 50020, "nodeNamesMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start", "17c9c895-5a16-7443-bb81-f34b30b21548": "Kill"}, "uuid": "330c70c8-33fb-16e1-68fb-c42582c7d178"}}""" + workflow = """{"history": {"oozie_id": "0000013-151015155856463-oozie-oozi-W", "properties": {"oozie.use.system.libpath": "True", "security_enabled": false, "dryrun": false, "jobTracker": "localhost:8032", "oozie.wf.application.path": "hdfs://localhost:8020/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "email_checkbox": "True", "hue-id-w": 6, "nameNode": "hdfs://localhost:8020"}}, "layout": [{"oozieRows": [], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, "oozieKillRow": 
{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "1920900a-a735-7e66-61d4-23de384e8f62", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My real Workflow 1", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "17c9c895-5a16-7443-bb81-f34b30b21548": []}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "", "cc": "", "to": "hue@gethue.com", "enableMail": true, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": "Error on workflow"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}], "id": 50020, "nodeNamesMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start", "17c9c895-5a16-7443-bb81-f34b30b21548": "Kill"}, "uuid": "330c70c8-33fb-16e1-68fb-c42582c7d178"}}""" # noqa: E501 wf = Workflow(data=workflow, user=self.user) - assert ([ - u'', - u'', - u'', - u'', - u'hue@gethue.com', u'Error', u'on', u'workflow', u'', - 
u'', - u'', - u'', - u'', - u'', - u'Action', u'failed,', u'error', u'message[${wf:errorMessage(wf:lastErrorNode())}]', - u'', - u'', - u'', - u'test@localhost', u'${wf:name()}', u'execution', u'successful', u'', u'text/plain', - u'', - u'', - u'', - u'', - u'', - u'' - ] == - wf.to_xml({'output': '/path', 'send_email': 'True'}).split()) - + assert [ + '', + '', + '', + '', + 'hue@gethue.com', + 'Error', + 'on', + 'workflow', + '', + '', + '', + '', + '', + '', + 'Action', + 'failed,', + 'error', + 'message[${wf:errorMessage(wf:lastErrorNode())}]', + '', + '', + '', + 'test@localhost', + '${wf:name()}', + 'execution', + 'successful', + '', + 'text/plain', + '', + '', + '', + '', + '', + '', + ] == wf.to_xml({'output': '/path', 'send_email': 'True'}).split() def test_workflow_email_gen_xml(self): self.maxDiff = None - workflow = """{"history": {"oozie_id": "0000013-151015155856463-oozie-oozi-W", "properties": {"oozie.use.system.libpath": "True", "security_enabled": false, "dryrun": false, "jobTracker": "localhost:8032", "oozie.wf.application.path": "hdfs://localhost:8020/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "hue-id-w": 6, "nameNode": "hdfs://localhost:8020"}}, "layout": [{"oozieRows": [], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": 
"", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "1920900a-a735-7e66-61d4-23de384e8f62", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My real Workflow 1", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "17c9c895-5a16-7443-bb81-f34b30b21548": []}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "This\\n\\ncontains\\n\\n\\nnew lines.", "bcc": "example@bcc.com", "content_type": "text/plain", "cc": "", "to": "hue@gethue.com", "enableMail": true, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": "Error on workflow"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}], "id": 50020, "nodeNamesMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start", "17c9c895-5a16-7443-bb81-f34b30b21548": "Kill"}, "uuid": "330c70c8-33fb-16e1-68fb-c42582c7d178"}}""" + workflow = """{"history": {"oozie_id": "0000013-151015155856463-oozie-oozi-W", "properties": {"oozie.use.system.libpath": "True", "security_enabled": false, "dryrun": false, "jobTracker": "localhost:8032", "oozie.wf.application.path": "hdfs://localhost:8020/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "hue-id-w": 6, "nameNode": 
"hdfs://localhost:8020"}}, "layout": [{"oozieRows": [], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "f8f22c81-a9eb-5138-64cf-014ae588d0ca", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "31f194ff-cd4f-faef-652d-0c5f66a80f97", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "9cf57679-292c-d980-8053-1180a84eaa54", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "1920900a-a735-7e66-61d4-23de384e8f62", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "wf1_id": null, "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1445431078.26", "schema_version": "uri:oozie:workflow:0.5", "properties": [], "show_arrows": true, 
"parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "My real Workflow 1", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": false, "movedNode": null, "linkMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "17c9c895-5a16-7443-bb81-f34b30b21548": []}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548"], "nodes": [{"properties": {}, "name": "Start", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "This\\n\\ncontains\\n\\n\\nnew lines.", "bcc": "example@bcc.com", "content_type": "text/plain", "cc": "", "to": "hue@gethue.com", "enableMail": true, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": "Error on workflow"}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}], "id": 50020, "nodeNamesMapping": {"33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start", "17c9c895-5a16-7443-bb81-f34b30b21548": "Kill"}, "uuid": "330c70c8-33fb-16e1-68fb-c42582c7d178"}}""" # noqa: E501 wf = Workflow(data=workflow) - assert u'\n \n \n \n hue@gethue.com\n Error on workflow\n This\n\ncontains\n\n\nnew lines.\n \n \n \n \n \n Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\n \n \n' == wf.to_xml({'output': '/path'}) + assert ( + '\n \n \n \n hue@gethue.com\n Error on workflow\n This\n\ncontains\n\n\nnew lines.\n \n \n \n \n \n Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\n \n \n' # noqa: E501 + == wf.to_xml({'output': '/path'}) + ) def test_job_validate_xml_name(self): job = Workflow() @@ -236,8 +352,8 @@ def test_job_validate_xml_name(self): job.update_name('%a') assert '%a' == job.validated_name - job.update_name(u'你好') - assert u'你好' == job.validated_name + job.update_name('你好') + assert '你好' == job.validated_name job.update_name('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz') assert len('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa') == len(job.validated_name) @@ -246,27 +362,50 @@ def test_job_validate_xml_name(self): assert 'My <...> 1st W$rkflow [With] (Bad) lette' == job.validated_name def test_ignore_dead_fork_link(self): - data = {'id': 1, 'type': 'fork', 'children': [{'to': 1, 'id': 1}, {'to': 2, 'id': 2}], 'properties': {}, 'name': 'my-fork'} # to --> 2 does not exist + data = { + 'id': 1, + 'type': 'fork', + 'children': [{'to': 1, 'id': 1}, {'to': 2, 'id': 2}], + 'properties': {}, + 'name': 'my-fork', + } # to --> 2 does not exist fork = Node(data) - node_mapping = {1: fork} # Point to ourself + node_mapping = {1: fork} # 
Point to ourself
assert ['', '', ''] == fork.to_xml(node_mapping=node_mapping).split()

def test_action_gen_xml_prepare(self):
# Prepare has a value
data = {
- u'properties': {
- u'files': [], u'job_xml': [], u'parameters': [], u'retry_interval': [], u'retry_max': [], u'job_properties': [], u'arguments': [],
- u'prepares': [{u'type': u'mkdir', u'value': u'/my_dir'}],
- u'credentials': [], u'script_path': u'my_pig.pig',
- u'sla': [{u'key': u'enabled', u'value': False}, {u'key': u'nominal-time', u'value': u'${nominal_time}'}, {u'key': u'should-start', u'value': u''}, {u'key': u'should-end', u'value': u'${30 * MINUTES}'}, {u'key': u'max-duration', u'value': u''}, {u'key': u'alert-events', u'value': u''}, {u'key': u'alert-contact', u'value': u''}, {u'key': u'notification-msg', u'value': u''}, {u'key': u'upstream-apps', u'value': u''}],
- u'archives': []
- },
- u'type': u'pig-widget',
- u'id': u'c59d1947-7ce0-ef34-22b2-d64b9fc5bf9a',
- u'name': u'pig-c59d',
- "children":[{"to": "c59d1947-7ce0-ef34-22b2-d64b9fc5bf9a"}, {"error": "c59d1947-7ce0-ef34-22b2-d64b9fc5bf9a"}]
+ 'properties': {
+ 'files': [],
+ 'job_xml': [],
+ 'parameters': [],
+ 'retry_interval': [],
+ 'retry_max': [],
+ 'job_properties': [],
+ 'arguments': [],
+ 'prepares': [{'type': 'mkdir', 'value': '/my_dir'}],
+ 'credentials': [],
+ 'script_path': 'my_pig.pig',
+ 'sla': [
+ {'key': 'enabled', 'value': False},
+ {'key': 'nominal-time', 'value': '${nominal_time}'},
+ {'key': 'should-start', 'value': ''},
+ {'key': 'should-end', 'value': '${30 * MINUTES}'},
+ {'key': 'max-duration', 'value': ''},
+ {'key': 'alert-events', 'value': ''},
+ {'key': 'alert-contact', 'value': ''},
+ {'key': 'notification-msg', 'value': ''},
+ {'key': 'upstream-apps', 'value': ''},
+ ],
+ 'archives': [],
+ },
+ 'type': 'pig-widget',
+ 'id': 'c59d1947-7ce0-ef34-22b2-d64b9fc5bf9a',
+ 'name': 'pig-c59d',
+ "children": [{"to": "c59d1947-7ce0-ef34-22b2-d64b9fc5bf9a"}, {"error": "c59d1947-7ce0-ef34-22b2-d64b9fc5bf9a"}],
}
pig_node = Node(data)
@@ -275,58 +414,98 @@ def test_action_gen_xml_prepare(self):
xml = pig_node.to_xml(node_mapping=node_mapping)
xml = [row.strip() for row in xml.split()]
- assert u'' in xml, xml
- assert u'' in xml, xml
+ assert '' in xml, xml
+ assert '' in xml, xml
# Prepare has empty value and is skipped
- pig_node.data['properties']['prepares'] = [{u'type': u'mkdir', u'value': u''}]
+ pig_node.data['properties']['prepares'] = [{'type': 'mkdir', 'value': ''}]
xml = pig_node.to_xml(node_mapping=node_mapping)
xml = [row.strip() for row in xml.split()]
- assert not u'' in xml, xml
- assert not u'' not in xml, xml
+ assert '' in xml, xml
- assert u'' in xml, xml
+ assert '' in xml, xml
+ assert '' in xml, xml
- assert not u'[{u'value': u'-debug -Da -Db=1'}]" in xml, xml
+ assert "[{u'value': u'-debug -Da -Db=1'}]" not in xml, xml
assert "-debug -Da -Db=1" in xml, xml

def test_workflow_create_single_action_data(self):
- workflow = Workflow(data="{\"layout\": [{\"oozieRows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}],
\"rows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}], \"oozieEndRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, \"oozieKillRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}, \"enableOozieDropOnAfter\": true, \"oozieStartRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, 
\"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, \"klass\": \"card card-home card-column span12\", \"enableOozieDropOnBefore\": true, \"drops\": [\"temp\"], \"id\": \"0c1908e7-0096-46e7-a16b-b17b1142a730\", \"size\": 12}], \"workflow\": {\"properties\": {\"job_xml\": \"\", \"description\": \"\", \"wf1_id\": null, \"sla_enabled\": false, \"deployment_dir\": \"/user/hue/oozie/workspaces/hue-oozie-1430228904.58\", \"schema_version\": \"uri:oozie:workflow:0.5\", \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", \"value\": \"\"}], \"show_arrows\": true, \"parameters\": [{\"name\": \"oozie.use.system.libpath\", \"value\": true}], \"properties\": []}, \"name\": \"My Workflow\", \"versions\": [\"uri:oozie:workflow:0.4\", \"uri:oozie:workflow:0.4.5\", \"uri:oozie:workflow:0.5\"], \"isDirty\": true, \"movedNode\": null, \"linkMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": [\"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"], \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": [], \"3f107997-04cc-8733-60a9-a4bb62cebffc\": [\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"17c9c895-5a16-7443-bb81-f34b30b21548\": []}, \"nodeIds\": [\"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"nodes\": [{\"properties\": {}, \"name\": \"Start\", \"children\": [{\"to\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"}], \"actionParametersFetched\": false, \"type\": \"start-widget\", \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"actionParameters\": []}, {\"properties\": {}, \"name\": \"End\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"end-widget\", \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"actionParameters\": []}, {\"properties\": {\"message\": \"Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\"}, \"name\": \"Kill\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"kill-widget\", \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"actionParameters\": []}, {\"properties\": {\"retry_max\": [{\"value\": \"5\"}], \"files\": [], \"job_xml\": \"\", \"jar_path\": \"my_jar\", \"job_properties\": [{\"name\": \"prop_1_name\", \"value\": \"prop_1_value\"}], \"archives\": [], \"prepares\": [], \"credentials\": [], \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", \"value\": \"\"}]}, \"name\": 
\"mapreduce-0cf2\", \"children\": [{\"to\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"}, {\"error\": \"17c9c895-5a16-7443-bb81-f34b30b21548\"}], \"actionParametersFetched\": false, \"type\": \"mapreduce-widget\", \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"actionParameters\": []}], \"id\": 50019, \"nodeNamesMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": \"mapreduce-0cf2\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": \"End\", \"3f107997-04cc-8733-60a9-a4bb62cebffc\": \"Start\", \"17c9c895-5a16-7443-bb81-f34b30b21548\": \"Kill\"}, \"uuid\": \"084f4d4c-00f1-62d2-e27e-e153c1f9acfb\"}}") + workflow = Workflow( + data="{\"layout\": [{\"oozieRows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}], \"rows\": [{\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"MapReduce job\", \"widgetType\": \"mapreduce-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"size\": 12}], \"id\": \"e2caca14-8afc-d7e0-287c-88accd0b4253\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}], \"oozieEndRow\": {\"enableOozieDropOnBefore\": true, 
\"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"End\", \"widgetType\": \"end-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"size\": 12}], \"id\": \"6a13d869-d04c-8431-6c5c-dbe67ea33889\", \"columns\": []}, \"oozieKillRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Kill\", \"widgetType\": \"kill-widget\", \"oozieMovable\": true, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"size\": 12}], \"id\": \"e3b56553-7a4f-43d2-b1e2-4dc433280095\", \"columns\": []}, \"enableOozieDropOnAfter\": true, \"oozieStartRow\": {\"enableOozieDropOnBefore\": true, \"enableOozieDropOnSide\": true, \"enableOozieDrop\": false, \"widgets\": [{\"status\": \"\", \"logsURL\": \"\", \"name\": \"Start\", \"widgetType\": \"start-widget\", \"oozieMovable\": false, \"ooziePropertiesExpanded\": false, \"properties\": {}, \"isLoading\": true, \"offset\": 0, \"actionURL\": \"\", \"progress\": 0, \"klass\": \"card card-widget span12\", \"oozieExpanded\": false, \"id\": \"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"size\": 12}], \"id\": \"ff63ee3f-df54-2fa3-477b-65f5e0f0632c\", \"columns\": []}, \"klass\": \"card card-home card-column span12\", \"enableOozieDropOnBefore\": true, \"drops\": [\"temp\"], \"id\": \"0c1908e7-0096-46e7-a16b-b17b1142a730\", \"size\": 12}], \"workflow\": {\"properties\": {\"job_xml\": \"\", \"description\": \"\", \"wf1_id\": null, \"sla_enabled\": false, \"deployment_dir\": \"/user/hue/oozie/workspaces/hue-oozie-1430228904.58\", \"schema_version\": \"uri:oozie:workflow:0.5\", \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", \"value\": \"\"}], \"show_arrows\": true, \"parameters\": [{\"name\": \"oozie.use.system.libpath\", \"value\": true}], \"properties\": []}, \"name\": \"My Workflow\", \"versions\": [\"uri:oozie:workflow:0.4\", \"uri:oozie:workflow:0.4.5\", \"uri:oozie:workflow:0.5\"], \"isDirty\": true, \"movedNode\": null, \"linkMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": [\"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"], \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": [], \"3f107997-04cc-8733-60a9-a4bb62cebffc\": [\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"17c9c895-5a16-7443-bb81-f34b30b21548\": []}, \"nodeIds\": [\"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"], \"nodes\": [{\"properties\": {}, \"name\": \"Start\", \"children\": [{\"to\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\"}], \"actionParametersFetched\": false, \"type\": \"start-widget\", \"id\": 
\"3f107997-04cc-8733-60a9-a4bb62cebffc\", \"actionParameters\": []}, {\"properties\": {}, \"name\": \"End\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"end-widget\", \"id\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\", \"actionParameters\": []}, {\"properties\": {\"message\": \"Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]\"}, \"name\": \"Kill\", \"children\": [], \"actionParametersFetched\": false, \"type\": \"kill-widget\", \"id\": \"17c9c895-5a16-7443-bb81-f34b30b21548\", \"actionParameters\": []}, {\"properties\": {\"retry_max\": [{\"value\": \"5\"}], \"files\": [], \"job_xml\": \"\", \"jar_path\": \"my_jar\", \"job_properties\": [{\"name\": \"prop_1_name\", \"value\": \"prop_1_value\"}], \"archives\": [], \"prepares\": [], \"credentials\": [], \"sla\": [{\"key\": \"enabled\", \"value\": false}, {\"key\": \"nominal-time\", \"value\": \"${nominal_time}\"}, {\"key\": \"should-start\", \"value\": \"\"}, {\"key\": \"should-end\", \"value\": \"${30 * MINUTES}\"}, {\"key\": \"max-duration\", \"value\": \"\"}, {\"key\": \"alert-events\", \"value\": \"\"}, {\"key\": \"alert-contact\", \"value\": \"\"}, {\"key\": \"notification-msg\", \"value\": \"\"}, {\"key\": \"upstream-apps\", \"value\": \"\"}]}, \"name\": \"mapreduce-0cf2\", \"children\": [{\"to\": \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\"}, {\"error\": \"17c9c895-5a16-7443-bb81-f34b30b21548\"}], \"actionParametersFetched\": false, \"type\": \"mapreduce-widget\", \"id\": \"0cf2d5d5-2315-0bda-bd53-0eec257e943f\", \"actionParameters\": []}], \"id\": 50019, \"nodeNamesMapping\": {\"0cf2d5d5-2315-0bda-bd53-0eec257e943f\": \"mapreduce-0cf2\", \"33430f0f-ebfa-c3ec-f237-3e77efa03d0a\": \"End\", \"3f107997-04cc-8733-60a9-a4bb62cebffc\": \"Start\", \"17c9c895-5a16-7443-bb81-f34b30b21548\": \"Kill\"}, \"uuid\": \"084f4d4c-00f1-62d2-e27e-e153c1f9acfb\"}}" # noqa: E501 + ) single_action_wf_data = workflow.create_single_action_workflow_data('0cf2d5d5-2315-0bda-bd53-0eec257e943f') single_action_wf = Workflow(data=single_action_wf_data) @@ -348,7 +527,7 @@ def test_submit_single_action(self): reset = ENABLE_V2.set_for_testing(True) try: response = self.c.get(reverse('oozie:submit_single_action', args=[wf_doc.id, '3f107997-04cc-8733-60a9-a4bb62cebabc'])) - assert [{'name':'Dryrun', 'value': False}, {'name':'ls_arg', 'value': '-l'}] == response.context[0]._data['params_form'].initial + assert [{'name': 'Dryrun', 'value': False}, {'name': 'ls_arg', 'value': '-l'}] == response.context[0]._data['params_form'].initial except Exception as ex: logging.exception(ex) finally: @@ -361,7 +540,7 @@ def test_list_bundles_page(self): assert 'bundles_json' in response.context[0]._data, response.context def test_workflow_dependencies(self): - wf_data = """{"layout": [{"oozieRows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Sub workflow", "widgetType": "subworkflow-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed4", "size": 12}], "id": "a566315f-e0e0-f408-fabd-c4576cc4041d", "columns": []}], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, 
"ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "ec1fbd7f-ff6c-95eb-a865-ed3a3a00fc59", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Sub workflow", "widgetType": "subworkflow-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed4", "size": 12}], "id": "a566315f-e0e0-f408-fabd-c4576cc4041d", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "cd1a181a-9db0-c295-78e4-4d67ecedd057", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "caf2a089-c5d2-4a55-5b90-2a691be25884", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "cd1a181a-9db0-c295-78e4-4d67ecedd057", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "caf2a089-c5d2-4a55-5b90-2a691be25884", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "ec1fbd7f-ff6c-95eb-a865-ed3a3a00fc59", "columns": []}, "klass": "card card-home 
card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "f162ea58-e396-9703-c2b4-329bad4c9fa9", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1462236042.61", "schema_version": "uri:oozie:workflow:0.5", "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}], "show_arrows": true, "wf1_id": null, "properties": []}, "name": "test-sub", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": true, "movedNode": null, "linkMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": [], "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "9a24c7b1-b031-15d6-4086-e8af63be7ed4": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["9a24c7b1-b031-15d6-4086-e8af63be7ed4"]}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548", "9a24c7b1-b031-15d6-4086-e8af63be7ed4"], "nodes": [{"properties": {"uuid": "7705a9dd-164e-67eb-8758-2573800c86e1", "workflow": "7705a9dd-164e-67eb-8758-2573800c86e6", "retry_interval": [], "retry_max": [], "job_properties": [], "credentials": [], "propagate_configuration": true, "sla": [{"key": "enabled", "value": false}, {"key": "nominal-time", "value": "${nominal_time}"}, {"key": "should-start", "value": ""}, {"key": "should-end", "value": "${30 * MINUTES}"}, {"key": "max-duration", "value": ""}, {"key": "alert-events", "value": ""}, {"key": "alert-contact", "value": ""}, {"key": "notification-msg", "value": ""}, {"key": "upstream-apps", "value": ""}]}, "name": "hive-sql", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}, {"error": "17c9c895-5a16-7443-bb81-f34b30b21548"}], "actionParametersFetched": false, "type": "hive-document-widget", "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed3", "actionParameters": []}, {"properties": {}, "name": "Start", "children": [{"to": "9a24c7b1-b031-15d6-4086-e8af63be7ed4"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "", "cc": "", "to": "", "enableMail": false, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": ""}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}, {"properties": {"workflow": "7705a9dd-164e-67eb-8758-2573800c86e5", "retry_interval": [], "retry_max": [], "job_properties": [], "credentials": [], "propagate_configuration": true, "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", 
"key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "subworkflow-9a24", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}, {"error": "17c9c895-5a16-7443-bb81-f34b30b21548"}], "actionParametersFetched": false, "type": "subworkflow-widget", "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed4", "actionParameters": []}], "id": null, "nodeNamesMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": "Kill", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "9a24c7b1-b031-15d6-4086-e8af63be7ed4": "subworkflow-9a24", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start"}, "uuid": "73c6219d-272f-db98-3cd9-d413ea2625ac"}}""" + wf_data = """{"layout": [{"oozieRows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Sub workflow", "widgetType": "subworkflow-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed4", "size": 12}], "id": "a566315f-e0e0-f408-fabd-c4576cc4041d", "columns": []}], "rows": [{"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "ec1fbd7f-ff6c-95eb-a865-ed3a3a00fc59", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Sub workflow", "widgetType": "subworkflow-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed4", "size": 12}], "id": "a566315f-e0e0-f408-fabd-c4576cc4041d", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "cd1a181a-9db0-c295-78e4-4d67ecedd057", "columns": []}, {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "caf2a089-c5d2-4a55-5b90-2a691be25884", "columns": []}], "oozieEndRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "End", "widgetType": "end-widget", "oozieMovable": false, 
"ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "size": 12}], "id": "cd1a181a-9db0-c295-78e4-4d67ecedd057", "columns": []}, "oozieKillRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Kill", "widgetType": "kill-widget", "oozieMovable": true, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "size": 12}], "id": "caf2a089-c5d2-4a55-5b90-2a691be25884", "columns": []}, "enableOozieDropOnAfter": true, "oozieStartRow": {"enableOozieDropOnBefore": true, "enableOozieDropOnSide": true, "enableOozieDrop": false, "widgets": [{"status": "", "logsURL": "", "name": "Start", "widgetType": "start-widget", "oozieMovable": false, "ooziePropertiesExpanded": false, "externalIdUrl": "", "properties": {}, "isLoading": true, "offset": 0, "actionURL": "", "progress": 0, "klass": "card card-widget span12", "oozieExpanded": false, "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "size": 12}], "id": "ec1fbd7f-ff6c-95eb-a865-ed3a3a00fc59", "columns": []}, "klass": "card card-home card-column span12", "enableOozieDropOnBefore": true, "drops": ["temp"], "id": "f162ea58-e396-9703-c2b4-329bad4c9fa9", "size": 12}], "workflow": {"properties": {"job_xml": "", "description": "", "parameters": [{"name": "oozie.use.system.libpath", "value": true}], "sla_enabled": false, "deployment_dir": "/user/hue/oozie/workspaces/hue-oozie-1462236042.61", "schema_version": "uri:oozie:workflow:0.5", "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}], "show_arrows": true, "wf1_id": null, "properties": []}, "name": "test-sub", "versions": ["uri:oozie:workflow:0.4", "uri:oozie:workflow:0.4.5", "uri:oozie:workflow:0.5"], "isDirty": true, "movedNode": null, "linkMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": [], "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": [], "9a24c7b1-b031-15d6-4086-e8af63be7ed4": ["33430f0f-ebfa-c3ec-f237-3e77efa03d0a"], "3f107997-04cc-8733-60a9-a4bb62cebffc": ["9a24c7b1-b031-15d6-4086-e8af63be7ed4"]}, "nodeIds": ["3f107997-04cc-8733-60a9-a4bb62cebffc", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "17c9c895-5a16-7443-bb81-f34b30b21548", "9a24c7b1-b031-15d6-4086-e8af63be7ed4"], "nodes": [{"properties": {"uuid": "7705a9dd-164e-67eb-8758-2573800c86e1", "workflow": "7705a9dd-164e-67eb-8758-2573800c86e6", "retry_interval": [], "retry_max": [], "job_properties": [], "credentials": [], "propagate_configuration": true, "sla": [{"key": "enabled", "value": false}, {"key": "nominal-time", "value": "${nominal_time}"}, {"key": "should-start", "value": ""}, {"key": "should-end", "value": "${30 * MINUTES}"}, {"key": "max-duration", "value": ""}, {"key": "alert-events", "value": ""}, {"key": "alert-contact", "value": ""}, {"key": "notification-msg", "value": ""}, {"key": "upstream-apps", "value": ""}]}, "name": "hive-sql", "children": [{"to": 
"33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}, {"error": "17c9c895-5a16-7443-bb81-f34b30b21548"}], "actionParametersFetched": false, "type": "hive-document-widget", "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed3", "actionParameters": []}, {"properties": {}, "name": "Start", "children": [{"to": "9a24c7b1-b031-15d6-4086-e8af63be7ed4"}], "actionParametersFetched": false, "type": "start-widget", "id": "3f107997-04cc-8733-60a9-a4bb62cebffc", "actionParameters": []}, {"properties": {}, "name": "End", "children": [], "actionParametersFetched": false, "type": "end-widget", "id": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a", "actionParameters": []}, {"properties": {"body": "", "cc": "", "to": "", "enableMail": false, "message": "Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]", "subject": ""}, "name": "Kill", "children": [], "actionParametersFetched": false, "type": "kill-widget", "id": "17c9c895-5a16-7443-bb81-f34b30b21548", "actionParameters": []}, {"properties": {"workflow": "7705a9dd-164e-67eb-8758-2573800c86e5", "retry_interval": [], "retry_max": [], "job_properties": [], "credentials": [], "propagate_configuration": true, "sla": [{"value": false, "key": "enabled"}, {"value": "${nominal_time}", "key": "nominal-time"}, {"value": "", "key": "should-start"}, {"value": "${30 * MINUTES}", "key": "should-end"}, {"value": "", "key": "max-duration"}, {"value": "", "key": "alert-events"}, {"value": "", "key": "alert-contact"}, {"value": "", "key": "notification-msg"}, {"value": "", "key": "upstream-apps"}]}, "name": "subworkflow-9a24", "children": [{"to": "33430f0f-ebfa-c3ec-f237-3e77efa03d0a"}, {"error": "17c9c895-5a16-7443-bb81-f34b30b21548"}], "actionParametersFetched": false, "type": "subworkflow-widget", "id": "9a24c7b1-b031-15d6-4086-e8af63be7ed4", "actionParameters": []}], "id": null, "nodeNamesMapping": {"17c9c895-5a16-7443-bb81-f34b30b21548": "Kill", "33430f0f-ebfa-c3ec-f237-3e77efa03d0a": "End", "9a24c7b1-b031-15d6-4086-e8af63be7ed4": "subworkflow-9a24", "3f107997-04cc-8733-60a9-a4bb62cebffc": "Start"}, "uuid": "73c6219d-272f-db98-3cd9-d413ea2625ac"}}""" # noqa: E501 wf_doc1 = Document2.objects.create(name='test', type='oozie-workflow2', owner=self.user, data=wf_data) Document.objects.link(wf_doc1, owner=wf_doc1.owner, name=wf_doc1.name, description=wf_doc1.description, extra='workflow2') @@ -371,36 +550,36 @@ def test_workflow_dependencies(self): # Add coordinator dependency coord_data = { - 'id': None, - 'uuid': None, - 'name': 'My Schedule', - 'variables': [], # Aka workflow parameters - 'properties': { - 'description': '', - 'deployment_dir': '', - 'schema_version': 'uri:oozie:coordinator:0.2', - 'frequency_number': 1, - 'frequency_unit': 'days', - 'cron_frequency': '0 0 * * *', - 'cron_advanced': False, - 'timezone': '', - 'start': '${start_date}', - 'end': '${end_date}', - 'workflow': None, - 'timeout': None, - 'concurrency': None, - 'execution': None, - 'throttle': None, - 'job_xml': '', - 'credentials': [], - 'parameters': [ - {'name': 'oozie.use.system.libpath', 'value': True}, - {'name': 'start_date', 'value': ''}, - {'name': 'end_date', 'value': ''} - ], - 'sla': WorkflowConfiguration.SLA_DEFAULT - } - } + 'id': None, + 'uuid': None, + 'name': 'My Schedule', + 'variables': [], # Aka workflow parameters + 'properties': { + 'description': '', + 'deployment_dir': '', + 'schema_version': 'uri:oozie:coordinator:0.2', + 'frequency_number': 1, + 'frequency_unit': 'days', + 'cron_frequency': '0 0 * * *', + 'cron_advanced': False, + 'timezone': '', + 'start': '${start_date}', + 'end': 
'${end_date}', + 'workflow': None, + 'timeout': None, + 'concurrency': None, + 'execution': None, + 'throttle': None, + 'job_xml': '', + 'credentials': [], + 'parameters': [ + {'name': 'oozie.use.system.libpath', 'value': True}, + {'name': 'start_date', 'value': ''}, + {'name': 'end_date', 'value': ''}, + ], + 'sla': WorkflowConfiguration.SLA_DEFAULT, + }, + } wf_doc2 = Document2.objects.create(name='test', type='oozie-coordinator2', owner=wf_doc1.owner, data=coord_data) wf_doc1.dependencies.add(wf_doc2) wf_doc1.save() @@ -410,11 +589,15 @@ def test_workflow_dependencies(self): # Add query doc to Doc2 hive_node_props = [node['properties'] for node in workflow_data['nodes'] if node['type'] == 'hive-document-widget'][0] - query_doc = Document2.objects.create(name='test', uuid=hive_node_props['uuid'], type='query-hive', owner=wf_doc1.owner, description='test') + query_doc = Document2.objects.create( + name='test', uuid=hive_node_props['uuid'], type='query-hive', owner=wf_doc1.owner, description='test' + ) # Add subworkflow doc to Doc2 subworkflow_node_props = [node['properties'] for node in workflow_data['nodes'] if node['type'] == 'subworkflow-widget'][0] - subworkflow_doc = Document2.objects.create(name='test', uuid=subworkflow_node_props['workflow'], type='oozie-workflow2', owner=wf_doc1.owner, description='test') + subworkflow_doc = Document2.objects.create( + name='test', uuid=subworkflow_node_props['workflow'], type='oozie-workflow2', owner=wf_doc1.owner, description='test' + ) workflow_data['id'] = wf_doc1.id response = self.c.post(reverse('oozie:save_workflow'), {'workflow': json.dumps(workflow_data), 'layout': json.dumps(layout_data)}) @@ -436,7 +619,6 @@ def test_workflow_dependencies(self): subworkflow_doc.delete() query_doc.delete() - def test_editor_access_permissions(self): group = 'no_editor' @@ -464,7 +646,6 @@ def test_editor_access_permissions(self): finally: remove_from_group("test", group) - def test_share_workflow(self): try: wf_doc = save_temp_workflow(MockOozieApi.JSON_WORKFLOW_LIST[5], self.user) @@ -481,11 +662,10 @@ def test_share_workflow(self): # other user can access document response = self.client_not_me.get(reverse('oozie:edit_workflow'), {'workflow': wf_doc.uuid}) - assert not b'Document does not exist or you don't have the permission to access it.' in response.content, response.content + assert b'Document does not exist or you don't have the permission to access it.' 
not in response.content, response.content finally: wf_doc.delete() - def test_list_editor_workflows(self): wf_doc = save_temp_workflow(MockOozieApi.JSON_WORKFLOW_LIST[5], self.user) reset = ENABLE_V2.set_for_testing(True) @@ -502,12 +682,11 @@ def test_list_editor_workflows(self): assert response.status_code == 200 data = json.loads(response.context[0]['workflows_json']) uuids = [doc['uuid'] for doc in data] - assert not wf_doc.uuid in uuids, data + assert wf_doc.uuid not in uuids, data finally: reset() wf_doc.delete() - def test_workflow_properties(self): reset = USE_DEFAULT_CONFIGURATION.set_for_testing(True) @@ -525,20 +704,16 @@ def test_workflow_properties(self): 'nice_name': 'Workspace', 'key': 'deployment_dir', 'help_text': 'Specify the deployment directory.', - 'type': 'hdfs-file' - }, { + 'type': 'hdfs-file', + }, + { 'multiple': True, - 'value': [ - { - 'value': 'test', - 'key': 'mapred.queue.name' - } - ], + 'value': [{'value': 'test', 'key': 'mapred.queue.name'}], 'nice_name': 'Hadoop Properties', 'key': 'properties', 'help_text': 'Hadoop configuration properties.', - 'type': 'settings' - } + 'type': 'settings', + }, ] wf_props = Workflow.get_properties() @@ -555,28 +730,20 @@ def test_workflow_properties(self): # Test that a new workflow will be initialized with Group saved config if it exists properties = [ { - 'multiple': True, - 'value': [ - { - 'value': 'org.myorg.WordCount.Map', - 'key': 'mapred.mapper.class' - }, - { - 'value': 'org.myorg.WordCount.Reduce', - 'key': 'mapred.reducer.class' - } - ], - 'nice_name': 'Hadoop Properties', - 'key': 'properties', - 'help_text': 'Hadoop configuration properties.', - 'type': 'settings' + 'multiple': True, + 'value': [ + {'value': 'org.myorg.WordCount.Map', 'key': 'mapred.mapper.class'}, + {'value': 'org.myorg.WordCount.Reduce', 'key': 'mapred.reducer.class'}, + ], + 'nice_name': 'Hadoop Properties', + 'key': 'properties', + 'help_text': 'Hadoop configuration properties.', + 'type': 'settings', } ] wf_props = Workflow.get_properties() - config = DefaultConfiguration.objects.create(app=WorkflowConfiguration.APP_NAME, - properties=json.dumps(properties), - is_default=False) + config = DefaultConfiguration.objects.create(app=WorkflowConfiguration.APP_NAME, properties=json.dumps(properties), is_default=False) config.groups.add(self.user.groups.first()) config.save() wf_props.update(config.properties_dict) @@ -589,9 +756,9 @@ def test_workflow_properties(self): finally: reset() + @pytest.mark.django_db class TestExternalWorkflowGraph(object): - def setup_method(self): self.wf = Workflow() @@ -603,17 +770,51 @@ def setup_method(self): def test_graph_generation_from_xml(self): f = open('apps/oozie/src/oozie/test_data/xslt2/test-workflow.xml') self.wf.definition = f.read() - self.node_list = [{u'node_type': u'start', u'ok_to': u'fork-68d4', u'name': u''}, {u'node_type': u'kill', u'ok_to': u'', u'name': u'Kill'}, {u'path2': u'shell-0f44', u'node_type': u'fork', u'ok_to': u'', u'name': u'fork-68d4', u'path1': u'subworkflow-a13f'}, {u'node_type': u'join', u'ok_to': u'End', u'name': u'join-775e'}, {u'node_type': u'end', u'ok_to': u'', u'name': u'End'}, {u'subworkflow': {u'app-path': u'${nameNode}/user/hue/oozie/deployments/_admin_-oozie-50001-1427488969.48'}, u'node_type': u'sub-workflow', u'ok_to': u'join-775e', u'name': u'subworkflow-a13f', u'error_to': u'Kill'}, {u'shell': {u'command': u'ls'}, u'node_type': u'shell', u'ok_to': u'join-775e', u'name': u'shell-0f44', u'error_to': u'Kill'}] + self.node_list = [ + {'node_type': 'start', 'ok_to': 
'fork-68d4', 'name': ''}, + {'node_type': 'kill', 'ok_to': '', 'name': 'Kill'}, + {'path2': 'shell-0f44', 'node_type': 'fork', 'ok_to': '', 'name': 'fork-68d4', 'path1': 'subworkflow-a13f'}, + {'node_type': 'join', 'ok_to': 'End', 'name': 'join-775e'}, + {'node_type': 'end', 'ok_to': '', 'name': 'End'}, + { + 'subworkflow': {'app-path': '${nameNode}/user/hue/oozie/deployments/_admin_-oozie-50001-1427488969.48'}, + 'node_type': 'sub-workflow', + 'ok_to': 'join-775e', + 'name': 'subworkflow-a13f', + 'error_to': 'Kill', + }, + {'shell': {'command': 'ls'}, 'node_type': 'shell', 'ok_to': 'join-775e', 'name': 'shell-0f44', 'error_to': 'Kill'}, + ] assert self.node_list == generate_v2_graph_nodes(self.wf.definition) def test_get_graph_adjacency_list(self): - self.node_list = [{u'node_type': u'start', u'ok_to': u'fork-68d4', u'name': u''}, {u'node_type': u'kill', u'ok_to': u'', u'name': u'kill'}, {u'path2': u'shell-0f44', u'node_type': u'fork', u'ok_to': u'', u'name': u'fork-68d4', u'path1': u'subworkflow-a13f'}, {u'node_type': u'join', u'ok_to': u'end', u'name': u'join-775e'}, {u'node_type': u'end', u'ok_to': u'', u'name': u'end'}, {u'node_type': u'sub-workflow', u'ok_to': u'join-775e', u'sub-workflow': {u'app-path': u'${nameNode}/user/hue/oozie/deployments/_admin_-oozie-50001-1427488969.48'}, u'name': u'subworkflow-a13f', u'error_to': u'kill'}, {u'shell': {u'command': u'ls'}, u'node_type': u'shell', u'ok_to': u'join-775e', u'name': u'shell-0f44', u'error_to': u'kill'}] + self.node_list = [ + {'node_type': 'start', 'ok_to': 'fork-68d4', 'name': ''}, + {'node_type': 'kill', 'ok_to': '', 'name': 'kill'}, + {'path2': 'shell-0f44', 'node_type': 'fork', 'ok_to': '', 'name': 'fork-68d4', 'path1': 'subworkflow-a13f'}, + {'node_type': 'join', 'ok_to': 'end', 'name': 'join-775e'}, + {'node_type': 'end', 'ok_to': '', 'name': 'end'}, + { + 'node_type': 'sub-workflow', + 'ok_to': 'join-775e', + 'sub-workflow': {'app-path': '${nameNode}/user/hue/oozie/deployments/_admin_-oozie-50001-1427488969.48'}, + 'name': 'subworkflow-a13f', + 'error_to': 'kill', + }, + {'shell': {'command': 'ls'}, 'node_type': 'shell', 'ok_to': 'join-775e', 'name': 'shell-0f44', 'error_to': 'kill'}, + ] adj_list = _create_graph_adjaceny_list(self.node_list) assert len(adj_list) == 7 assert 'subworkflow-a13f' in list(adj_list.keys()) assert adj_list['shell-0f44']['shell']['command'] == 'ls' - assert adj_list['fork-68d4'] == {u'path2': u'shell-0f44', u'node_type': u'fork', u'ok_to': u'', u'name': u'fork-68d4', u'path1': u'subworkflow-a13f'} + assert adj_list['fork-68d4'] == { + 'path2': 'shell-0f44', + 'node_type': 'fork', + 'ok_to': '', + 'name': 'fork-68d4', + 'path1': 'subworkflow-a13f', + } def test_get_hierarchy_from_adj_list(self): self.wf.definition = """ @@ -677,9 +878,13 @@ def test_get_hierarchy_from_adj_list(self): node_hierarchy = ['start'] _get_hierarchy_from_adj_list(adj_list, adj_list['start']['ok_to'], node_hierarchy) - expected_node_hierarchy_py2 = ['start', [u'fork-fe93', [[u'shell-bd90'], [u'shell-d64c'], [u'shell-5429'], [u'shell-d8cc']], u'join-7f80'], ['Kill'], ['End']] - expected_node_hierarchy_py3 = ['start', [u'fork-fe93', [[u'shell-5429'], [u'shell-bd90'], [u'shell-d64c'], [u'shell-d8cc']], u'join-7f80'], ['Kill'], ['End']] - assert node_hierarchy == (expected_node_hierarchy_py3 if sys.version_info[0] > 2 else expected_node_hierarchy_py2) + expected_node_hierarchy_py3 = [ + 'start', + ['fork-fe93', [['shell-5429'], ['shell-bd90'], ['shell-d64c'], ['shell-d8cc']], 'join-7f80'], + ['Kill'], + ['End'], + ] + assert 
node_hierarchy == expected_node_hierarchy_py3 def test_gen_workflow_data_from_xml(self): self.wf.definition = """ @@ -818,7 +1023,7 @@ def test_gen_workflow_data_from_xml_for_fs(self): assert len(workflow_data['workflow']['nodes']) == 4 assert workflow_data['layout'][0]['rows'][1]['widgets'][0]['widgetType'] == 'fs-widget' assert len(workflow_data['workflow']['nodes'][1]['properties']['deletes']), 2 - assert workflow_data['workflow']['nodes'][1]['properties']['deletes'][0]['value'] == u'${nameNode}/user/admin/y' + assert workflow_data['workflow']['nodes'][1]['properties']['deletes'][0]['value'] == '${nameNode}/user/admin/y' def test_gen_workflow_data_from_xml_for_decision_node(self): self.wf.definition = """ @@ -914,7 +1119,6 @@ def test_gen_workflow_data_from_xml_for_decision_node(self): assert len(workflow_data['workflow']['nodes'][7]['children']) == 2 def test_gen_workflow_data_from_xml_for_oozie_old_schemas(self): - common_wf_definition = """ @@ -1101,31 +1305,41 @@ def test_gen_workflow_data_from_xml_for_oozie_old_schemas(self): self.wf.definition = common_wf_definition % 0.1 workflow_data_01 = Workflow.gen_workflow_data_from_xml(self.user, self.wf) - assert (len(workflow_data_01['layout'][0]['rows']) == - len(workflow_data_02['layout'][0]['rows']) == - len(workflow_data_03['layout'][0]['rows']) == - len(workflow_data_04['layout'][0]['rows']) == - 10) - assert (len(workflow_data_01['workflow']['nodes']) == - len(workflow_data_02['workflow']['nodes']) == - len(workflow_data_03['workflow']['nodes']) == - len(workflow_data_04['workflow']['nodes']) == - 22) - assert (workflow_data_01['layout'][0]['rows'][5]['widgets'][0]['widgetType'] == - workflow_data_02['layout'][0]['rows'][5]['widgets'][0]['widgetType'] == - workflow_data_03['layout'][0]['rows'][5]['widgets'][0]['widgetType'] == - workflow_data_04['layout'][0]['rows'][5]['widgets'][0]['widgetType'] == - 'fork-widget') - assert (workflow_data_01['workflow']['nodes'][7]['type'] == - workflow_data_02['workflow']['nodes'][7]['type'] == - workflow_data_03['workflow']['nodes'][7]['type'] == - workflow_data_04['workflow']['nodes'][7]['type'] == - 'hive-widget' if sys.version_info[0] == 2 else 'spark-widget') - assert (len(workflow_data_01['workflow']['nodes'][7]['children']) == - len(workflow_data_02['workflow']['nodes'][7]['children']) == - len(workflow_data_03['workflow']['nodes'][7]['children']) == - len(workflow_data_04['workflow']['nodes'][7]['children']) == - 2) + assert ( + len(workflow_data_01['layout'][0]['rows']) + == len(workflow_data_02['layout'][0]['rows']) + == len(workflow_data_03['layout'][0]['rows']) + == len(workflow_data_04['layout'][0]['rows']) + == 10 + ) + assert ( + len(workflow_data_01['workflow']['nodes']) + == len(workflow_data_02['workflow']['nodes']) + == len(workflow_data_03['workflow']['nodes']) + == len(workflow_data_04['workflow']['nodes']) + == 22 + ) + assert ( + workflow_data_01['layout'][0]['rows'][5]['widgets'][0]['widgetType'] + == workflow_data_02['layout'][0]['rows'][5]['widgets'][0]['widgetType'] + == workflow_data_03['layout'][0]['rows'][5]['widgets'][0]['widgetType'] + == workflow_data_04['layout'][0]['rows'][5]['widgets'][0]['widgetType'] + == 'fork-widget' + ) + assert ( + workflow_data_01['workflow']['nodes'][7]['type'] + == workflow_data_02['workflow']['nodes'][7]['type'] + == workflow_data_03['workflow']['nodes'][7]['type'] + == workflow_data_04['workflow']['nodes'][7]['type'] + == 'spark-widget' + ) + assert ( + len(workflow_data_01['workflow']['nodes'][7]['children']) + == 
len(workflow_data_02['workflow']['nodes'][7]['children']) + == len(workflow_data_03['workflow']['nodes'][7]['children']) + == len(workflow_data_04['workflow']['nodes'][7]['children']) + == 2 + ) def test_gen_workflow_data_from_xml_for_spark_schema02(self): self.wf.definition = """ @@ -1232,7 +1446,7 @@ def test_gen_workflow_data_for_xml_with_multiple_generic_nodes(self): assert len(workflow_data['workflow']['nodes'][1]['children']) == 2 def test_get_hierarchy_from_adj_list_throws_exception(self): - self.wf.definition = """ @@ -1666,15 +1880,15 @@ def test_get_hierarchy_from_adj_list_throws_exception(self): """ - self.node_list = generate_v2_graph_nodes(self.wf.definition) - adj_list = _create_graph_adjaceny_list(self.node_list) - node_hierarchy = ['start'] - with pytest.raises(WorkflowDepthReached): - _get_hierarchy_from_adj_list(adj_list, adj_list['start']['ok_to'], node_hierarchy) + self.node_list = generate_v2_graph_nodes(self.wf.definition) + adj_list = _create_graph_adjaceny_list(self.node_list) + node_hierarchy = ['start'] + with pytest.raises(WorkflowDepthReached): + _get_hierarchy_from_adj_list(adj_list, adj_list['start']['ok_to'], node_hierarchy) + @pytest.mark.django_db class TestModelAPI(OozieMockBase): - def setup_method(self): super(TestModelAPI, self).setup_method() self.wf = Workflow() @@ -1682,7 +1896,6 @@ def setup_method(self): self.client_not_me = make_logged_in_client(username="not_perm_user", groupname="default", recreate=True, is_superuser=False) self.user_not_me = User.objects.get(username="not_perm_user") - def test_gen_workflow_from_document(self): notebook = make_notebook(name='Browse', editor_type='hive', statement='SHOW TABLES', status='ready') notebook_doc, save_as = _save_notebook(notebook.get_data(), self.user) @@ -1694,7 +1907,6 @@ def test_gen_workflow_from_document(self): _data = workflow.get_data() assert len(_data['workflow']['nodes']) == 4 - def test_gen_pig_document(self): notebook = make_notebook(name='Browse', editor_type='pig', statement='ls', status='ready') notebook_doc, save_as = _save_notebook(notebook.get_data(), self.user) @@ -1706,22 +1918,22 @@ def test_gen_pig_document(self): _data = workflow.get_data() assert len(_data['workflow']['nodes']) == 4 - def test_find_all_parameters_check_validity(self): wf_data = Workflow.get_default_workflow() wf_data['properties'] = Workflow.get_properties() - wf_data['nodes'] = [{ - u'name': u'Start', - u'properties': {'parameters': [{'value': 'a=1'}, {'value': 'b'}, {'value': ''}, {'value':'c=d=1'}]}, - u'id': u'3f107997-04cc-8733-60a9-a4bb62cebffc', - u'type': u'document-widget', - u'children': [{u'to': u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a'}], - u'actionParameters': [], - }] - - assert {u'a': u'1', u'c': u'd=1'} == Workflow(data=json.dumps({'workflow': wf_data})).find_parameters() + wf_data['nodes'] = [ + { + 'name': 'Start', + 'properties': {'parameters': [{'value': 'a=1'}, {'value': 'b'}, {'value': ''}, {'value': 'c=d=1'}]}, + 'id': '3f107997-04cc-8733-60a9-a4bb62cebffc', + 'type': 'document-widget', + 'children': [{'to': '33430f0f-ebfa-c3ec-f237-3e77efa03d0a'}], + 'actionParameters': [], + } + ] + assert {'a': '1', 'c': 'd=1'} == Workflow(data=json.dumps({'workflow': wf_data})).find_parameters() def test_gen_hive_xml(self): notebook = make_notebook(name='Browse', editor_type='hive', statement='SHOW TABLES', status='ready') @@ -1730,34 +1942,32 @@ def test_gen_hive_xml(self): workflow_doc = WorkflowBuilder().create_workflow(document=notebook_doc, user=self.user, managed=True) workflow = 
Workflow(document=workflow_doc, user=self.user) - assert re.search('', workflow.to_xml({'output': '/path'})) - + assert re.search(r'', workflow.to_xml({'output': '/path'})) def test_gen_workflow_from_notebook(self): snippets = [ { - 'status': 'running', - 'statement_raw': 'SHOW TABLES', - 'statement': 'SHOW TABLES', - 'type': 'hive', - 'properties': { - }, - 'database': 'default', + 'status': 'running', + 'statement_raw': 'SHOW TABLES', + 'statement': 'SHOW TABLES', + 'type': 'hive', + 'properties': {}, + 'database': 'default', }, { 'type': 'java', 'status': 'running', - 'properties': { + 'properties': { 'files': [], 'class': 'org.apache.solr.hadoop.MapReduceIndexerTool', 'app_jar': '/user/hue/app.jar', 'arguments': [ - '--morphline-file', - 'morphline.conf', + '--morphline-file', + 'morphline.conf', ], 'archives': [], - } - } + }, + }, ] notebook = make_notebook2(name='2 actions', snippets=snippets) diff --git a/apps/oozie/src/oozie/old_migrations/0020_chg_large_varchars_to_textfields.py b/apps/oozie/src/oozie/old_migrations/0020_chg_large_varchars_to_textfields.py index cae2ecc32cd..140a64da33c 100644 --- a/apps/oozie/src/oozie/old_migrations/0020_chg_large_varchars_to_textfields.py +++ b/apps/oozie/src/oozie/old_migrations/0020_chg_large_varchars_to_textfields.py @@ -6,10 +6,7 @@ from south.v2 import SchemaMigration from django.db import models -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ class Migration(SchemaMigration): diff --git a/apps/oozie/src/oozie/tests.py b/apps/oozie/src/oozie/tests.py index f1933016798..b8aa8bf557b 100644 --- a/apps/oozie/src/oozie/tests.py +++ b/apps/oozie/src/oozie/tests.py @@ -60,10 +60,7 @@ from oozie.importlib.workflows import import_workflow from oozie.importlib.jobdesigner import convert_jobsub_design -if sys.version_info[0] > 2: - from io import BytesIO as string_io -else: - from cStringIO import StringIO as string_io +from io import BytesIO as string_io LOG = logging.getLogger() diff --git a/apps/oozie/src/oozie/urls.py b/apps/oozie/src/oozie/urls.py index fd1badad857..48cb97b56d2 100644 --- a/apps/oozie/src/oozie/urls.py +++ b/apps/oozie/src/oozie/urls.py @@ -17,15 +17,14 @@ import sys -from oozie.views import editor as oozie_views_editor -from oozie.views import editor2 as oozie_views_editor2 -from oozie.views import api as oozie_views_api -from oozie.views import dashboard as oozie_views_dashboard +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from oozie.views import ( + api as oozie_views_api, + dashboard as oozie_views_dashboard, + editor as oozie_views_editor, + editor2 as oozie_views_editor2, +) IS_URL_NAMESPACED = True @@ -82,7 +81,7 @@ name='edit_bundled_coordinator' ), - re_path(r'^list_history$', oozie_views_editor.list_history, name='list_history'), # Unused + re_path(r'^list_history$', oozie_views_editor.list_history, name='list_history'), # Unused re_path(r'^list_history/(?P[-\w]+)$', oozie_views_editor.list_history_record, name='list_history_record'), re_path(r'^install_examples/?$', oozie_views_editor.install_examples, name='install_examples'), ] @@ -184,4 +183,4 @@ re_path(r'^list_oozie_info/?$', oozie_views_dashboard.list_oozie_info, name='list_oozie_info'), re_path(r'^list_oozie_sla/?$', oozie_views_dashboard.list_oozie_sla, name='list_oozie_sla'), -] \ No newline at end of file +] diff 
--git a/apps/oozie/src/oozie/utils.py b/apps/oozie/src/oozie/utils.py index a6d2c0e22f5..ab1bcf054e8 100644 --- a/apps/oozie/src/oozie/utils.py +++ b/apps/oozie/src/oozie/utils.py @@ -15,35 +15,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import str -from past.builtins import basestring -import json -import logging import re import sys +import json +import logging import urllib.parse +from builtins import str from datetime import datetime -from dateutil import tz -from dateutil import parser +from dateutil import parser, tz from django.utils.formats import localize_input -from desktop.lib.parameterization import find_variables -from liboozie.oozie_api import get_oozie, DEFAULT_USER - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ +from past.builtins import basestring +from desktop.lib.parameterization import find_variables +from liboozie.oozie_api import DEFAULT_USER, get_oozie LOG = logging.getLogger() JSON_FIELDS = ('parameters', 'job_properties', 'files', 'archives', 'prepares', 'params', 'deletes', 'mkdirs', 'moves', 'chmods', 'touchzs') -BOOLEAN_FIELDS = ('propagate_configuration','capture_output') +BOOLEAN_FIELDS = ('propagate_configuration', 'capture_output') NUMBER_FIELDS_OR_NULL = ('sub_workflow',) GMT_TIME_FORMAT = "%Y-%m-%dT%H:%MGMT%z" UTC_TIME_FORMAT = "%Y-%m-%dT%H:%MZ" @@ -137,10 +130,12 @@ def smart_path(path, mapping=None, is_coordinator=False): return path + def contains_symlink(path, mapping): vars = find_variables(path) return any([var in mapping and '#' in mapping[var] for var in vars]) or '#' in path + def utc_datetime_format(utc_time): if utc_time and type(utc_time) is datetime: return utc_time.strftime(UTC_TIME_FORMAT) @@ -184,6 +179,7 @@ def oozie_to_hue_frequency(frequency_string): else: raise InvalidFrequency(_('invalid frequency: %s') % frequency_string) + def convert_to_server_timezone(date, local_tz='UTC', server_tz=None, user=DEFAULT_USER): api = get_oozie(user) @@ -211,4 +207,4 @@ def convert_to_server_timezone(date, local_tz='UTC', server_tz=None, user=DEFAUL return date_server_tz.strftime('%Y-%m-%dT%H:%M') + date_server_tz.strftime('%z') except TypeError as ValueError: LOG.error("Failed to convert Oozie timestamp: %s" % date) - return None \ No newline at end of file + return None diff --git a/apps/oozie/src/oozie/views/api.py b/apps/oozie/src/oozie/views/api.py index 16723aca6df..df512e2d022 100644 --- a/apps/oozie/src/oozie/views/api.py +++ b/apps/oozie/src/oozie/views/api.py @@ -15,40 +15,46 @@ # See the License for the specific language governing permissions and # limitations under the License. 
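The oozie.utils hunk above keeps the dateutil-based convert_to_server_timezone helper and its "%Y-%m-%dT%H:%M" + "%z" output shape while only reordering imports. Below is a minimal standalone sketch of that style of conversion, assuming python-dateutil is installed; the helper name, timezones, and timestamp are invented for illustration, and the real helper additionally asks the Oozie API (get_oozie(user)) for the server timezone.

from dateutil import parser, tz


def to_server_timezone(date_string, local_tz='America/Los_Angeles', server_tz='UTC'):
    # Parse the naive timestamp, pin it to the user's timezone, then re-express
    # it in the server timezone using the same "%Y-%m-%dT%H:%M" + "%z" shape
    # the Oozie submission code expects.
    localized = parser.parse(date_string).replace(tzinfo=tz.gettz(local_tz))
    server_time = localized.astimezone(tz.gettz(server_tz))
    return server_time.strftime('%Y-%m-%dT%H:%M') + server_time.strftime('%z')


print(to_server_timezone('2024-03-01T09:30'))  # 2024-03-01T17:30+0000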
-from builtins import str -from builtins import range -from past.builtins import basestring -import json -import logging import re import sys +import json +import logging +from builtins import range, str from django.http import Http404 +from django.utils.translation import gettext as _ +from past.builtins import basestring from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions import StructuredException from desktop.lib.i18n import force_unicode from desktop.models import Document - -from oozie.forms import WorkflowForm, NodeForm, design_form_by_type -from oozie.models import Workflow, Node, Start, End, Kill,\ - Link, Decision, Fork, DecisionEnd, Join,\ - NODE_TYPES, ACTION_TYPES, _STD_PROPERTIES from oozie.decorators import check_job_access_permission, check_job_edition_permission -from oozie.utils import model_to_dict, format_dict_field_values, format_field_value - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from oozie.forms import NodeForm, WorkflowForm, design_form_by_type +from oozie.models import ( + _STD_PROPERTIES, + ACTION_TYPES, + NODE_TYPES, + Decision, + DecisionEnd, + End, + Fork, + Join, + Kill, + Link, + Node, + Start, + Workflow, +) +from oozie.utils import format_dict_field_values, format_field_value, model_to_dict LOG = logging.getLogger() try: - from jobbrowser.views import job_single_logs from jobbrowser.models import LinkJobLogs -except: + from jobbrowser.views import job_single_logs +except Exception: LOG.warning('Oozie is not enabled') @@ -225,9 +231,11 @@ def _validate_nodes_json(json_nodes, errors, user, workflow): node_result = True link_result = _validate_node_links_json(node['node_type'], node_dict['child_links'], _errors) result = result and node_result and link_result - if 'name' not in node and ( 'node_type' not in node or 'id' not in node ): - raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': 'Node is missing a name.'}, error_code=400) - errors[node.get('name', '%s-%s' % ( node.get('node_type'), node.get('id')))] = _errors + if 'name' not in node and ('node_type' not in node or 'id' not in node): + raise StructuredException( + code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': 'Node is missing a name.'}, error_code=400 + ) + errors[node.get('name', '%s-%s' % (node.get('node_type'), node.get('id')))] = _errors return result @@ -246,9 +254,9 @@ def _update_workflow_nodes_json(workflow, json_nodes, id_map, user): # sub_workflow is None node.sub_workflow = None except Workflow.DoesNotExist: - raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': 'Chosen subworkflow does not exist.'}, error_code=400) + raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': 'Chosen subworkflow does not exist.'}, error_code=400) # noqa: E501 elif node.node_type == 'fork' and json_node['node_type'] == 'decision': - node.save() # Need to save in case database throws error when performing delete. + node.save() # Need to save in case database throws error when performing delete. 
node = node.convert_to_decision() node.save() @@ -314,8 +322,12 @@ def _workflow(request, workflow): @error_handler -@check_job_access_permission(exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401))) -@check_job_edition_permission(exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401))) +@check_job_access_permission( + exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401)) +) +@check_job_edition_permission( + exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401)) +) def workflow_validate_node(request, workflow, node_type): response = {'status': -1, 'data': {}} @@ -331,8 +343,12 @@ def workflow_validate_node(request, workflow, node_type): # Workflow and child links are SPECIAL. @error_handler -@check_job_access_permission(exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401))) -@check_job_edition_permission(exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401))) +@check_job_access_permission( + exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401)) +) +@check_job_edition_permission( + exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401)) +) def workflow_save(request, workflow): if request.method != 'POST': raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_('Must be POST request.'), error_code=405) @@ -343,7 +359,9 @@ def workflow_save(request, workflow): form = WorkflowForm(data=json_workflow) if not form.is_valid(): - raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': form.errors}, error_code=400) + raise StructuredException( + code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': form.errors}, error_code=400 + ) json_nodes = json_workflow['nodes'] id_map = {} @@ -385,7 +403,9 @@ def workflow_save(request, workflow): @error_handler -@check_job_access_permission(exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401))) +@check_job_access_permission( + exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401)) +) def workflow(request, workflow): if request.method != 'GET': raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_('Must be GET request.'), error_code=405) @@ -394,7 +414,9 @@ def workflow(request, workflow): @error_handler -@check_job_access_permission(exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401))) +@check_job_access_permission( + exception_class=(lambda x: StructuredException(code="UNAUTHORIZED_REQUEST_ERROR", message=x, data=None, error_code=401)) +) def workflow_actions(request, workflow): if request.method != 'GET': raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_('Must be GET request.'), error_code=405) @@ -415,9 +437,9 @@ def workflows(request): raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_('Must be GET request.'), error_code=405) if request.GET.get('managed', 'false').lower() == 'false': - 
extra='jobsub' + extra = 'jobsub' else: - extra='' + extra = '' workflow_docs = Document.objects.get_docs(request.user, Workflow, extra=extra) @@ -432,7 +454,7 @@ def workflows(request): def autocomplete_properties(request): - return JsonResponse({ 'properties': _STD_PROPERTIES }) + return JsonResponse({'properties': _STD_PROPERTIES}) @error_handler @@ -500,6 +522,6 @@ def _get_progress(job, log): return 100 else: try: - return int(re.findall("MapReduceLauncher - (1?\d?\d)% complete", log)[-1]) - except: + return int(re.findall(r"MapReduceLauncher - (1?\d?\d)% complete", log)[-1]) + except Exception: return 0 diff --git a/apps/oozie/src/oozie/views/dashboard.py b/apps/oozie/src/oozie/views/dashboard.py index 1eb62a86a55..f1bf05a9eb0 100644 --- a/apps/oozie/src/oozie/views/dashboard.py +++ b/apps/oozie/src/oozie/views/dashboard.py @@ -15,56 +15,49 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import str -import json -import logging import os import re import sys +import json import time -import urllib.request, urllib.parse, urllib.error +import logging +import urllib.error +import urllib.parse +import urllib.request +from builtins import str from django.forms.formsets import formset_factory from django.http import HttpResponse -from django.utils.functional import wraps -from django.urls import reverse from django.shortcuts import redirect +from django.urls import reverse +from django.utils.functional import wraps +from django.utils.translation import gettext as _ from azure.abfs.__init__ import abfspath - +from desktop.auth.backend import is_admin from desktop.conf import TIME_ZONE from desktop.lib import django_mako from desktop.lib.django_util import JsonResponse, render -from desktop.lib.json_utils import JSONEncoderForHTML from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_str, smart_unicode +from desktop.lib.i18n import smart_str +from desktop.lib.json_utils import JSONEncoderForHTML from desktop.lib.paths import SAFE_CHARACTERS_URI_COMPONENTS from desktop.lib.rest.http_client import RestException from desktop.lib.view_util import format_duration_in_millis from desktop.log.access import access_warn from desktop.models import Document, Document2 - from hadoop.fs.hadoopfs import Hdfs from liboozie.credentials import Credentials from liboozie.oozie_api import get_oozie from liboozie.submission2 import Submission from liboozie.utils import catch_unicode_time - -from oozie.conf import OOZIE_JOBS_COUNT, ENABLE_CRON_SCHEDULING, ENABLE_V2, ENABLE_OOZIE_BACKEND_FILTERING -from oozie.forms import RerunForm, ParameterForm, RerunCoordForm, RerunBundleForm, UpdateCoordinatorForm -from oozie.models import Workflow as OldWorkflow, Job, utc_datetime_format, Bundle, Coordinator, get_link, History as OldHistory -from oozie.models2 import History, Workflow, WORKFLOW_NODE_PROPERTIES +from oozie.conf import ENABLE_CRON_SCHEDULING, ENABLE_OOZIE_BACKEND_FILTERING, ENABLE_V2, OOZIE_JOBS_COUNT +from oozie.forms import ParameterForm, RerunBundleForm, RerunCoordForm, RerunForm, UpdateCoordinatorForm +from oozie.models import Bundle, Coordinator, History as OldHistory, Job, Workflow as OldWorkflow, get_link, utc_datetime_format +from oozie.models2 import WORKFLOW_NODE_PROPERTIES, History, Workflow from oozie.settings import DJANGO_APPS from oozie.utils import convert_to_server_timezone -from desktop.auth.backend import 
is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ def get_history(): if ENABLE_V2.get(): @@ -72,6 +65,7 @@ def get_history(): else: return OldHistory + def get_workflow(): if ENABLE_V2.get(): return Workflow @@ -94,6 +88,7 @@ def get_workflow(): * check_job_edition_permission() """ + def _get_workflows(user): return [{ 'name': workflow.name, @@ -302,7 +297,6 @@ def list_oozie_bundles(request): response['total_jobs'] = total_jobs return JsonResponse(response, encoder=JSONEncoderForHTML) - return render('dashboard/list_oozie_bundles.mako', request, { 'jobs': [], 'has_job_edition_permission': has_job_edition_permission, @@ -383,8 +377,11 @@ def list_oozie_workflow(request, job_id): hue_workflow = (hue_coord and hue_coord.workflow) or (history and history.get_workflow()) or \ get_history().get_workflow_from_config(oozie_workflow.conf_dict) - if hue_coord and hue_coord.workflow: Job.objects.can_read_or_exception(request, hue_coord.workflow.id) - if hue_workflow: Job.objects.can_read_or_exception(request, hue_workflow.id) + if hue_coord and hue_coord.workflow: + Job.objects.can_read_or_exception(request, hue_coord.workflow.id) + + if hue_workflow: + Job.objects.can_read_or_exception(request, hue_workflow.id) if hue_workflow: workflow_graph = hue_workflow.gen_status_graph(oozie_workflow) @@ -398,7 +395,6 @@ def list_oozie_workflow(request, job_id): action.oozie_coordinator = oozie_coordinator action.oozie_bundle = oozie_bundle - if request.GET.get('format') == 'json': if not workflow_graph and request.GET.get('is_jb2'): workflow_graph = django_mako.render_to_string('dashboard/list_oozie_workflow_graph.mako', {}) @@ -449,7 +445,7 @@ def list_oozie_workflow(request, job_id): 'oozie_slas': oozie_slas, 'hue_workflow': hue_workflow, 'hue_coord': hue_coord, - 'parameters': dict((var, val) for var, val in parameters.items() if var not in ParameterForm.NON_PARAMETERS and \ + 'parameters': dict((var, val) for var, val in parameters.items() if var not in ParameterForm.NON_PARAMETERS and var != 'oozie.use.system.libpath' or var == 'oozie.wf.application.path'), 'has_job_edition_permission': has_job_edition_permission, 'workflow_graph': workflow_graph, @@ -477,14 +473,14 @@ def list_oozie_coordinator(request, job_id): try: if not ENABLE_V2.get(): coordinator = get_history().objects.get(oozie_job_id=job_id).job.get_full_node() - except: + except Exception: LOG.exception("Ignoring error getting oozie job coordinator for job_id=%s", job_id) oozie_bundle = None if request.GET.get('bundle_job_id'): try: oozie_bundle = check_job_access_permission(request, request.GET.get('bundle_job_id')) - except: + except Exception: LOG.exception("Ignoring error getting oozie bundle for job_id=%s", job_id) if request.GET.get('format') == 'json': @@ -543,7 +539,7 @@ def list_oozie_bundle(request, job_id): bundle = get_history().get_bundle_from_config(oozie_bundle.conf_dict) else: bundle = get_history().objects.get(oozie_job_id=job_id).job.get_full_node() - except: + except Exception: LOG.exception("Ignoring error getting oozie job bundle for job_id=%s", job_id) if request.GET.get('format') == 'json': @@ -665,7 +661,7 @@ def list_oozie_sla(request): job_name = request.POST.get('job_name') - if re.match('.*-oozie-\w+-[WCB]', job_name): + if re.match(r'.*-oozie-\w+-[WCB]', job_name): params['id'] = job_name params['parent_id'] = job_name else: @@ -680,7 +676,7 @@ def list_oozie_sla(request): oozie_slas = 
oozie_api.get_oozie_slas(**params) else: - oozie_slas = [] # or get latest? + oozie_slas = [] # or get latest? if request.GET.get('format') == 'json': massaged_slas = [] @@ -788,6 +784,7 @@ def sync_coord_workflow(request, job_id): popup = popup.decode('utf-8') return JsonResponse(popup, safe=False) + @show_oozie_error def rerun_oozie_job(request, job_id, app_path=None): ParametersFormSet = formset_factory(ParameterForm, extra=0) @@ -886,7 +883,7 @@ def rerun_oozie_coordinator(request, job_id, app_path=None): request.info(_('Coordinator re-running.')) return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id})) else: - request.error(_('Invalid submission form: %s') % smart_unicode(rerun_form.errors)) + request.error(_('Invalid submission form: %s') % smart_str(rerun_form.errors)) return list_oozie_coordinator(request, job_id) else: rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator, return_json=return_json) @@ -1083,6 +1080,7 @@ def massaged_workflow_actions_for_json(workflow_actions, oozie_coordinator, oozi return actions + def massaged_coordinator_actions_for_json(coordinator, oozie_bundle): coordinator_id = coordinator.id coordinator_actions = coordinator.get_working_actions() @@ -1091,12 +1089,12 @@ def massaged_coordinator_actions_for_json(coordinator, oozie_bundle): related_job_ids = [] related_job_ids.append('coordinator_job_id=%s' % coordinator_id) if oozie_bundle is not None: - related_job_ids.append('bundle_job_id=%s' %oozie_bundle.id) + related_job_ids.append('bundle_job_id=%s' % oozie_bundle.id) for action in coordinator_actions: massaged_action = { 'id': action.id, - 'url': action.externalId and \ + 'url': action.externalId and reverse('oozie:list_oozie_workflow', kwargs={'job_id': action.externalId}) + '?%s' % '&'.join(related_job_ids) or '', 'number': action.actionNumber, 'type': 'schedule-task', @@ -1127,7 +1125,7 @@ def massaged_bundle_actions_for_json(bundle): for action in bundle_actions: massaged_action = { 'id': action.coordJobId, - 'url': action.coordJobId and \ + 'url': action.coordJobId and reverse('oozie:list_oozie_coordinator', kwargs={'job_id': action.coordJobId}) + '?bundle_job_id=%s' % bundle.id or '', 'name': action.coordJobName, 'type': action.type, @@ -1197,9 +1195,9 @@ def massaged_oozie_jobs_for_json(oozie_jobs, user, just_sla=False): 'lastModTimeFormatted': last_modified_time_millis and format_duration_in_millis(last_modified_time_millis) or None, 'kickoffTime': hasattr(job, 'kickoffTime') and job.kickoffTime and format_time(job.kickoffTime) or '', 'kickoffTimeInMillis': hasattr(job, 'kickoffTime') and job.kickoffTime and time.mktime(catch_unicode_time(job.kickoffTime)) or 0, - 'nextMaterializedTime': hasattr(job, 'nextMaterializedTime') and \ + 'nextMaterializedTime': hasattr(job, 'nextMaterializedTime') and job.nextMaterializedTime and format_time(job.nextMaterializedTime) or '', - 'nextMaterializedTimeInMillis': hasattr(job, 'nextMaterializedTime') and \ + 'nextMaterializedTimeInMillis': hasattr(job, 'nextMaterializedTime') and job.nextMaterializedTime and time.mktime(job.nextMaterializedTime) or 0, 'timeOut': hasattr(job, 'timeOut') and job.timeOut or None, 'endTime': job.endTime and format_time(job.endTime) or None, @@ -1284,8 +1282,8 @@ def check_job_edition_permission(oozie_job, user): def has_job_edition_permission(oozie_job, user): - return (is_admin(user) or oozie_job.user == user.username or \ - (oozie_job.group and user.groups.filter(name__in=oozie_job.group.split(",")).exists()) or \ + return 
(is_admin(user) or oozie_job.user == user.username or + (oozie_job.group and user.groups.filter(name__in=oozie_job.group.split(",")).exists()) or (oozie_job.acl and user.username in oozie_job.acl.split(','))) diff --git a/apps/oozie/src/oozie/views/editor.py b/apps/oozie/src/oozie/views/editor.py index eda35a03915..bab72d63a31 100644 --- a/apps/oozie/src/oozie/views/editor.py +++ b/apps/oozie/src/oozie/views/editor.py @@ -15,55 +15,61 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import str -import json -import logging -import shutil import sys +import json import time +import shutil +import logging +from builtins import str +from functools import partial -from django.urls import reverse from django.db.models import Q from django.forms.formsets import formset_factory from django.forms.models import inlineformset_factory from django.http import HttpResponse from django.shortcuts import redirect from django.template.defaultfilters import strip_tags -from functools import partial +from django.urls import reverse from django.utils.http import http_date +from django.utils.translation import activate as activate_translation, gettext as _ -from desktop.lib.django_util import JsonResponse, render, extract_field_data +from desktop.auth.backend import is_admin +from desktop.lib.django_util import JsonResponse, extract_field_data, render from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import smart_str from desktop.lib.rest.http_client import RestException from desktop.models import Document - +from filebrowser.lib.archives import archive_factory from liboozie.credentials import Credentials from liboozie.oozie_api import get_oozie from liboozie.submittion import Submission - -from filebrowser.lib.archives import archive_factory -from oozie.decorators import check_job_access_permission, check_job_edition_permission,\ - check_dataset_access_permission, check_dataset_edition_permission from oozie.conf import ENABLE_CRON_SCHEDULING, ENABLE_V2 -from oozie.importlib.workflows import import_workflow as _import_workflow +from oozie.decorators import ( + check_dataset_access_permission, + check_dataset_edition_permission, + check_job_access_permission, + check_job_edition_permission, +) +from oozie.forms import ( + BundledCoordinatorForm, + BundleForm, + CoordinatorForm, + DataInputForm, + DataOutputForm, + DatasetForm, + DefaultLinkForm, + ImportCoordinatorForm, + ImportWorkflowForm, + LinkForm, + NodeForm, + ParameterForm, + WorkflowForm, + design_form_by_type, +) from oozie.importlib.coordinators import import_coordinator as _import_coordinator +from oozie.importlib.workflows import import_workflow as _import_workflow from oozie.management.commands import oozie_setup -from oozie.models import Workflow, History, Coordinator,\ - Dataset, DataInput, DataOutput,\ - ACTION_TYPES, Bundle, BundledCoordinator, Job -from oozie.forms import WorkflowForm, CoordinatorForm, DatasetForm,\ - DataInputForm, DataOutputForm, LinkForm,\ - DefaultLinkForm, ParameterForm, NodeForm,\ - BundleForm, BundledCoordinatorForm, design_form_by_type,\ - ImportWorkflowForm, ImportCoordinatorForm - -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, activate as activate_translation -else: - from django.utils.translation import ugettext as _, activate as activate_translation +from oozie.models import ACTION_TYPES, Bundle, BundledCoordinator, Coordinator, 
DataInput, DataOutput, Dataset, History, Job, Workflow LOG = logging.getLogger() @@ -344,7 +350,6 @@ def clone_workflow(request, workflow): return JsonResponse(response) - @check_job_access_permission() def submit_workflow(request, workflow): ParametersFormSet = formset_factory(ParameterForm, extra=0) @@ -562,7 +567,7 @@ def create_coordinator_dataset(request, coordinator): response['data'] = reverse('oozie:edit_coordinator', kwargs={'coordinator': coordinator.id}) + "#listDataset" request.info(_('Dataset created')) else: - ## Bad + # Bad response['data'] = _('A POST request is required.') if response['status'] != 0: @@ -629,7 +634,7 @@ def create_coordinator_data(request, coordinator, data_type): data_form.save() response['status'] = 0 response['data'] = reverse('oozie:edit_coordinator', kwargs={'coordinator': coordinator.id}) - request.info(_('Coordinator data created')); + request.info(_('Coordinator data created')) else: response['data'] = data_form.errors else: @@ -718,7 +723,6 @@ def create_bundle(request): }) - def delete_bundle(request): if request.method != 'POST': raise PopupException(_('A POST request is required.')) @@ -828,7 +832,7 @@ def get_create_bundled_coordinator_html(request, bundle, bundled_coordinator_for @check_job_access_permission() @check_job_edition_permission(True) def edit_bundled_coordinator(request, bundle, bundled_coordinator): - bundled_coordinator_instance = BundledCoordinator.objects.get(id=bundled_coordinator) # todo secu + bundled_coordinator_instance = BundledCoordinator.objects.get(id=bundled_coordinator) # todo secu response = {'status': -1, 'data': 'None'} diff --git a/apps/oozie/src/oozie/views/editor2.py b/apps/oozie/src/oozie/views/editor2.py index ba8fc78761a..f681871ef0c 100644 --- a/apps/oozie/src/oozie/views/editor2.py +++ b/apps/oozie/src/oozie/views/editor2.py @@ -15,46 +15,49 @@ # See the License for the specific language governing permissions and # limitations under the License. 
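The submit and rerun views touched above all build their parameter forms through formset_factory(ParameterForm, extra=0). Here is a small self-contained sketch of that pattern, with a made-up stand-in for ParameterForm and hand-written POST data rather than Hue's real form or request object.

import django
from django.conf import settings

if not settings.configured:
    settings.configure(USE_I18N=False)  # just enough configuration to validate plain forms
    django.setup()

from django import forms
from django.forms.formsets import formset_factory


class ParameterForm(forms.Form):
    # Illustrative stand-in for oozie.forms.ParameterForm: one name/value pair.
    name = forms.CharField(max_length=512)
    value = forms.CharField(max_length=512, required=False)


ParametersFormSet = formset_factory(ParameterForm, extra=0)

# POST data as the submit popup would send it, including the management form.
post_data = {
    'form-TOTAL_FORMS': '2',
    'form-INITIAL_FORMS': '2',
    'form-0-name': 'oozie.use.system.libpath',
    'form-0-value': 'true',
    'form-1-name': 'output',
    'form-1-value': '/tmp/out',
}

params_form = ParametersFormSet(post_data)
if params_form.is_valid():
    # Collapse the formset into a plain name -> value dict.
    mapping = dict([(form.cleaned_data['name'], form.cleaned_data['value']) for form in params_form])
    print(mapping)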
-from builtins import str +import sys import json import logging -import sys - +from builtins import str from datetime import datetime -from django.urls import reverse + from django.forms.formsets import formset_factory from django.shortcuts import redirect +from django.urls import reverse +from django.utils.translation import gettext as _ -from desktop.conf import USE_NEW_EDITOR, IS_MULTICLUSTER_ONLY +from desktop.conf import IS_MULTICLUSTER_ONLY, USE_NEW_EDITOR from desktop.lib import django_mako from desktop.lib.django_util import JsonResponse, render from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_str, force_unicode -from desktop.lib.rest.http_client import RestException +from desktop.lib.i18n import force_unicode, smart_str from desktop.lib.json_utils import JSONEncoderForHTML +from desktop.lib.rest.http_client import RestException from desktop.models import Document, Document2, get_cluster_config - from liboozie.credentials import Credentials from liboozie.oozie_api import get_oozie from liboozie.submission2 import Submission from metadata.conf import DEFAULT_PUBLIC_KEY from notebook.connectors.base import Notebook - -from oozie.decorators import check_document_access_permission, check_document_modify_permission,\ - check_editor_access_permission +from oozie.decorators import check_document_access_permission, check_document_modify_permission, check_editor_access_permission from oozie.forms import ParameterForm -from oozie.models import Workflow as OldWorklow, Coordinator as OldCoordinator, Bundle as OldBundle, Job -from oozie.models2 import Node, Workflow, Coordinator, Bundle, NODES, WORKFLOW_NODE_PROPERTIES, import_workflow_from_hue_3_7,\ - find_dollar_variables, find_dollar_braced_variables, WorkflowBuilder,\ - _import_workspace, _save_workflow +from oozie.models import Bundle as OldBundle, Coordinator as OldCoordinator, Job, Workflow as OldWorklow +from oozie.models2 import ( + NODES, + WORKFLOW_NODE_PROPERTIES, + Bundle, + Coordinator, + Node, + Workflow, + WorkflowBuilder, + _import_workspace, + _save_workflow, + find_dollar_braced_variables, + find_dollar_variables, + import_workflow_from_hue_3_7, +) from oozie.utils import convert_to_server_timezone -from oozie.views.editor import edit_workflow as old_edit_workflow, edit_coordinator as old_edit_coordinator, edit_bundle as old_edit_bundle - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from oozie.views.editor import edit_bundle as old_edit_bundle, edit_coordinator as old_edit_coordinator, edit_workflow as old_edit_workflow LOG = logging.getLogger() @@ -163,7 +166,7 @@ def delete_job(request): doc.can_write_or_exception(request.user) doc.delete() doc2.delete() - else: # Old version + else: # Old version job = Job.objects.can_read_or_exception(request, job['object_id']) Job.objects.can_edit_or_exception(request, job) OldWorklow.objects.destroy(job, request.fs) @@ -267,6 +270,7 @@ def _get_workflows(user): ] return workflows + @check_editor_access_permission def add_node(request): response = {'status': -1} @@ -399,7 +403,9 @@ def submit_single_action(request, doc_id, node_id): workflow.import_workspace(request.fs, parent_wf.deployment_dir, request.user) workflow.document = parent_doc - return _submit_workflow_helper(request, workflow, submit_action=reverse('oozie:submit_single_action', kwargs={'doc_id': doc_id, 'node_id': node_id})) + return _submit_workflow_helper( + request, workflow, 
submit_action=reverse('oozie:submit_single_action', kwargs={'doc_id': doc_id, 'node_id': node_id}) + ) def _submit_workflow_helper(request, workflow, submit_action): @@ -544,12 +550,12 @@ def edit_coordinator(request): if coordinator_id and not [a for a in workflows if a['uuid'] == coordinator.data['properties']['workflow']]: raise PopupException(_('You don\'t have access to the workflow of this coordinator.')) - if USE_NEW_EDITOR.get(): # In Hue 4, merge with above + if USE_NEW_EDITOR.get(): # In Hue 4, merge with above workflows = [dict([('uuid', d.uuid), ('name', d.name)]) for d in Document2.objects.documents(request.user, include_managed=False).search_documents(types=['oozie-workflow2'])] can_edit = doc is None or (doc.can_write(request.user) if USE_NEW_EDITOR.get() else doc.doc.get().is_editable(request.user)) - if request.GET.get('format') == 'json': # For Editor + if request.GET.get('format') == 'json': # For Editor return JsonResponse({ 'coordinator': coordinator.get_data_for_json(), 'credentials': list(credentials.credentials.keys()), @@ -631,7 +637,9 @@ def save_coordinator(request): owner=request.user, is_managed=coordinator_data.get('isManaged') ) - Document.objects.link(coordinator_doc, owner=coordinator_doc.owner, name=coordinator_doc.name, description=coordinator_doc.description, extra='coordinator2') + Document.objects.link( + coordinator_doc, owner=coordinator_doc.owner, name=coordinator_doc.name, description=coordinator_doc.description, extra='coordinator2' + ) scheduled_id = coordinator_data['properties']['workflow'] or coordinator_data['properties']['document'] if scheduled_id: @@ -722,7 +730,7 @@ def submit_coordinator(request, doc_id): return render('/scheduler/submit_job_popup.mako', request, { 'params_form': params_form, 'name': coordinator.name, - 'action': reverse('oozie:editor_submit_coordinator', kwargs={'doc_id': coordinator.id}), + 'action': reverse('oozie:editor_submit_coordinator', kwargs={'doc_id': coordinator.id}), 'show_dryrun': True, 'return_json': request.GET.get('format') == 'json' }, force_template=True) @@ -734,17 +742,17 @@ def _submit_coordinator(request, coordinator, mapping): if IS_MULTICLUSTER_ONLY.get() and get_cluster_config(request.user)['has_computes']: mapping['auto-cluster'] = { u'additionalClusterResourceTags': [], - u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE', #'u'NONE', + u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE', # 'u'NONE', u'cdhVersion': u'CDH514', u'clouderaManagerPassword': u'guest', u'clouderaManagerUsername': u'guest', - u'clusterName': u'analytics4', # Add time variable + u'clusterName': u'analytics4', # Add time variable u'computeWorkersConfiguration': { u'bidUSDPerHr': 0, u'groupSize': 0, u'useSpot': False }, - u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946', + u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946', # noqa: E501 u'instanceBootstrapScript': u'', u'instanceType': u'm4.xlarge', u'jobSubmissionGroupName': u'', @@ -777,7 +785,7 @@ def _submit_coordinator(request, coordinator, mapping): # } # } ], - u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410', + u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410', # noqa: E501 
u'publicKey': DEFAULT_PUBLIC_KEY.get(), u'serviceType': u'SPARK', u'workersConfiguration': {}, @@ -794,7 +802,9 @@ def _submit_coordinator(request, coordinator, mapping): return job_id except RestException as ex: LOG.exception('Error submitting coordinator') - raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex), error_code=200) + raise PopupException( + _("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex), error_code=200 + ) @check_editor_access_permission @@ -952,7 +962,7 @@ def submit_bundle(request, doc_id): return render('/scheduler/submit_job_popup.mako', request, { 'params_form': params_form, 'name': bundle.name, - 'action': reverse('oozie:editor_submit_bundle', kwargs={'doc_id': bundle.id}), + 'action': reverse('oozie:editor_submit_bundle', kwargs={'doc_id': bundle.id}), 'return_json': request.GET.get('format') == 'json', 'show_dryrun': False }, force_template=True) @@ -961,7 +971,12 @@ def submit_bundle(request, doc_id): def _submit_bundle(request, bundle, properties): try: deployment_mapping = {} - coords = dict([(c.uuid, c) for c in Document2.objects.filter(type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])]) + coords = dict( + [ + (c.uuid, c) + for c in Document2.objects.filter(type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']]) + ] + ) for i, bundled in enumerate(bundle.data['coordinators']): coord = coords[bundled['coordinator']] diff --git a/apps/pig/src/pig/api.py b/apps/pig/src/pig/api.py index 53964a4a2c3..4d309e005fe 100644 --- a/apps/pig/src/pig/api.py +++ b/apps/pig/src/pig/api.py @@ -15,27 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
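In the pig OozieApi hunk that follows, LOG_START_PATTERN becomes a raw string so the \[ and \] escapes no longer trigger Python 3 invalid-escape warnings. The snippet below is a rough standalone illustration of what that start/end pattern pair is used for when carving the Pig script block out of Oozie action logs; the sample log text is invented, the end pattern is abbreviated, and the slicing is only an approximation of what the log-fetching view does with these patterns.

import re

LOG_START_PATTERN = r'(Pig script \[(?:[\w.-]+)\] content:.+)'
LOG_END_PATTERN = '(<<< Invocation of Pig command completed <<<|<<< Invocation of Main class completed <<<)'

sample_log = """2024-03-01 10:00:12 INFO PigMain - starting action
Pig script [script.pig] content:
A = LOAD '/tmp/data' USING PigStorage(',');
DUMP A;
<<< Invocation of Pig command completed <<<
2024-03-01 10:05:40 INFO ActionEndXCommand - action ended
"""

# DOTALL lets the trailing ".+" run past the header line; only the match
# positions are used here to slice out the script section.
start = re.search(LOG_START_PATTERN, sample_log, re.DOTALL)
end = re.search(LOG_END_PATTERN, sample_log)
if start and end:
    print(sample_log[start.start():end.start()].strip())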
-from builtins import object -import json -import logging import sys +import json import time +import logging +from builtins import object from django.urls import reverse +from django.utils.translation import gettext as _ +from desktop.auth.backend import is_admin from desktop.lib.i18n import smart_str from desktop.lib.view_util import format_duration_in_millis from liboozie.oozie_api import get_oozie -from oozie.models import Workflow, Pig +from oozie.models import Pig, Workflow from oozie.views.api import get_log as get_workflow_logs from oozie.views.editor import _submit_workflow -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -50,21 +45,19 @@ class OozieApi(object): """ WORKFLOW_NAME = 'pig-app-hue-script' - LOG_START_PATTERN = '(Pig script \[(?:[\w.-]+)\] content:.+)' + LOG_START_PATTERN = r'(Pig script \[(?:[\w.-]+)\] content:.+)' LOG_END_PATTERN = '(<<< Invocation of Pig command completed <<<|' \ '<<< Invocation of Main class completed <<<|' \ '<<< Invocation of Pig command completed <<<|' \ '<<< Invocation of Main class completed <<<)' MAX_DASHBOARD_JOBS = 100 - def __init__(self, fs, jt, user): self.oozie_api = get_oozie(user) self.fs = fs self.jt = jt self.user = user - def submit(self, pig_script, params): workflow = None @@ -78,7 +71,6 @@ def submit(self, pig_script, params): return oozie_wf - def _create_workflow(self, pig_script, params): workflow = Workflow.objects.new_workflow(self.user) workflow.schema_version = 'uri:oozie:workflow:0.5' @@ -90,7 +82,7 @@ def _create_workflow(self, pig_script, params): Workflow.objects.initialize(workflow, self.fs) script_path = workflow.deployment_dir + '/script.pig' - if self.fs: # For testing, difficult to mock + if self.fs: # For testing, difficult to mock self.fs.do_as_user(self.user.username, self.fs.create, script_path, data=smart_str(pig_script.dict['script'])) files = [] @@ -141,7 +133,7 @@ def _create_workflow(self, pig_script, params): if pig_script.use_hbase and self.oozie_api.security_enabled: credentials.append({'name': 'hbase', 'value': True}) if credentials: - action.credentials = credentials # Note, action.credentials is a @setter here + action.credentials = credentials # Note, action.credentials is a @setter here action.save() action.add_node(workflow.end) @@ -152,7 +144,6 @@ def _create_workflow(self, pig_script, params): return workflow - def _build_parameters(self, params): pig_params = [] @@ -168,13 +159,11 @@ def _build_parameters(self, params): return pig_params - def stop(self, job_id): return self.oozie_api.job_control(job_id, 'kill') - def get_jobs(self): - kwargs = {'cnt': OozieApi.MAX_DASHBOARD_JOBS,} + kwargs = {'cnt': OozieApi.MAX_DASHBOARD_JOBS, } kwargs['filters'] = [ ('user', self.user.username), ('name', OozieApi.WORKFLOW_NAME) @@ -182,12 +171,10 @@ def get_jobs(self): return self.oozie_api.get_workflows(**kwargs).jobs - def get_log(self, request, oozie_workflow, make_links=True): return get_workflow_logs(request, oozie_workflow, make_links=make_links, log_start_pattern=self.LOG_START_PATTERN, log_end_pattern=self.LOG_END_PATTERN) - def massaged_jobs_for_json(self, request, oozie_jobs, hue_jobs): jobs = [] hue_jobs = dict([(script.dict.get('job_id'), script) for script in hue_jobs if script.dict.get('job_id')]) @@ -195,13 +182,13 @@ def massaged_jobs_for_json(self, request, oozie_jobs, hue_jobs): for job in oozie_jobs: if job.is_running(): 
job = self.oozie_api.get_job(job.id) - get_copy = request.GET.copy() # Hacky, would need to refactor JobBrowser get logs + get_copy = request.GET.copy() # Hacky, would need to refactor JobBrowser get logs get_copy['format'] = 'python' request.GET = get_copy try: logs, workflow_action, is_really_done = self.get_log(request, job) progress = workflow_action[0]['progress'] - except: + except Exception: LOG.exception('failed to get progress') progress = 0 else: @@ -217,7 +204,7 @@ def massaged_jobs_for_json(self, request, oozie_jobs, hue_jobs): 'endTime': job.endTime and format_time(job.endTime) or None, 'status': job.status, 'isRunning': job.is_running(), - 'duration': job.endTime and job.startTime and format_duration_in_millis(( time.mktime(job.endTime) - time.mktime(job.startTime) ) * 1000) or None, + 'duration': job.endTime and job.startTime and format_duration_in_millis((time.mktime(job.endTime) - time.mktime(job.startTime)) * 1000) or None, # noqa: E501 'appName': hue_pig and hue_pig.dict['name'] or _('Unsaved script'), 'scriptId': hue_pig and hue_pig.id or -1, 'scriptContent': hue_pig and hue_pig.dict['script'] or '', @@ -226,9 +213,9 @@ def massaged_jobs_for_json(self, request, oozie_jobs, hue_jobs): 'user': job.user, 'absoluteUrl': job.get_absolute_url(), 'canEdit': has_job_edition_permission(job, self.user), - 'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={'job_id':job.id, 'action':'kill'}), + 'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={'job_id': job.id, 'action': 'kill'}), 'watchUrl': reverse('pig:watch', kwargs={'job_id': job.id}) + '?format=python', - 'created': hasattr(job, 'createdTime') and job.createdTime and job.createdTime and ((job.type == 'Bundle' and job.createdTime) or format_time(job.createdTime)), + 'created': hasattr(job, 'createdTime') and job.createdTime and job.createdTime and ((job.type == 'Bundle' and job.createdTime) or format_time(job.createdTime)), # noqa: E501 'startTime': hasattr(job, 'startTime') and format_time(job.startTime) or None, 'run': hasattr(job, 'run') and job.run or 0, 'frequency': hasattr(job, 'frequency') and job.frequency or None, @@ -248,4 +235,3 @@ def format_time(st_time): def has_job_edition_permission(oozie_job, user): return is_admin(user) or oozie_job.user == user.username - diff --git a/apps/pig/src/pig/conf.py b/apps/pig/src/pig/conf.py index 854f99c8569..25b51667443 100644 --- a/apps/pig/src/pig/conf.py +++ b/apps/pig/src/pig/conf.py @@ -18,17 +18,12 @@ import os import sys +from django.utils.translation import gettext as _, gettext_lazy as _t + from desktop.lib.conf import Config from liboozie.conf import get_oozie_status - from pig.settings import NICE_NAME -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t - - LOCAL_SAMPLE_DIR = Config( key="local_sample_dir", default=os.path.join(os.path.dirname(__file__), "..", "..", "examples"), @@ -44,7 +39,7 @@ def config_validator(user): res = [] - if not 'test' in sys.argv: # Avoid tests hanging + if 'test' not in sys.argv: # Avoid tests hanging status = get_oozie_status(user) if 'NORMAL' not in status: diff --git a/apps/pig/src/pig/management/commands/pig_setup.py b/apps/pig/src/pig/management/commands/pig_setup.py index c7ef62f5863..301a6c88d88 100644 --- a/apps/pig/src/pig/management/commands/pig_setup.py +++ b/apps/pig/src/pig/management/commands/pig_setup.py @@ -15,14 +15,15 @@ # See the License for the specific language governing 
permissions and # limitations under the License. -import json -import logging import os import sys +import json +import logging from django.core import management from django.core.management.base import BaseCommand from django.db import transaction +from django.utils.translation import gettext as _ from desktop.conf import USE_NEW_EDITOR from desktop.lib import paths @@ -30,16 +31,8 @@ from hadoop import cluster from liboozie.submittion import create_directories from notebook.models import make_notebook - -from useradmin.models import get_default_user_group, install_sample_user - from pig.conf import LOCAL_SAMPLE_DIR, REMOTE_SAMPLE_DIR - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from useradmin.models import get_default_user_group, install_sample_user LOG = logging.getLogger() @@ -102,7 +95,6 @@ def install_pig_script(self, sample_user): return doc2 - def handle(self, *args, **options): fs = cluster.get_hdfs() create_directories(fs, [REMOTE_SAMPLE_DIR.get()]) diff --git a/apps/pig/src/pig/models.py b/apps/pig/src/pig/models.py index 7b0b36db349..93131580a8d 100644 --- a/apps/pig/src/pig/models.py +++ b/apps/pig/src/pig/models.py @@ -15,26 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import str -from builtins import object +import sys import json import posixpath -import sys +from builtins import object, str -from django.db import models from django.contrib.contenttypes.fields import GenericRelation +from django.db import models from django.urls import reverse +from django.utils.translation import gettext as _, gettext_lazy as _t from desktop.auth.backend import is_admin from desktop.lib.exceptions_renderable import PopupException -from desktop.models import Document as Doc, SAMPLE_USER_ID +from desktop.models import SAMPLE_USER_ID, Document as Doc from hadoop.fs.hadoopfs import Hdfs from useradmin.models import User -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t class Document(models.Model): owner = models.ForeignKey( @@ -47,10 +43,10 @@ class Document(models.Model): is_design = models.BooleanField(default=True, db_index=True, verbose_name=_t('Is a user document, not a document submission.'), help_text=_t('If the document is not a submitted job but a real query, script, workflow.')) - def is_editable(self, user): # Deprecated + def is_editable(self, user): # Deprecated return is_admin(user) or self.owner == user - def can_edit_or_exception(self, user, exception_class=PopupException): # Deprecated + def can_edit_or_exception(self, user, exception_class=PopupException): # Deprecated if self.is_editable(user): return True else: @@ -97,7 +93,7 @@ def get_absolute_url(self): def use_hcatalog(self): script = self.dict['script'] return ('org.apache.hcatalog.pig.HCatStorer' in script or 'org.apache.hcatalog.pig.HCatLoader' in script) or \ - ('org.apache.hive.hcatalog.pig.HCatLoader' in script or 'org.apache.hive.hcatalog.pig.HCatStorer' in script) # New classes + ('org.apache.hive.hcatalog.pig.HCatLoader' in script or 'org.apache.hive.hcatalog.pig.HCatStorer' in script) # New classes @property def use_hbase(self): @@ -138,7 +134,7 @@ def use_hcatalog(self): script = self.dict['script'] return ('org.apache.hcatalog.pig.HCatStorer' in script or 'org.apache.hcatalog.pig.HCatLoader' in 
script) or \ - ('org.apache.hive.hcatalog.pig.HCatLoader' in script or 'org.apache.hive.hcatalog.pig.HCatStorer' in script) # New classes + ('org.apache.hive.hcatalog.pig.HCatLoader' in script or 'org.apache.hive.hcatalog.pig.HCatStorer' in script) # New classes @property def use_hbase(self): @@ -150,7 +146,7 @@ def use_hbase(self): def create_or_update_script(id, name, script, user, parameters, resources, hadoopProperties, is_design=True): try: pig_script = PigScript.objects.get(id=id) - if id == str(SAMPLE_USER_ID): # Special case for the Example, just create an history + if id == str(SAMPLE_USER_ID): # Special case for the Example, just create an history is_design = False raise PigScript.DoesNotExist() pig_script.doc.get().can_write_or_exception(user) diff --git a/apps/pig/src/pig/urls.py b/apps/pig/src/pig/urls.py index a99dd8cc7cb..4667e6e0f5c 100644 --- a/apps/pig/src/pig/urls.py +++ b/apps/pig/src/pig/urls.py @@ -17,12 +17,9 @@ import sys -from pig import views as pig_views +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from pig import views as pig_views urlpatterns = [ re_path(r'^$', pig_views.app, name='index'), diff --git a/apps/pig/src/pig/views.py b/apps/pig/src/pig/views.py index c935429aaf1..e77c414f693 100644 --- a/apps/pig/src/pig/views.py +++ b/apps/pig/src/pig/views.py @@ -15,45 +15,37 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import str +import sys import json import logging -import sys -import urllib.request, urllib.parse, urllib.error +import urllib.error +import urllib.parse +import urllib.request +from builtins import str from django.urls import reverse +from django.utils.translation import gettext as _ from django.views.decorators.csrf import ensure_csrf_cookie from desktop.lib.django_util import JsonResponse, render from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.rest.http_client import RestException from desktop.lib.paths import SAFE_CHARACTERS_URI_COMPONENTS +from desktop.lib.rest.http_client import RestException from desktop.models import Document - -from oozie.views.dashboard import show_oozie_error, check_job_access_permission,\ - check_job_edition_permission - +from oozie.views.dashboard import check_job_access_permission, check_job_edition_permission, show_oozie_error from pig import api from pig.management.commands import pig_setup -from pig.models import get_workflow_output, hdfs_link, PigScript,\ - create_or_update_script, get_scripts - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from pig.models import PigScript, create_or_update_script, get_scripts, get_workflow_output, hdfs_link LOG = logging.getLogger() + @ensure_csrf_cookie def app(request): autocomplete_base_url = '' try: autocomplete_base_url = reverse('beeswax:api_autocomplete_databases', kwargs={}) + '/' - except: + except Exception: LOG.exception('failed to find autocomplete base url') return render('app.mako', request, { @@ -210,7 +202,7 @@ def delete(request): pig_script.can_edit_or_exception(request.user) pig_script.doc.all().delete() pig_script.delete() - except: + except Exception: LOG.exception('failed to delete pig script') None @@ -233,8 +225,14 @@ def watch(request, job_id): 'progress': 
oozie_workflow.get_progress(), 'isRunning': oozie_workflow.is_running(), 'killUrl': reverse('oozie:manage_oozie_jobs', kwargs={'job_id': oozie_workflow.id, 'action': 'kill'}), - 'rerunUrl': reverse('oozie:rerun_oozie_job', kwargs={'job_id': oozie_workflow.id, 'app_path': urllib.parse.quote(oozie_workflow.appPath.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)}), - 'actions': workflow_actions + 'rerunUrl': reverse( + 'oozie:rerun_oozie_job', + kwargs={ + 'job_id': oozie_workflow.id, + 'app_path': urllib.parse.quote(oozie_workflow.appPath.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS), + }, + ), + 'actions': workflow_actions, } response = { diff --git a/apps/proxy/src/proxy/proxy_test.py b/apps/proxy/src/proxy/proxy_test.py index 10ebbc80256..23733511d81 100644 --- a/apps/proxy/src/proxy/proxy_test.py +++ b/apps/proxy/src/proxy/proxy_test.py @@ -17,26 +17,19 @@ # # Tests for proxy app. -from __future__ import print_function -from future import standard_library -standard_library.install_aliases() -from builtins import str -import threading +import sys import logging +import threading import http.server -import pytest -import sys +from builtins import str +from io import StringIO as string_io +import pytest from django.test.client import Client -from desktop.lib.django_test_util import make_logged_in_client -from proxy.views import _rewrite_links import proxy.conf - -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from StringIO import StringIO as string_io +from desktop.lib.django_test_util import make_logged_in_client +from proxy.views import _rewrite_links class Handler(http.server.BaseHTTPRequestHandler): @@ -75,6 +68,7 @@ def log_message(self, fmt, *args): self.log_date_time_string(), fmt % args)) + @pytest.mark.django_db def run_test_server(): """ @@ -90,12 +84,15 @@ def run_test_server(): def finish(): # Make sure the server thread is done. print("Closing thread " + str(thread)) - thread.join(10.0) # Wait at most 10 seconds + thread.join(10.0) # Wait at most 10 seconds assert not thread.is_alive() return httpd, finish + + run_test_server.__test__ = False + @pytest.mark.django_db def test_proxy_get(): """ @@ -124,6 +121,7 @@ def test_proxy_get(): finally: finish() + @pytest.mark.django_db def test_proxy_post(): """ @@ -145,6 +143,7 @@ def test_proxy_post(): finally: finish() + @pytest.mark.django_db def test_blacklist(): client = make_logged_in_client('test') @@ -181,6 +180,7 @@ def geturl(self): """URL we were initialized with.""" return self.url + def test_rewriting(): """ Tests that simple re-writing is working. diff --git a/apps/proxy/src/proxy/urls.py b/apps/proxy/src/proxy/urls.py index 5277e6d1bb3..e5c89b1210c 100644 --- a/apps/proxy/src/proxy/urls.py +++ b/apps/proxy/src/proxy/urls.py @@ -17,12 +17,9 @@ import sys -from proxy import views as proxy_views +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from proxy import views as proxy_views urlpatterns = [ # Prefix the names of your views with the app name. diff --git a/apps/rdbms/src/rdbms/api.py b/apps/rdbms/src/rdbms/api.py index fc801201822..291d5fd1622 100644 --- a/apps/rdbms/src/rdbms/api.py +++ b/apps/rdbms/src/rdbms/api.py @@ -15,36 +15,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import zip -import datetime -import decimal +import sys import json +import decimal import logging -import sys +import datetime +from builtins import zip from django.http import Http404 from django.utils.html import escape - -from desktop.lib.django_util import JsonResponse -from desktop.context_processors import get_app_name - -from librdbms import conf -from librdbms.server import dbms -from librdbms.design import SQLdesign +from django.utils.translation import gettext as _ from beeswax import models as beeswax_models from beeswax.forms import SaveForm from beeswax.views import authorized_get_query_history, safe_get_design - +from desktop.context_processors import get_app_name +from desktop.lib.django_util import JsonResponse +from librdbms import conf +from librdbms.design import SQLdesign +from librdbms.server import dbms from rdbms.forms import SQLForm from rdbms.views import save_design -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -177,7 +169,7 @@ def execute_query(request, design_id=None): response['message'] = _('There was an error with your query.') response['errors'] = form.errors except RuntimeError as e: - response['message']= str(e) + response['message'] = str(e) return JsonResponse(response, encoder=ResultEncoder) @@ -215,7 +207,7 @@ def explain_query(request): response['message'] = _('There was an error with your query.') response['errors'] = form.errors except RuntimeError as e: - response['message']= str(e) + response['message'] = str(e) return JsonResponse(response) @@ -347,7 +339,7 @@ def get_query_form(request, design_id=None): raise RuntimeError(_("No databases are available. Permissions could be missing.")) form = SQLForm(request.POST) - form.fields['server'].choices = servers # Could not do it in the form - form.fields['database'].choices = databases # Could not do it in the form + form.fields['server'].choices = servers # Could not do it in the form + form.fields['database'].choices = databases # Could not do it in the form return form diff --git a/apps/rdbms/src/rdbms/forms.py b/apps/rdbms/src/rdbms/forms.py index 3c0238fe166..5a6c033d93d 100644 --- a/apps/rdbms/src/rdbms/forms.py +++ b/apps/rdbms/src/rdbms/forms.py @@ -16,11 +16,10 @@ # limitations under the License. 
import sys + from django import forms -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t +from django.utils.translation import gettext_lazy as _t + class SQLForm(forms.Form): query = forms.CharField(label=_t("Query Editor"), diff --git a/apps/rdbms/src/rdbms/urls.py b/apps/rdbms/src/rdbms/urls.py index a05783273e9..e978f6ae76f 100644 --- a/apps/rdbms/src/rdbms/urls.py +++ b/apps/rdbms/src/rdbms/urls.py @@ -17,15 +17,10 @@ import sys -from rdbms import views as rdbms_views -from rdbms import api as rdbms_api -from beeswax import views as beeswax_views -from beeswax import api as beeswax_api +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from beeswax import api as beeswax_api, views as beeswax_views +from rdbms import api as rdbms_api, views as rdbms_views # Views urlpatterns = [ diff --git a/apps/rdbms/src/rdbms/views.py b/apps/rdbms/src/rdbms/views.py index ee1a9eb3ab1..ae9b5cc13ac 100644 --- a/apps/rdbms/src/rdbms/views.py +++ b/apps/rdbms/src/rdbms/views.py @@ -15,30 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import json import sys - +import json +import logging from functools import wraps from django.urls import reverse +from django.utils.translation import gettext as _ +from beeswax import models as beeswax_models +from beeswax.views import safe_get_design from desktop.context_processors import get_app_name -from desktop.models import Document from desktop.lib.django_util import render - +from desktop.models import Document from librdbms import conf from librdbms.design import SQLdesign -from beeswax import models as beeswax_models -from beeswax.views import safe_get_design - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -53,6 +45,8 @@ def configuration_error(request, *args, **kwargs): """ Decorators """ + + def ensure_configuration(view_func): def _decorator(*args, **kwargs): if conf.DATABASES.get(): @@ -65,6 +59,8 @@ def _decorator(*args, **kwargs): """ Queries Views """ + + @ensure_configuration def execute_query(request, design_id=None, query_history_id=None): """ diff --git a/apps/search/src/search/conf.py b/apps/search/src/search/conf.py index fcd2db0b5e9..bc3e73b9546 100644 --- a/apps/search/src/search/conf.py +++ b/apps/search/src/search/conf.py @@ -17,13 +17,9 @@ import sys -from desktop.lib.conf import Config, coerce_bool - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ +from django.utils.translation import gettext_lazy as _ +from desktop.lib.conf import Config, coerce_bool SOLR_URL = Config( key="solr_url", diff --git a/apps/search/src/search/management/commands/search_setup.py b/apps/search/src/search/management/commands/search_setup.py index 59886159e6b..04257d4dbbf 100644 --- a/apps/search/src/search/management/commands/search_setup.py +++ b/apps/search/src/search/management/commands/search_setup.py @@ -22,10 +22,9 @@ from django.core.management.base import BaseCommand from django.db import transaction -from desktop.models import Directory, Document, Document2, Document2Permission, SAMPLE_USER_OWNERS +from desktop.models import SAMPLE_USER_OWNERS, Directory, 
Document, Document2, Document2Permission from useradmin.models import get_default_user_group, install_sample_user - LOG = logging.getLogger() @@ -44,10 +43,7 @@ def handle(self, *args, **options): if not Document2.objects.filter(type='search-dashboard', owner__username__in=SAMPLE_USER_OWNERS).exists(): with transaction.atomic(): - if sys.version_info[0] > 2: - management.call_command('loaddata', 'initial_search_examples.json', verbosity=2) - else: - management.call_command('loaddata', 'initial_search_examples.json', verbosity=2, commit=False) + management.call_command('loaddata', 'initial_search_examples.json', verbosity=2) Document.objects.sync() Document2.objects.filter(type='search-dashboard', owner__username__in=SAMPLE_USER_OWNERS).update(parent_directory=examples_dir) diff --git a/apps/search/src/search/models.py b/apps/search/src/search/models.py index e66e38326bf..b7ffa1018c5 100644 --- a/apps/search/src/search/models.py +++ b/apps/search/src/search/models.py @@ -15,26 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import str -import json -import logging import re import sys +import json +import logging +from builtins import str -from django.urls import reverse from django.db import models +from django.urls import reverse from django.utils.html import escape +from django.utils.translation import gettext_lazy as _t from libsolr.api import SolrApi -from useradmin.models import User - from search.conf import SOLR_URL - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - +from useradmin.models import User LOG = logging.getLogger() @@ -58,7 +52,6 @@ def update_from_post(self, post_data): self.data = json.dumps(data_dict) - def get_query_params(self): data_dict = json.loads(self.data) @@ -132,14 +125,13 @@ def update_from_post(self, post_data): self.data = json.dumps(data_dict) - def get_template(self, with_highlighting=False): data_dict = json.loads(self.data) template = data_dict.get('template') if with_highlighting and data_dict.get('highlighting'): for field in data_dict.get('highlighting', []): - template = re.sub('\{\{%s\}\}' % field, '{{{%s}}}' % field, template) + template = re.sub(r'\{\{%s\}\}' % field, '{{{%s}}}' % field, template) return template @@ -184,7 +176,6 @@ def update_from_post(self, post_data): self.data = json.dumps(data_dict) - def get_query_params(self, client_query=None): params = () data_dict = json.loads(self.data) @@ -233,11 +224,11 @@ def create2(self, name, label, is_core_only=False, owner=None): # Deprecated see Collection2 class Collection(models.Model): """All the data is now saved into the properties field""" - enabled = models.BooleanField(default=False) # Aka shared + enabled = models.BooleanField(default=False) # Aka shared name = models.CharField(max_length=40, verbose_name=_t('Solr index name pointing to')) label = models.CharField(max_length=100, verbose_name=_t('Friendlier name in UI')) is_core_only = models.BooleanField(default=False) - cores = models.TextField(default=json.dumps({}), verbose_name=_t('Collection with cores data'), help_text=_t('Solr json')) # Unused + cores = models.TextField(default=json.dumps({}), verbose_name=_t('Collection with cores data'), help_text=_t('Solr json')) # Unused properties = models.TextField( default=json.dumps({}), verbose_name=_t('Properties'), help_text=_t('Hue properties (e.g. 
results by pages number)') @@ -267,7 +258,7 @@ def get_c(self, user): if 'collection' not in props: props['collection'] = self.get_default(user) - if self.cores != '{}': # Convert collections from < Hue 3.6 + if self.cores != '{}': # Convert collections from < Hue 3.6 try: self._import_hue_3_5_collections(props, user) except Exception as e: @@ -295,11 +286,11 @@ def get_c(self, user): for facet in props['collection']['facets']: properties = facet['properties'] - if 'gap' in properties and not 'initial_gap' in properties: + if 'gap' in properties and 'initial_gap' not in properties: properties['initial_gap'] = properties['gap'] - if 'start' in properties and not 'initial_start' in properties: + if 'start' in properties and 'initial_start' not in properties: properties['initial_start'] = properties['start'] - if 'end' in properties and not 'initial_end' in properties: + if 'end' in properties and 'initial_end' not in properties: properties['initial_end'] = properties['end'] if facet['widgetType'] == 'histogram-widget': diff --git a/apps/search/src/search/urls.py b/apps/search/src/search/urls.py index fb61c3ea5bc..1dbc27f0dac 100644 --- a/apps/search/src/search/urls.py +++ b/apps/search/src/search/urls.py @@ -17,14 +17,10 @@ import sys -from search import views as search_views -from dashboard import views as dashboard_views -from dashboard import api as dashboard_api +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from dashboard import api as dashboard_api, views as dashboard_views +from search import views as search_views urlpatterns = [ re_path(r'^install_examples$', search_views.install_examples, name='install_examples'), diff --git a/apps/search/src/search/views.py b/apps/search/src/search/views.py index 054539537a4..e53a03fdecf 100644 --- a/apps/search/src/search/views.py +++ b/apps/search/src/search/views.py @@ -14,24 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import str -import logging import sys +import logging +from builtins import str + +from django.utils.translation import gettext as _ +from desktop.auth.backend import is_admin from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions_renderable import PopupException from indexer.management.commands import indexer_setup - from search.management.commands import search_setup -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -47,7 +41,7 @@ def install_examples(request): try: data = request.POST.get('data') indexer_setup.Command().handle(data=data) - if 'log_analytics_demo' == data: # Hue documents installed only one time + if 'log_analytics_demo' == data: # Hue documents installed only one time search_setup.Command().handle() result['status'] = 0 except Exception as e: diff --git a/apps/security/src/security/api/hdfs.py b/apps/security/src/security/api/hdfs.py index b298407b540..54d14a9d189 100644 --- a/apps/security/src/security/api/hdfs.py +++ b/apps/security/src/security/api/hdfs.py @@ -16,25 +16,25 @@ # limitations under the License. 
from __future__ import print_function -from builtins import str -import json + import sys +import json +from builtins import str + +from django.utils.translation import gettext as _ from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions_renderable import PopupException from filebrowser.views import display, listdir_paged -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - def _get_acl_name(acl): return ('default:' if acl['isDefault'] else '') + acl['type'] + ':' + acl['name'] + ':' + def _get_acl(acl): - return _get_acl_name(acl) + ('r' if acl['r'] else '-') + ('w' if acl['w'] else '-') + ('x' if acl['x'] else '-') + return _get_acl_name(acl) + ('r' if acl['r'] else '-') + ('w' if acl['w'] else '-') + ('x' if acl['x'] else '-') + def _diff_list_dir(user_listing, hdfs_listing): user_files = [f['stats']['path'] for f in user_listing['files']] @@ -60,10 +60,10 @@ def list_hdfs(request, path): else: json_response = display(request, path) except IOError: - # AccessControlException: Permission denied: user=test, access=READ_EXECUTE, inode="/tmp/dir":romain:supergroup:drwxr-xr-x:group::r-x,group:bob:---,group:test:---,default:user::rwx,default:group::r--,default:mask::r--,default:other::rwx (error 403) + # AccessControlException: Permission denied: user=test, access=READ_EXECUTE, inode="/tmp/dir":romain:supergroup:drwxr-xr-x:group::r-x,group:bob:---,group:test:---,default:user::rwx,default:group::r--,default:mask::r--,default:other::rwx (error 403) # noqa: E501 json_response = JsonResponse({'files': [], 'page': {}, 'error': 'FILE_NOT_FOUND'}) except Exception as e: - # AccessControlException: Permission denied: user=test, access=READ_EXECUTE, inode="/tmp/dir":romain:supergroup:drwxr-xr-x:group::r-x,group:bob:---,group:test:---,default:user::rwx,default:group::r--,default:mask::r--,default:other::rwx (error 403) + # AccessControlException: Permission denied: user=test, access=READ_EXECUTE, inode="/tmp/dir":romain:supergroup:drwxr-xr-x:group::r-x,group:bob:---,group:test:---,default:user::rwx,default:group::r--,default:mask::r--,default:other::rwx (error 403) # noqa: E501 json_response = JsonResponse({'files': [], 'page': {}, 'error': 'ACCESS_DENIED'}) if json.loads(request.GET.get('isDiffMode', 'false')): @@ -98,7 +98,8 @@ def update_acls(request): if all([acl['status'] == 'deleted' for acl in acls]): request.fs.remove_acl(path) else: - renamed_acls = set([_get_acl_name(acl) for acl in original_acls]) - set([_get_acl_name(acl) for acl in acls]) # We need to remove ACLs that have been renamed + # We need to remove ACLs that have been renamed + renamed_acls = set([_get_acl_name(acl) for acl in original_acls]) - set([_get_acl_name(acl) for acl in acls]) _remove_acl_names(request.fs, path, list(renamed_acls)) _remove_acl_entries(request.fs, path, [acl for acl in acls if acl['status'] == 'deleted']) _modify_acl_entries(request.fs, path, [acl for acl in acls if acl['status'] in ('new', 'modified')]) @@ -132,9 +133,9 @@ def bulk_add_acls(request): recursive = json.loads(request.POST.get('recursive')) try: - checked_paths = [path['path'] for path in checked_paths if path['path'] != path] # Don't touch current path + checked_paths = [path['path'] for path in checked_paths if path['path'] != path] # Don't touch current path for path in checked_paths: - _modify_acl_entries(request.fs, path, [acl for acl in acls if acl['status'] == ''], recursive) # Only saved ones + _modify_acl_entries(request.fs, 
path, [acl for acl in acls if acl['status'] == ''], recursive) # Only saved ones except Exception as e: raise PopupException(str(e.message)) diff --git a/apps/security/src/security/api/hive.py b/apps/security/src/security/api/hive.py index 5812f150428..a72dd71a716 100644 --- a/apps/security/src/security/api/hive.py +++ b/apps/security/src/security/api/hive.py @@ -15,25 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import str -import json -import logging import sys +import json import time +import logging +from builtins import str -from desktop.lib.django_util import JsonResponse +from django.utils.translation import gettext as _ +from beeswax.api import autocomplete +from desktop.lib.django_util import JsonResponse +from hadoop.cluster import get_defaultfs from libsentry.api import get_api from libsentry.sentry_site import get_sentry_server_admin_groups -from hadoop.cluster import get_defaultfs - -from beeswax.api import autocomplete - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -50,7 +44,7 @@ def fetch_hive_path(request): if '/' in path: database, table = path.split('/', 1) if '.' in table: - table, column = table.split('.', 1) + table, column = table.split('.', 1) resp = autocomplete(request, database, table, column) @@ -94,7 +88,10 @@ def list_sentry_privileges_by_role(request): try: roleName = request.POST.get('roleName') sentry_privileges = get_api(request.user).list_sentry_privileges_by_role(roleName) - result['sentry_privileges'] = sorted(sentry_privileges, key=lambda privilege: '%s.%s.%s.%s' % (privilege['server'], privilege['database'], privilege['table'], privilege['URI'])) + result['sentry_privileges'] = sorted( + sentry_privileges, + key=lambda privilege: '%s.%s.%s.%s' % (privilege['server'], privilege['database'], privilege['table'], privilege['URI']), + ) result['message'] = '' result['status'] = 0 except Exception as e: @@ -423,7 +420,9 @@ def list_sentry_privileges_for_provider(request): roleSet = json.loads(request.POST.get('roleSet')) authorizableHierarchy = json.loads(request.POST.get('authorizableHierarchy')) - sentry_privileges = get_api(request.user).list_sentry_privileges_for_provider(groups=groups, roleSet=roleSet, authorizableHierarchy=authorizableHierarchy) + sentry_privileges = get_api(request.user).list_sentry_privileges_for_provider( + groups=groups, roleSet=roleSet, authorizableHierarchy=authorizableHierarchy + ) result['sentry_privileges'] = sentry_privileges result['message'] = '' result['status'] = 0 diff --git a/apps/security/src/security/api/sentry.py b/apps/security/src/security/api/sentry.py index 91625229d7e..a7911a7910b 100644 --- a/apps/security/src/security/api/sentry.py +++ b/apps/security/src/security/api/sentry.py @@ -15,25 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import str
-import json
-import logging
 import sys
+import json
 import time
+import logging
+from builtins import str
+
+from django.utils.translation import gettext as _
 
 from desktop.lib.django_util import JsonResponse
 from desktop.lib.exceptions_renderable import PopupException
 from hadoop.cluster import get_defaultfs
-
 from libsentry.api2 import get_api
 from libsentry.sentry_site import get_sentry_server_admin_groups
 
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
-
-
 LOG = logging.getLogger()
@@ -60,7 +55,7 @@ def _fetch_hive_path(request):
   if '/' in path:
     database, table = path.split('/', 1)
     if '.' in table:
-      table, column = table.split('.', 1)
+      table, column = table.split('.', 1)
 
   resp = autocomplete(request, database, table, column)
@@ -138,7 +133,9 @@ def list_sentry_privileges_by_role(request):
 
     sentry_privileges = get_api(request.user, component).list_sentry_privileges_by_role(serviceName, roleName)
 
-    result['sentry_privileges'] = sorted(sentry_privileges, key=lambda privilege: '.'.join([auth['name'] for auth in privilege['authorizables']]))
+    result['sentry_privileges'] = sorted(
+      sentry_privileges, key=lambda privilege: '.'.join([auth['name'] for auth in privilege['authorizables']])
+    )
     result['message'] = ''
     result['status'] = 0
   except Exception as e:
@@ -153,7 +150,8 @@ def _to_sentry_privilege(privilege):
   return {
     'component': privilege['component'],
     'serviceName': privilege['serverName'],
-    'authorizables': [{'type': auth['type'], 'name': auth['name_']} for auth in privilege['authorizables']], # TODO URI {'type': 'URI', 'name': _massage_uri('/path')}
+    # TODO URI {'type': 'URI', 'name': _massage_uri('/path')}
+    'authorizables': [{'type': auth['type'], 'name': auth['name_']} for auth in privilege['authorizables']],
     'action': '*' if privilege['action'].upper() == 'ALL' else privilege['action'],
     'createTime': privilege['timestamp'],
     'grantorPrincipal': privilege['grantorPrincipal'],
@@ -286,7 +284,7 @@ def save_privileges(request):
       for privilege in role['originalPrivileges']:
         if privilege['id'] in old_privileges_ids:
           _drop_sentry_privilege(request.user, role, privilege, component)
-      _hive_add_privileges(request.user, role, modified_privileges, component) # After REVOKES as REVOKE ALL removes everything.
+      _hive_add_privileges(request.user, role, modified_privileges, component)  # After REVOKES as REVOKE ALL removes everything.
 
     result['message'] = _('Privileges updated')
     result['status'] = 0
@@ -365,7 +363,9 @@ def list_sentry_privileges_by_authorizable(request):
 
     _privileges = []
 
-    for authorizable, roles in get_api(request.user, component).list_sentry_privileges_by_authorizable(serviceName=serviceName, authorizableSet=authorizableSet, groups=groups):
+    for authorizable, roles in get_api(request.user, component).list_sentry_privileges_by_authorizable(
+      serviceName=serviceName, authorizableSet=authorizableSet, groups=groups
+    ):
       for role, privileges in roles.items():
         for privilege in privileges:
           privilege['roleName'] = role
@@ -475,7 +475,9 @@ def list_sentry_privileges_for_provider(request):
     authorizableHierarchy = json.loads(request.POST.get('authorizableHierarchy'))
     component = request.POST.get('component')
 
-    sentry_privileges = get_api(request.user, component).list_sentry_privileges_for_provider(groups=groups, roleSet=roleSet, authorizableHierarchy=authorizableHierarchy)
+    sentry_privileges = get_api(request.user, component).list_sentry_privileges_for_provider(
+      groups=groups, roleSet=roleSet, authorizableHierarchy=authorizableHierarchy
+    )
     result['sentry_privileges'] = sentry_privileges
     result['message'] = ''
     result['status'] = 0
diff --git a/apps/security/src/security/conf.py b/apps/security/src/security/conf.py
index 61941019a65..793ae56be25 100644
--- a/apps/security/src/security/conf.py
+++ b/apps/security/src/security/conf.py
@@ -17,16 +17,11 @@
 
 import sys
 
-from desktop.lib.conf import Config, coerce_bool
+from django.utils.translation import gettext as _, gettext_lazy as _t
 
+from desktop.lib.conf import Config, coerce_bool
 from security.settings import NICE_NAME
 
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _, gettext_lazy as _t
-else:
-  from django.utils.translation import ugettext as _, ugettext_lazy as _t
-
-
 HIVE_V1 = Config(
   key="hive_v1",
   help=_t("Use Sentry API V1 for Hive."),
diff --git a/apps/security/src/security/urls.py b/apps/security/src/security/urls.py
index 843e3405ff0..432fc81e13a 100644
--- a/apps/security/src/security/urls.py
+++ b/apps/security/src/security/urls.py
@@ -17,15 +17,10 @@
 
 import sys
 
-from security import views as security_views
-from security.api import hdfs as security_api_hdfs
-from security.api import hive as security_api_hive
-from security.api import sentry as security_api_sentry
+from django.urls import re_path
 
-if sys.version_info[0] > 2:
-  from django.urls import re_path
-else:
-  from django.conf.urls import url as re_path
+from security import views as security_views
+from security.api import hdfs as security_api_hdfs, hive as security_api_hive, sentry as security_api_sentry
 
 urlpatterns = [
   re_path(r'^$', security_views.hive, name='index'),
diff --git a/apps/spark/src/spark/conf.py b/apps/spark/src/spark/conf.py
index 92d33cd4c79..c88c3d61be7 100644
--- a/apps/spark/src/spark/conf.py
+++ b/apps/spark/src/spark/conf.py
@@ -15,20 +15,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import logging import os import sys +import logging +from django.utils.translation import gettext as _, gettext_lazy as _t + +from beeswax.conf import get_use_sasl_default from desktop.conf import default_ssl_validate from desktop.lib.conf import Config, coerce_bool from spark.settings import NICE_NAME -from beeswax.conf import get_use_sasl_default - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ - LOG = logging.getLogger() @@ -68,7 +64,7 @@ default="8998") # Deprecated -LIVY_SERVER_SESSION_KIND = Config( # Note: this one is ignored by Livy, this should match the current Spark mode +LIVY_SERVER_SESSION_KIND = Config( # Note: this one is ignored by Livy, this should match the current Spark mode key="livy_server_session_kind", help=_t("Configure livy to start in local 'process' mode, or 'yarn' workers."), default="yarn" @@ -114,10 +110,10 @@ def get_spark_status(user): status = None try: - if not 'test' in sys.argv: # Avoid tests hanging + if 'test' not in sys.argv: # Avoid tests hanging get_api(user).get_status() status = 'OK' - except: + except Exception: LOG.exception('failed to get spark status') return status diff --git a/apps/sqoop/src/sqoop/api/connector.py b/apps/sqoop/src/sqoop/api/connector.py index 77cb468f4ad..76a3f995823 100644 --- a/apps/sqoop/src/sqoop/api/connector.py +++ b/apps/sqoop/src/sqoop/api/connector.py @@ -16,30 +16,29 @@ # limitations under the License. from __future__ import absolute_import + +import sys import json -import logging import socket -import sys +import logging + +from django.utils.translation import gettext as _ +from django.views.decorators.cache import never_cache -from sqoop import client, conf -from sqoop.api.decorators import get_connector_or_exception from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions import StructuredException from desktop.lib.rest.http_client import RestException +from sqoop import client, conf +from sqoop.api.decorators import get_connector_or_exception from sqoop.api.exception import handle_rest_exception from sqoop.api.utils import list_to_dict -from django.views.decorators.cache import never_cache - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ __all__ = ['get_connectors', 'connectors', 'connector'] LOG = logging.getLogger() + @never_cache def get_connectors(request): response = { @@ -48,18 +47,22 @@ def get_connectors(request): 'connectors': [] } try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['connectors'] = list_to_dict(c.get_connectors()) except RestException as e: response.update(handle_rest_exception(e, _('Could not get connectors.'))) return JsonResponse(response) + def connectors(request): if request.method == 'GET': return get_connectors(request) else: raise StructuredException(code="INVALID_METHOD", message=_('GET request required.'), error_code=405) + @never_cache @get_connector_or_exception() def connector(request, connector): diff --git a/apps/sqoop/src/sqoop/api/decorators.py b/apps/sqoop/src/sqoop/api/decorators.py index a9b0f9ac8e2..675bc978733 100644 --- a/apps/sqoop/src/sqoop/api/decorators.py +++ 
b/apps/sqoop/src/sqoop/api/decorators.py @@ -16,23 +16,19 @@ # limitations under the License. from __future__ import absolute_import + +import sys import json import logging -import sys from django.utils.functional import wraps +from django.utils.translation import gettext as _ from desktop.lib.django_util import render from desktop.lib.exceptions_renderable import PopupException from desktop.lib.rest.http_client import RestException -from sqoop.api.exception import handle_rest_exception from sqoop import client, conf - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from sqoop.api.exception import handle_rest_exception __all__ = ['get_job_or_exception'] @@ -43,7 +39,9 @@ def get_connector_or_exception(exception_class=PopupException): def inner(view_func): def decorate(request, connector_id, *args, **kwargs): try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) connector = c.get_connector(int(connector_id)) except RestException as e: handle_rest_exception(e, _('Could not get connector.')) @@ -56,7 +54,9 @@ def get_link_or_exception(exception_class=PopupException): def inner(view_func): def decorate(request, link_id, *args, **kwargs): try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) link = c.get_link(int(link_id)) except RestException as e: handle_rest_exception(e, _('Could not get link.')) @@ -69,7 +69,9 @@ def get_job_or_exception(exception_class=PopupException): def inner(view_func): def decorate(request, job_id, *args, **kwargs): try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) job = c.get_job(int(job_id)) except RestException as e: handle_rest_exception(e, _('Could not get job.')) @@ -82,7 +84,9 @@ def get_submission_or_exception(exception_class=PopupException): def inner(view_func): def decorate(request, submission_id, *args, **kwargs): try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) submission = c.get_submission(int(submission_id)) except RestException as e: handle_rest_exception(e, _('Could not get submission.')) diff --git a/apps/sqoop/src/sqoop/api/driver.py b/apps/sqoop/src/sqoop/api/driver.py index 0fe7ee30b27..ca8f634a344 100644 --- a/apps/sqoop/src/sqoop/api/driver.py +++ b/apps/sqoop/src/sqoop/api/driver.py @@ -16,28 +16,27 @@ # limitations under the License. 
 from __future__ import absolute_import
+
+import sys
 import json
-import logging
 import socket
-import sys
+import logging
+
+from django.utils.translation import gettext as _
+from django.views.decorators.cache import never_cache
 
-from sqoop import client, conf
 from desktop.lib.django_util import JsonResponse
 from desktop.lib.exceptions import StructuredException
 from desktop.lib.rest.http_client import RestException
+from sqoop import client, conf
 from sqoop.api.exception import handle_rest_exception
-from django.views.decorators.cache import never_cache
-
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
 
 __all__ = ['driver']
 
 LOG = logging.getLogger()
 
+
 @never_cache
 def driver(request):
   response = {
@@ -47,7 +46,9 @@
   }
   if request.method == 'GET':
     try:
-      c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get())
+      c = client.SqoopClient(
+        conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()
+      )
       response['driver'] = c.get_driver().to_dict()
     except RestException as e:
       response.update(handle_rest_exception(e, _('Could not get driver.')))
diff --git a/apps/sqoop/src/sqoop/api/exception.py b/apps/sqoop/src/sqoop/api/exception.py
index 749105a6c3a..dfd97b8e816 100644
--- a/apps/sqoop/src/sqoop/api/exception.py
+++ b/apps/sqoop/src/sqoop/api/exception.py
@@ -15,18 +15,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from builtins import str
-import logging
-import socket
 import sys
+import socket
+import logging
+from builtins import str
 
 from django.utils.encoding import smart_str
-
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
-
+from django.utils.translation import gettext as _
 
 LOG = logging.getLogger()
diff --git a/apps/sqoop/src/sqoop/api/job.py b/apps/sqoop/src/sqoop/api/job.py
index 8d027dec859..0e2a961f3ed 100644
--- a/apps/sqoop/src/sqoop/api/job.py
+++ b/apps/sqoop/src/sqoop/api/job.py
@@ -16,33 +16,30 @@
 # limitations under the License.
from __future__ import absolute_import + +import sys import json import logging -import sys from django.utils.encoding import smart_str +from django.utils.translation import gettext as _ from django.views.decorators.cache import never_cache -from sqoop import client, conf -from sqoop.client.exception import SqoopException -from sqoop.api.decorators import get_job_or_exception from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions import StructuredException from desktop.lib.rest.http_client import RestException +from sqoop import client, conf +from sqoop.api.decorators import get_job_or_exception from sqoop.api.exception import handle_rest_exception from sqoop.api.utils import list_to_dict - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from sqoop.client.exception import SqoopException __all__ = ['get_jobs', 'create_job', 'update_job', 'job', 'jobs', 'job_clone', 'job_delete', 'job_start', 'job_stop', 'job_status'] LOG = logging.getLogger() + @never_cache def get_jobs(request): response = { @@ -51,13 +48,16 @@ def get_jobs(request): 'jobs': [] } try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) jobs = c.get_jobs() response['jobs'] = list_to_dict(jobs) except RestException as e: response.update(handle_rest_exception(e, _('Could not get jobs.'))) return JsonResponse(response) + @never_cache def create_job(request): if request.method != 'POST': @@ -70,13 +70,17 @@ def create_job(request): } if 'job' not in request.POST: - raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving job'), data={'errors': 'job is missing.'}, error_code=400) + raise StructuredException( + code="INVALID_REQUEST_ERROR", message=_('Error saving job'), data={'errors': 'job is missing.'}, error_code=400 + ) d = json.loads(smart_str(request.POST.get('job'))) job = client.Job.from_dict(d) try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['job'] = c.create_job(job).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not create job.'))) @@ -85,6 +89,7 @@ def create_job(request): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache def update_job(request, job): if request.method != 'POST': @@ -97,12 +102,16 @@ def update_job(request, job): } if 'job' not in request.POST: - raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving job'), data={'errors': 'job is missing.'}, error_code=400) + raise StructuredException( + code="INVALID_REQUEST_ERROR", message=_('Error saving job'), data={'errors': 'job is missing.'}, error_code=400 + ) job.update_from_dict(json.loads(smart_str(request.POST.get('job')))) try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['job'] 
= c.update_job(job).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not update job.'))) @@ -111,6 +120,7 @@ def update_job(request, job): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache def jobs(request): if request.method == 'GET': @@ -120,6 +130,7 @@ def jobs(request): else: raise StructuredException(code="INVALID_METHOD", message=_('GET or POST request required.'), error_code=405) + @never_cache @get_job_or_exception() def job(request, job): @@ -136,6 +147,7 @@ def job(request, job): else: raise StructuredException(code="INVALID_METHOD", message=_('GET or POST request required.'), error_code=405) + @never_cache @get_job_or_exception() def job_clone(request, job): @@ -151,7 +163,9 @@ def job_clone(request, job): job.id = -1 job.name = '%s-copy' % job.name try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['job'] = c.create_job(job).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not clone job.'))) @@ -160,6 +174,7 @@ def job_clone(request, job): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache @get_job_or_exception() def job_delete(request, job): @@ -173,7 +188,9 @@ def job_delete(request, job): } try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) c.delete_job(job) except RestException as e: response.update(handle_rest_exception(e, _('Could not delete job.'))) @@ -182,6 +199,7 @@ def job_delete(request, job): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache @get_job_or_exception() def job_start(request, job): @@ -195,7 +213,9 @@ def job_start(request, job): } try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['submission'] = c.start_job(job).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not start job.'))) @@ -204,6 +224,7 @@ def job_start(request, job): response['errors'] = [e.to_dict()] return JsonResponse(response) + @never_cache @get_job_or_exception() def job_stop(request, job): @@ -217,7 +238,9 @@ def job_stop(request, job): } try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['submission'] = c.stop_job(job).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not stop job.'))) @@ -226,6 +249,7 @@ def job_stop(request, job): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache @get_job_or_exception() def job_status(request, job): @@ -239,7 +263,9 @@ def job_status(request, job): } try: - c = 
client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['submission'] = c.get_job_status(job).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not get job status.'))) diff --git a/apps/sqoop/src/sqoop/api/link.py b/apps/sqoop/src/sqoop/api/link.py index 2c383cd8d5a..63dd12fa359 100644 --- a/apps/sqoop/src/sqoop/api/link.py +++ b/apps/sqoop/src/sqoop/api/link.py @@ -16,33 +16,31 @@ # limitations under the License. from __future__ import absolute_import + +import sys import json -import logging import socket -import sys +import logging from django.utils.encoding import smart_str +from django.utils.translation import gettext as _ +from django.views.decorators.cache import never_cache -from sqoop import client, conf -from sqoop.client.exception import SqoopException -from sqoop.api.decorators import get_link_or_exception from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions import StructuredException from desktop.lib.rest.http_client import RestException +from sqoop import client, conf +from sqoop.api.decorators import get_link_or_exception from sqoop.api.exception import handle_rest_exception from sqoop.api.utils import list_to_dict -from django.views.decorators.cache import never_cache - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from sqoop.client.exception import SqoopException __all__ = ['get_links', 'create_link', 'update_link', 'link', 'links', 'link_clone', 'link_delete'] LOG = logging.getLogger() + @never_cache def get_links(request): response = { @@ -51,12 +49,15 @@ def get_links(request): 'links': [] } try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['links'] = list_to_dict(c.get_links()) except RestException as e: response.update(handle_rest_exception(e, _('Could not get links.'))) return JsonResponse(response) + @never_cache def create_link(request): response = { @@ -66,13 +67,17 @@ def create_link(request): } if 'link' not in request.POST: - raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving link'), data={'errors': 'Link is missing.'}, error_code=400) + raise StructuredException( + code="INVALID_REQUEST_ERROR", message=_('Error saving link'), data={'errors': 'Link is missing.'}, error_code=400 + ) d = json.loads(smart_str(request.POST.get('link'))) link = client.Link.from_dict(d) try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['link'] = c.create_link(link).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not create link.'))) @@ -81,6 +86,7 @@ def create_link(request): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache def update_link(request, link): response = { @@ -90,12 +96,16 @@ def 
update_link(request, link): } if 'link' not in request.POST: - raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving link'), data={'errors': 'Link is missing.'}, error_code=400) + raise StructuredException( + code="INVALID_REQUEST_ERROR", message=_('Error saving link'), data={'errors': 'Link is missing.'}, error_code=400 + ) link.update_from_dict(json.loads(smart_str(request.POST.get('link')))) try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['link'] = c.update_link(link).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not update link.'))) @@ -104,6 +114,7 @@ def update_link(request, link): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache def links(request): if request.method == 'GET': @@ -113,6 +124,7 @@ def links(request): else: raise StructuredException(code="INVALID_METHOD", message=_('GET or POST request required.'), error_code=405) + @never_cache @get_link_or_exception() def link(request, link): @@ -129,6 +141,7 @@ def link(request, link): else: raise StructuredException(code="INVALID_METHOD", message=_('GET or POST request required.'), error_code=405) + @never_cache @get_link_or_exception() def link_clone(request, link): @@ -144,7 +157,9 @@ def link_clone(request, link): link.id = -1 link.name = '%s-copy' % link.name try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) response['link'] = c.create_link(link).to_dict() except RestException as e: response.update(handle_rest_exception(e, _('Could not clone link.'))) @@ -153,6 +168,7 @@ def link_clone(request, link): response['errors'] = e.to_dict() return JsonResponse(response) + @never_cache @get_link_or_exception() def link_delete(request, link): @@ -165,7 +181,9 @@ def link_delete(request, link): } try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) c.delete_link(link) except RestException as e: response.update(handle_rest_exception(e, _('Could not delete link.'))) diff --git a/apps/sqoop/src/sqoop/api/submission.py b/apps/sqoop/src/sqoop/api/submission.py index dd735ede6d4..0f59400a772 100644 --- a/apps/sqoop/src/sqoop/api/submission.py +++ b/apps/sqoop/src/sqoop/api/submission.py @@ -16,30 +16,29 @@ # limitations under the License. 
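The sqoop API modules in this patch (job.py and link.py above, submission.py just below) all drop the same Python 2/3 translation shim in favour of a single import. A minimal standalone sketch of the pattern they converge on; the settings.configure() call is only there so the snippet runs outside a full Django project and is not part of the patch:

from django.conf import settings
from django.utils.translation import gettext as _

# Outside a configured Django project, set up throwaway settings so the translation
# machinery can be exercised; with USE_I18N disabled, gettext is a passthrough.
if not settings.configured:
  settings.configure(USE_I18N=False)

# The old "if sys.version_info[0] > 2: gettext ... else: ugettext" branch is gone:
# ugettext was removed in Django 4.0, so only the import above remains.
assert _('Could not get jobs.') == 'Could not get jobs.'
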
from __future__ import absolute_import + +import sys import json -import logging import socket -import sys +import logging + +from django.utils.translation import gettext as _ +from django.views.decorators.cache import never_cache -from sqoop import client, conf -from sqoop.api.decorators import get_submission_or_exception from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions import StructuredException from desktop.lib.rest.http_client import RestException +from sqoop import client, conf +from sqoop.api.decorators import get_submission_or_exception from sqoop.api.exception import handle_rest_exception from sqoop.api.utils import list_to_dict -from django.views.decorators.cache import never_cache - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ __all__ = ['get_submissions', 'submissions'] LOG = logging.getLogger() + @never_cache def get_submissions(request): response = { @@ -49,13 +48,16 @@ def get_submissions(request): } status = request.GET.get('status', 'submissions').split(',') try: - c = client.SqoopClient(conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get()) + c = client.SqoopClient( + conf.SERVER_URL.get(), request.user.username, request.LANGUAGE_CODE, ssl_cert_ca_verify=conf.SSL_CERT_CA_VERIFY.get() + ) submissions = c.get_submissions() response['submissions'] = list_to_dict(submissions) except RestException as e: response.update(handle_rest_exception(e, _('Could not get submissions.'))) return JsonResponse(response) + @never_cache def submissions(request): if request.method == 'GET': diff --git a/apps/sqoop/src/sqoop/client/resource.py b/apps/sqoop/src/sqoop/client/resource.py index 5a501835205..b1e78d46874 100644 --- a/apps/sqoop/src/sqoop/client/resource.py +++ b/apps/sqoop/src/sqoop/client/resource.py @@ -14,17 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import sys +import json + +from django.utils.translation import gettext as _ from desktop.lib.python_util import force_dict_to_strings from desktop.lib.rest.resource import Resource -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - class SqoopResource(Resource): """ @@ -73,7 +70,6 @@ def delete(self, relpath=None, params=None, headers=None): """ return self.invoke("DELETE", relpath, params, None, headers) - def post(self, relpath=None, params=None, data=None, headers=None): """ Invoke the POST method on a resource. @@ -86,7 +82,6 @@ def post(self, relpath=None, params=None, data=None, headers=None): """ return self.invoke("POST", relpath, params, data, headers) - def put(self, relpath=None, params=None, data=None, headers=None): """ Invoke the PUT method on a resource. 
@@ -99,8 +94,7 @@ def put(self, relpath=None, params=None, data=None, headers=None): """ return self.invoke("PUT", relpath, params, data, headers) - def _make_headers(self, contenttype=None): if contenttype: - return { 'Content-Type': contenttype } + return {'Content-Type': contenttype} return None diff --git a/apps/sqoop/src/sqoop/conf.py b/apps/sqoop/src/sqoop/conf.py index 2049b23d1f6..293cb468031 100644 --- a/apps/sqoop/src/sqoop/conf.py +++ b/apps/sqoop/src/sqoop/conf.py @@ -18,17 +18,12 @@ import os import sys +from django.utils.translation import gettext_lazy as _t + from desktop.conf import default_ssl_validate from desktop.lib.conf import Config, coerce_bool - from sqoop.settings import NICE_NAME -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - - SERVER_URL = Config( key="server_url", default='http://localhost:12000/sqoop', @@ -53,10 +48,11 @@ default=False ) + def config_validator(user): res = [] - from hadoop import cluster # Avoid dependencies conflicts + from hadoop import cluster # Avoid dependencies conflicts yarn_cluster = cluster.get_cluster_conf_for_job_submission() if yarn_cluster.SECURITY_ENABLED.get() and not os.path.exists(SQOOP_CONF_DIR.get()): diff --git a/apps/sqoop/src/sqoop/test_client.py b/apps/sqoop/src/sqoop/test_client.py index 596336a9c6a..5ff291dc08c 100644 --- a/apps/sqoop/src/sqoop/test_client.py +++ b/apps/sqoop/src/sqoop/test_client.py @@ -17,17 +17,12 @@ import os import shutil -import sys import tempfile -from sqoop.conf import SQOOP_CONF_DIR from sqoop.client.base import SqoopClient +from sqoop.conf import SQOOP_CONF_DIR from sqoop.sqoop_properties import reset -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file def test_security_plain(): tmpdir = tempfile.mkdtemp() @@ -35,7 +30,7 @@ def test_security_plain(): try: xml = sqoop_properties(authentication='SIMPLE') - with open_file(os.path.join(tmpdir, 'sqoop.properties'), 'w') as f: + with open(os.path.join(tmpdir, 'sqoop.properties'), 'w') as f: f.write(xml) reset() @@ -53,7 +48,7 @@ def test_security_kerberos(): try: xml = sqoop_properties(authentication='KERBEROS') - with open_file(os.path.join(tmpdir, 'sqoop.properties'), 'w') as f: + with open(os.path.join(tmpdir, 'sqoop.properties'), 'w') as f: f.write(xml) reset() diff --git a/apps/sqoop/src/sqoop/urls.py b/apps/sqoop/src/sqoop/urls.py index 7be0703c93e..d16cd6cac88 100644 --- a/apps/sqoop/src/sqoop/urls.py +++ b/apps/sqoop/src/sqoop/urls.py @@ -17,13 +17,9 @@ import sys -from sqoop import views as sqoop_views -from sqoop import api as sqoop_api +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from sqoop import api as sqoop_api, views as sqoop_views urlpatterns = [ re_path(r'^$', sqoop_views.app, name='index') diff --git a/apps/useradmin/src/useradmin/api.py b/apps/useradmin/src/useradmin/api.py index 77952ebea31..c99083a7e10 100644 --- a/apps/useradmin/src/useradmin/api.py +++ b/apps/useradmin/src/useradmin/api.py @@ -19,10 +19,8 @@ from desktop.decorators import check_superuser_permission from desktop.lib.django_util import JsonResponse -from desktop.lib.i18n import smart_unicode - -from useradmin.models import User, Group - +from desktop.lib.i18n import smart_str +from useradmin.models import Group, User LOG = logging.getLogger() @@ -35,7 +33,7 @@ def decorator(*args, **kwargs): except Exception as e: 
LOG.exception('Error running %s' % view_fn) response['status'] = -1 - response['message'] = smart_unicode(e) + response['message'] = smart_str(e) return JsonResponse(response) return decorator diff --git a/apps/useradmin/src/useradmin/conf.py b/apps/useradmin/src/useradmin/conf.py index 2b7f48fe44d..788fef23b6b 100644 --- a/apps/useradmin/src/useradmin/conf.py +++ b/apps/useradmin/src/useradmin/conf.py @@ -20,12 +20,9 @@ import sys -from desktop.lib.conf import Config, ConfigSection, coerce_bool +from django.utils.translation import gettext_lazy as _ -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ +from desktop.lib.conf import Config, ConfigSection, coerce_bool HOME_DIR_PERMISSIONS = Config( key="home_dir_permissions", @@ -63,14 +60,14 @@ "uppercase and lowercase letters, at least one number, and at least one " "special character."), type=str, - default="^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$"), + default=r"^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$"), PWD_HINT=Config( key="pwd_hint", help=_("Message about the password rule defined in pwd_regex"), type=str, - default="The password must be at least 8 characters long, and must contain both " + \ - "uppercase and lowercase letters, at least one number, and at least " + \ + default="The password must be at least 8 characters long, and must contain both " + + "uppercase and lowercase letters, at least one number, and at least " + "one special character."), PWD_ERROR_MESSAGE=Config( @@ -78,8 +75,8 @@ help=_("The error message displayed if the provided password does not " "meet the enhanced password rule"), type=str, - default="The password must be at least 8 characters long, and must contain both " + \ - "uppercase and lowercase letters, at least one number, and at least " + \ + default="The password must be at least 8 characters long, and must contain both " + + "uppercase and lowercase letters, at least one number, and at least " + "one special character.") ) ) diff --git a/apps/useradmin/src/useradmin/forms.py b/apps/useradmin/src/useradmin/forms.py index 5c4b4163f75..6bcd6d13367 100644 --- a/apps/useradmin/src/useradmin/forms.py +++ b/apps/useradmin/src/useradmin/forms.py @@ -15,31 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
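In useradmin/conf.py just above, the password rule becomes a raw string so that \d and \W reach the regex engine instead of being parsed as (now invalid) string escape sequences. A quick standalone check of that rule; the sample passwords are made up:

import re

PWD_RULE = r"^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$"

validator = re.compile(PWD_RULE)
assert validator.match("Str0ng!pass")       # upper and lower case, digit, special char, 8+ chars
assert not validator.match("weakpassword")  # rejected: no upper case, digit or special char
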
-from builtins import object -import logging import re -import sys +import logging +from builtins import object import django.contrib.auth.forms from django import forms from django.forms import ValidationError from django.forms.utils import ErrorList +from django.utils.translation import gettext as _, gettext_lazy as _t from desktop import conf as desktop_conf from desktop.conf import ENABLE_ORGANIZATIONS -from desktop.lib.django_util import get_username_re_rule, get_groupname_re_rule +from desktop.lib.django_util import get_groupname_re_rule, get_username_re_rule from desktop.settings import LANGUAGES - from useradmin.hue_password_policy import hue_get_password_validators -from useradmin.models import GroupPermission, HuePermission, get_default_user_group, User, Group, Organization +from useradmin.models import Group, GroupPermission, HuePermission, Organization, User, get_default_user_group from useradmin.organization import get_user_request_organization -if sys.version_info[0] > 2: - from django.utils.translation import get_language, gettext as _, gettext_lazy as _t -else: - from django.utils.translation import get_language, ugettext as _, ugettext_lazy as _t - - LOG = logging.getLogger() @@ -49,10 +42,12 @@ def get_server_choices(): else: return [] + def validate_dn(dn): if not dn: raise ValidationError(_('Full Distinguished Name required.')) + def validate_username(username_pattern): validator = re.compile(r"^%s$" % get_username_re_rule()) @@ -63,6 +58,7 @@ def validate_username(username_pattern): if not validator.match(username_pattern): raise ValidationError(_("Username must not contain whitespaces and ':'")) + def validate_groupname(groupname_pattern): validator = re.compile(r"^%s$" % get_groupname_re_rule()) @@ -73,10 +69,12 @@ def validate_groupname(groupname_pattern): if not validator.match(groupname_pattern): raise ValidationError(_("Group name can be any character as long as it's 80 characters or fewer.")) + def validate_first_name(first_name): if first_name and len(first_name) > 30: raise ValidationError(_('first_name must be fewer than 30 characters.')) + def validate_last_name(last_name): if last_name and len(last_name) > 30: raise ValidationError(_('last_name must be fewer than 30 characters.')) @@ -92,9 +90,9 @@ class UserChangeForm(django.contrib.auth.forms.UserChangeForm): username = forms.RegexField( label=_t("Username"), max_length=30, - regex='^%s$' % (get_username_re_rule(),), # Could use UnicodeUsernameValidator() - help_text = _t("Required. 30 characters or fewer. No whitespaces or colons."), - error_messages = {'invalid': _t("Whitespaces and ':' not allowed") }) + regex='^%s$' % (get_username_re_rule(),), # Could use UnicodeUsernameValidator() + help_text=_t("Required. 30 characters or fewer. 
No whitespaces or colons."), + error_messages={'invalid': _t("Whitespaces and ':' not allowed")}) password1 = forms.CharField( label=_t("New Password"), @@ -133,7 +131,7 @@ class UserChangeForm(django.contrib.auth.forms.UserChangeForm): ) class Meta(django.contrib.auth.forms.UserChangeForm.Meta): - model = User + model = User fields = ["username", "first_name", "last_name", "email", "ensure_home_directory"] def __init__(self, *args, **kwargs): @@ -209,6 +207,7 @@ def save(self, commit=True): self.save_m2m() return user + if ENABLE_ORGANIZATIONS.get(): class OrganizationUserChangeForm(UserChangeForm): username = None @@ -218,10 +217,10 @@ class OrganizationUserChangeForm(UserChangeForm): ) class Meta(django.contrib.auth.forms.UserChangeForm.Meta): - model = User + model = User fields = ["first_name", "last_name", "email", "ensure_home_directory"] if ENABLE_ORGANIZATIONS.get(): - fields.append('organization') # Because of import logic + fields.append('organization') # Because of import logic def __init__(self, *args, **kwargs): super(OrganizationUserChangeForm, self).__init__(*args, **kwargs) @@ -236,7 +235,7 @@ def __init__(self, *args, **kwargs): def clean_organization(self): try: return Organization.objects.get(id=int(self.cleaned_data.get('organization'))) - except: + except Exception: LOG.exception('The organization does not exist.') return None @@ -268,10 +267,10 @@ class OrganizationUserChangeForm(UserChangeForm): ) class Meta(django.contrib.auth.forms.UserChangeForm.Meta): - model = User + model = User fields = ["first_name", "last_name", "email", "ensure_home_directory"] if ENABLE_ORGANIZATIONS.get(): - fields.append('organization') # Because of import logic + fields.append('organization') # Because of import logic def __init__(self, *args, **kwargs): super(OrganizationUserChangeForm, self).__init__(*args, **kwargs) @@ -286,7 +285,7 @@ def __init__(self, *args, **kwargs): def clean_organization(self): try: return Organization.objects.get(id=int(self.cleaned_data.get('organization'))) - except: + except Exception: LOG.exception('The organization does not exist.') return None @@ -488,7 +487,7 @@ def clean_name(self): def clean_organization(self): try: return Organization.objects.get(id=int(self.cleaned_data.get('organization'))) - except: + except Exception: LOG.exception('The organization does not exist.') return None diff --git a/apps/useradmin/src/useradmin/hue_password_policy.py b/apps/useradmin/src/useradmin/hue_password_policy.py index 4c4d92925b4..7a310bc2d92 100644 --- a/apps/useradmin/src/useradmin/hue_password_policy.py +++ b/apps/useradmin/src/useradmin/hue_password_policy.py @@ -17,15 +17,12 @@ import re import sys - from builtins import object + from django.core.exceptions import ValidationError -from useradmin.conf import PASSWORD_POLICY +from django.utils.translation import gettext_lazy as _ -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ +from useradmin.conf import PASSWORD_POLICY _PASSWORD_POLICY = None @@ -81,4 +78,4 @@ def is_password_policy_enabled(): def get_password_hint(): - return get_password_policy().password_hint + return get_password_policy().password_hint diff --git a/apps/useradmin/src/useradmin/ldap_access.py b/apps/useradmin/src/useradmin/ldap_access.py index 2d01e33b488..33e10205868 100644 --- a/apps/useradmin/src/useradmin/ldap_access.py +++ b/apps/useradmin/src/useradmin/ldap_access.py @@ -18,10 +18,15 @@ This module provides access to LDAP servers, 
along with some basic functionality required for Hue and User Admin to work seamlessly with LDAP. """ -from builtins import str, object +import re import logging -import sys + +from django.utils.encoding import smart_str + +import desktop.conf +from desktop.lib.python_util import CaseInsensitiveDict +from useradmin.models import User LOG = logging.getLogger() @@ -32,17 +37,6 @@ except ImportError: LOG.warning('ldap module not found') SCOPE_SUBTREE = None -import re - -import desktop.conf -from desktop.lib.python_util import CaseInsensitiveDict - -from useradmin.models import User - -if sys.version_info[0] > 2: - from django.utils.encoding import smart_str -else: - from django.utils.encoding import smart_text as smart_str CACHED_LDAP_CONN = None @@ -66,6 +60,7 @@ def get_connection_from_server(server=None): return get_connection(ldap_config) + def get_connection(ldap_config): global CACHED_LDAP_CONN if CACHED_LDAP_CONN is not None: @@ -85,6 +80,7 @@ def get_connection(ldap_config): else: return LdapConnection(ldap_config, ldap_url, get_ldap_username(username, ldap_config.NT_DOMAIN.get()), password, ldap_cert) + def get_auth(ldap_config): ldap_url = ldap_config.LDAP_URL.get() if ldap_url is None: @@ -103,6 +99,7 @@ def get_auth(ldap_config): return ldap_conf + def get_ldap_username(username, nt_domain): if nt_domain: return '%s@%s' % (username, nt_domain) @@ -273,7 +270,6 @@ def _transform_find_user_results(cls, result_data, user_name_attr): user_info.append(ldap_info) return user_info - def _transform_find_group_results(self, result_data, group_name_attr, group_member_attr): group_info = [] if result_data: diff --git a/apps/useradmin/src/useradmin/management/commands/import_ldap_group.py b/apps/useradmin/src/useradmin/management/commands/import_ldap_group.py index 226caa7de8e..2a8ba648303 100644 --- a/apps/useradmin/src/useradmin/management/commands/import_ldap_group.py +++ b/apps/useradmin/src/useradmin/management/commands/import_ldap_group.py @@ -18,17 +18,12 @@ import sys from django.core.management.base import BaseCommand, CommandError +from django.utils.translation import gettext as _, gettext_lazy as _t from desktop.conf import LDAP - from useradmin import ldap_access from useradmin.views import import_ldap_groups -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ - class Command(BaseCommand): """ @@ -41,7 +36,9 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('group', type=str) - parser.add_argument("--dn", help=_t("Whether or not the user should be imported by distinguished name."), action="store_true", default=False) + parser.add_argument( + "--dn", help=_t("Whether or not the user should be imported by distinguished name."), action="store_true", default=False + ) parser.add_argument("--import-members", help=_t("Import users from the group."), action="store_true", default=False) parser.add_argument( "--import-members-recursive", @@ -50,7 +47,7 @@ def add_arguments(self, parser): default=False ) parser.add_argument("--sync-users", help=_t("Sync users in the group."), action="store_true", default=False) - parser.add_argument("--server", help=_t("Server to connect to."), action="store", default=None) + parser.add_argument("--server", help=_t("Server to connect to."), action="store", default=None) def handle(self, group=None, **options): if group is None: diff --git 
a/apps/useradmin/src/useradmin/management/commands/import_ldap_user.py b/apps/useradmin/src/useradmin/management/commands/import_ldap_user.py index 62ae5833aa4..cbec9bbee23 100644 --- a/apps/useradmin/src/useradmin/management/commands/import_ldap_user.py +++ b/apps/useradmin/src/useradmin/management/commands/import_ldap_user.py @@ -18,17 +18,12 @@ import sys from django.core.management.base import BaseCommand, CommandError +from django.utils.translation import gettext as _, gettext_lazy as _t from desktop.conf import LDAP - from useradmin import ldap_access from useradmin.views import import_ldap_users -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ - class Command(BaseCommand): """ @@ -39,8 +34,10 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('user', type=str) - parser.add_argument("--dn", help=_t("Whether or not the user should be imported by distinguished name."), action="store_true", default=False) - parser.add_argument("--sync-groups", help=_t("Sync groups of the users."), action="store_true", default=False) + parser.add_argument( + "--dn", help=_t("Whether or not the user should be imported by distinguished name."), action="store_true", default=False + ) + parser.add_argument("--sync-groups", help=_t("Sync groups of the users."), action="store_true", default=False) parser.add_argument("--server", help=_t("Server to connect to."), action="store", default=None) def handle(self, user=None, **options): diff --git a/apps/useradmin/src/useradmin/management/commands/sync_ldap_users_and_groups.py b/apps/useradmin/src/useradmin/management/commands/sync_ldap_users_and_groups.py index fc193818ff9..c73992fc39b 100644 --- a/apps/useradmin/src/useradmin/management/commands/sync_ldap_users_and_groups.py +++ b/apps/useradmin/src/useradmin/management/commands/sync_ldap_users_and_groups.py @@ -18,16 +18,12 @@ import sys from django.core.management.base import BaseCommand +from django.utils.translation import gettext_lazy as _t from desktop.conf import LDAP - from useradmin import ldap_access from useradmin.views import sync_ldap_users_and_groups -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t class Command(BaseCommand): """ diff --git a/apps/useradmin/src/useradmin/management/commands/useradmin_sync_with_unix.py b/apps/useradmin/src/useradmin/management/commands/useradmin_sync_with_unix.py index 39c7850a61c..bea67620b58 100644 --- a/apps/useradmin/src/useradmin/management/commands/useradmin_sync_with_unix.py +++ b/apps/useradmin/src/useradmin/management/commands/useradmin_sync_with_unix.py @@ -18,14 +18,10 @@ import sys from django.core.management.base import BaseCommand +from django.utils.translation import gettext_lazy as _ from useradmin.views import sync_unix_users_and_groups -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _ -else: - from django.utils.translation import ugettext_lazy as _ - class Command(BaseCommand): """ diff --git a/apps/useradmin/src/useradmin/middleware.py b/apps/useradmin/src/useradmin/middleware.py index e5a9e8b3116..9926bad4f44 100644 --- a/apps/useradmin/src/useradmin/middleware.py +++ b/apps/useradmin/src/useradmin/middleware.py @@ -15,35 +15,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
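Alongside the management-command cleanups above, ldap_access.py and useradmin/api.py replace the old smart_unicode/smart_text names with smart_str, which on Python 3 simply coerces to str. A toy illustration of that coercion (the exception text is made up):

from django.utils.encoding import smart_str

try:
  raise ValueError('LDAP bind failed for cn=héllo')
except Exception as exc:
  message = smart_str(exc)

assert isinstance(message, str)
assert 'cn=héllo' in message
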
-from __future__ import division -from __future__ import absolute_import -from builtins import next -from builtins import object -import logging +from __future__ import absolute_import, division + +import sys import math +import logging +from builtins import next, object from datetime import datetime -import sys from django.contrib import messages from django.contrib.sessions.models import Session from django.db import DatabaseError from django.db.models import Q from django.utils.deprecation import MiddlewareMixin +from django.utils.translation import gettext as _ from desktop.auth.views import dt_logout from desktop.conf import AUTH, LDAP, SESSION from desktop.lib.security_util import get_localhost_name - from useradmin import ldap_access -from useradmin.models import UserProfile, get_profile, User +from useradmin.models import User, UserProfile, get_profile from useradmin.views import import_ldap_users -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -116,7 +109,6 @@ def process_request(self, request): if logout: dt_logout(request, next_page='/') - def _total_seconds(self, dt): # Keep backward compatibility with Python 2.6 which doesn't support total_seconds() if hasattr(dt, 'total_seconds'): @@ -124,6 +116,7 @@ def _total_seconds(self, dt): else: return math.floor((dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10**6) / 10**6) + class ConcurrentUserSessionMiddleware(MiddlewareMixin): """ Middleware that remove concurrent user sessions when configured @@ -131,7 +124,7 @@ class ConcurrentUserSessionMiddleware(MiddlewareMixin): def process_response(self, request, response): try: user = request.user - except AttributeError: # When the request does not store user. We care only about the login request which does store the user. + except AttributeError: # When the request does not store user. We care only about the login request which does store the user. return response # request.session.modified checks if a user just logged in diff --git a/apps/useradmin/src/useradmin/models.py b/apps/useradmin/src/useradmin/models.py index 71c1e6c275c..37302e91162 100644 --- a/apps/useradmin/src/useradmin/models.py +++ b/apps/useradmin/src/useradmin/models.py @@ -37,41 +37,36 @@ notion of permissions. The permissions notion in Django is strongly tied to what models you may or may not edit, and there are elaborations to manipulate this row by row. This does not map nicely onto actions which may not relate to database models. 
""" -import collections +import sys import json import logging -import sys - +import collections from datetime import datetime from enum import Enum -from django.db import connection, models, transaction from django.contrib.auth import models as auth_models from django.contrib.auth.models import AbstractUser, BaseUserManager from django.core.cache import cache +from django.db import connection, models, transaction from django.utils import timezone as dtz +from django.utils.translation import gettext_lazy as _t from desktop import appmanager -from desktop.conf import ENABLE_ORGANIZATIONS, ENABLE_CONNECTORS -from desktop.lib.connectors.models import _get_installed_connectors, Connector +from desktop.conf import ENABLE_CONNECTORS, ENABLE_ORGANIZATIONS +from desktop.lib.connectors.models import Connector, _get_installed_connectors from desktop.lib.exceptions_renderable import PopupException from desktop.lib.idbroker.conf import is_idbroker_enabled from desktop.monkey_patches import monkey_patch_username_validator - from useradmin.conf import DEFAULT_USER_GROUP -from useradmin.permissions import HuePermission, GroupPermission, LdapGroup - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t +from useradmin.permissions import GroupPermission, HuePermission, LdapGroup if ENABLE_ORGANIZATIONS.get(): - from useradmin.organization import OrganizationUser as User, OrganizationGroup as Group, get_organization, Organization + from useradmin.organization import Organization, OrganizationGroup as Group, OrganizationUser as User, get_organization else: - from django.contrib.auth.models import User, Group + from django.contrib.auth.models import Group, User def get_organization(): pass - class Organization(): pass + class Organization(): + pass monkey_patch_username_validator() @@ -167,9 +162,11 @@ def get_profile(user): user._cached_userman_profile = profile return profile + def group_has_permission(group, perm): return GroupPermission.objects.filter(group=group, hue_permission=perm).exists() + def group_permissions(group): return HuePermission.objects.filter(grouppermission__group=group).all() @@ -182,7 +179,7 @@ def create_profile_for_user(user): try: p.save() return p - except: + except Exception: LOG.exception("Failed to automatically create user profile.") return None @@ -235,7 +232,7 @@ def update_app_permissions(**kwargs): try: for dp in HuePermission.objects.all(): current.setdefault(dp.app, {})[dp.action] = dp - except: + except Exception: LOG.exception('failed to get permissions') return @@ -363,7 +360,7 @@ def install_sample_user(django_user=None): user = User.objects.get(id=SAMPLE_USER_ID) user.username = django_username user.save() - except: + except Exception: LOG.exception('Failed to get or create sample user') # If sample user doesn't belong to default group, add to default group diff --git a/apps/useradmin/src/useradmin/organization.py b/apps/useradmin/src/useradmin/organization.py index b099f363ee9..3d3c2f7ec2c 100644 --- a/apps/useradmin/src/useradmin/organization.py +++ b/apps/useradmin/src/useradmin/organization.py @@ -15,23 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys import uuid +import logging from crequest.middleware import CrequestMiddleware - -from django.contrib.auth.models import models, AbstractUser, BaseUserManager +from django.contrib.auth.models import AbstractUser, BaseUserManager, models from django.utils.functional import SimpleLazyObject +from django.utils.translation import gettext_lazy as _t from desktop.conf import ENABLE_ORGANIZATIONS -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - - LOG = logging.getLogger() @@ -68,7 +62,8 @@ def get_organization(email, is_multi_user=False): organization, created = Organization.objects.get_or_create(name=domain, domain=domain, is_multi_user=is_multi_user) LOG.info("Materializing organization %s in the database, is_multi_user=%s" % (domain, is_multi_user)) else: - LOG.warning('No organization domain found for email %s' % email) # For Backends without emails or when organization enabled by default + # For Backends without emails or when organization enabled by default + LOG.warning('No organization domain found for email %s' % email) organization = None return organization @@ -78,7 +73,6 @@ def uuid_default(): return str(uuid.uuid4()) - class OrganizationManager(models.Manager): use_in_migrations = True diff --git a/apps/useradmin/src/useradmin/organization_tests.py b/apps/useradmin/src/useradmin/organization_tests.py index 8ec62426dfd..ca30de5ea6b 100644 --- a/apps/useradmin/src/useradmin/organization_tests.py +++ b/apps/useradmin/src/useradmin/organization_tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -## -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- # Licensed to Cloudera, Inc. under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,26 +16,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import json import logging -import pytest -import sys -from django.test import TestCase +from unittest.mock import Mock, patch +import pytest from django.core.exceptions import FieldError +from django.test import TestCase -from desktop.auth.backend import rewrite_user, create_user +from desktop.auth.backend import create_user, rewrite_user from desktop.conf import ENABLE_ORGANIZATIONS from desktop.lib.django_test_util import make_logged_in_client from desktop.models import Document2 - -from useradmin.models import User, Group, Organization, HuePermission - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - +from useradmin.models import Group, HuePermission, Organization, User LOG = logging.getLogger() diff --git a/apps/useradmin/src/useradmin/permissions.py b/apps/useradmin/src/useradmin/permissions.py index 0084822a888..6c9ecbc254b 100644 --- a/apps/useradmin/src/useradmin/permissions.py +++ b/apps/useradmin/src/useradmin/permissions.py @@ -18,21 +18,15 @@ import sys from crequest.middleware import CrequestMiddleware - from django.db import connection, models, transaction +from django.utils.translation import gettext_lazy as _t -from desktop.conf import ENABLE_ORGANIZATIONS, ENABLE_CONNECTORS +from desktop.conf import ENABLE_CONNECTORS, ENABLE_ORGANIZATIONS from desktop.lib.connectors.models import Connector - from useradmin.organization import _fitered_queryset, get_user_request_organization -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - if ENABLE_ORGANIZATIONS.get(): - from useradmin.organization import OrganizationGroup as Group, Organization + from useradmin.organization import Organization, OrganizationGroup as Group else: from django.contrib.auth.models import Group @@ -88,7 +82,6 @@ class Meta(object): unique_together = ('connector', 'action',) - if ENABLE_ORGANIZATIONS.get(): class OrganizationConnectorPermissionManager(models.Manager): @@ -115,8 +108,11 @@ class Meta(ConnectorPermission.Meta): if ENABLE_CONNECTORS.get(): if ENABLE_ORGANIZATIONS.get(): - class HuePermission(OrganizationConnectorPermission): pass + class HuePermission(OrganizationConnectorPermission): + pass else: - class HuePermission(ConnectorPermission): pass + class HuePermission(ConnectorPermission): + pass else: - class HuePermission(BasePermission): pass + class HuePermission(BasePermission): + pass diff --git a/apps/useradmin/src/useradmin/test_ldap.py b/apps/useradmin/src/useradmin/test_ldap.py index 454bfa3ed91..f5fa76d481b 100644 --- a/apps/useradmin/src/useradmin/test_ldap.py +++ b/apps/useradmin/src/useradmin/test_ldap.py @@ -17,31 +17,33 @@ # limitations under the License. 
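organization_tests.py above (and test_ldap.py below) now import Mock and patch from the stdlib unittest.mock rather than the external mock backport; the API is unchanged. A toy sketch of both helpers (the job-status client and the 'host1' value are illustrative):

import socket
from unittest.mock import Mock, patch

client = Mock(get_job_status=Mock(return_value={'status': 'RUNNING'}))
assert client.get_job_status() == {'status': 'RUNNING'}

with patch('socket.gethostname', return_value='host1'):
  assert socket.gethostname() == 'host1'
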
from __future__ import absolute_import -import ldap -import pytest + import sys +from unittest.mock import MagicMock, Mock, patch +import ldap +import pytest from django.conf import settings from django.db.utils import DatabaseError from django.urls import reverse import desktop.conf -from desktop.lib.test_utils import grant_access from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.test_utils import grant_access from hadoop import pseudo_hdfs4 from hadoop.pseudo_hdfs4 import is_live_cluster - from useradmin import ldap_access -from useradmin.models import LdapGroup, UserProfile -from useradmin.models import get_profile, User, Group -from useradmin.views import sync_ldap_users, sync_ldap_groups, import_ldap_users, import_ldap_groups, \ - add_ldap_users, add_ldap_groups, sync_ldap_users_groups -from useradmin.tests import BaseUserAdminTests, LdapTestConnection, reset_all_groups, reset_all_users, create_long_username - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock +from useradmin.models import Group, LdapGroup, User, UserProfile, get_profile +from useradmin.tests import BaseUserAdminTests, LdapTestConnection, create_long_username, reset_all_groups, reset_all_users +from useradmin.views import ( + add_ldap_groups, + add_ldap_users, + import_ldap_groups, + import_ldap_users, + sync_ldap_groups, + sync_ldap_users, + sync_ldap_users_groups, +) def get_multi_ldap_config(): @@ -110,7 +112,6 @@ def test_useradmin_ldap_user_group_membership_sync(self): for finish in reset: finish() - def test_useradmin_ldap_suboordinate_group_integration(self): reset = [] @@ -183,7 +184,6 @@ def test_useradmin_ldap_suboordinate_group_integration(self): for finish in reset: finish() - def test_useradmin_ldap_nested_group_integration(self): reset = [] @@ -277,7 +277,6 @@ def test_useradmin_ldap_nested_group_integration(self): for finish in reset: finish() - def test_useradmin_ldap_suboordinate_posix_group_integration(self): reset = [] @@ -350,7 +349,6 @@ def test_useradmin_ldap_suboordinate_posix_group_integration(self): for finish in reset: finish() - def test_useradmin_ldap_nested_posix_group_integration(self): reset = [] @@ -436,8 +434,6 @@ def test_useradmin_ldap_nested_posix_group_integration(self): for finish in reset: finish() - - def test_useradmin_ldap_user_integration(self): if is_live_cluster(): raise SkipTest('HUE-2897: Skipping because the DB may not be case sensitive') @@ -518,7 +514,6 @@ def test_useradmin_ldap_user_integration(self): for finish in done: finish() - def test_useradmin_ldap_force_uppercase(self): if is_live_cluster(): raise SkipTest('HUE-2897: Skipping because the DB may not be case sensitive') @@ -545,8 +540,6 @@ def test_useradmin_ldap_force_uppercase(self): for finish in done: finish() - - def test_add_ldap_users(self): if is_live_cluster(): raise SkipTest('HUE-2897: Skipping because the DB may not be case sensitive') @@ -612,11 +605,11 @@ def test_add_ldap_users(self): ) assert b"Could not get LDAP details for users in pattern" in response.content, response.content # Removing this test because we are not running log listener - #response = c.get(reverse(desktop.views.log_view)) - #whitespaces_message = "{username}: Username must not contain whitespaces".format(username='user with space') - #if not isinstance(whitespaces_message, bytes): + # response = c.get(reverse(desktop.views.log_view)) + # whitespaces_message = "{username}: Username must not contain 
whitespaces".format(username='user with space') + # if not isinstance(whitespaces_message, bytes): # whitespaces_message = whitespaces_message.encode('utf-8') - #assert_true(whitespaces_message in response.content, response.content) + # assert_true(whitespaces_message in response.content, response.content) # Test dn with spaces in dn, but not username (should succeed) response = c.post( @@ -630,7 +623,6 @@ def test_add_ldap_users(self): for finish in done: finish() - def test_add_ldap_users_force_uppercase(self): if is_live_cluster(): raise SkipTest('HUE-2897: Skipping because the DB may not be case sensitive') @@ -710,9 +702,6 @@ def test_ldap_import_truncate_first_last_name(self): user, created = ldap_access.get_or_create_ldap_user(username=user_info[0]['username']) user.first_name = user_info[0]['first'] user.last_name = 'ชมหรือด่า อย่าไปรับ ให้กลับคืนไป'[:30] - if sys.version_info[0] == 2: - with pytest.raises(DatabaseError): - user.save() # 'Incorrect string value: '\\xE0\\xB8\\' for column 'last_name' at row 1' user.last_name = user_info[0]['last'] user.save() @@ -745,14 +734,11 @@ def test_add_ldap_groups(self): # Import test_longfirstname user ldap_access.CACHED_LDAP_CONN.add_user_group_for_test('uid=test_longfirstname,ou=People,dc=example,dc=com', 'TestUsers') response = c.post(URL, dict(server='multi_ldap_conf', groupname_pattern='TestUsers', import_members=True), follow=True) - if sys.version_info[0] > 2: - user_list_a = create_long_username().encode('utf-8') + b", test_longfirstname" - user_list_b = b"test_longfirstname, " + create_long_username().encode('utf-8') - else: - user_list_a = create_long_username() + b", test_longfirstname" - user_list_b = b"test_longfirstname, " + create_long_username() - - assert (b'Failed to import following users: %s' % user_list_a in response.content \ + + user_list_a = create_long_username().encode('utf-8') + b", test_longfirstname" + user_list_b = b"test_longfirstname, " + create_long_username().encode('utf-8') + + assert (b'Failed to import following users: %s' % user_list_a in response.content or b'Failed to import following users: %s' % user_list_b in response.content), response.content # Test with space diff --git a/apps/useradmin/src/useradmin/test_ldap_deprecated.py b/apps/useradmin/src/useradmin/test_ldap_deprecated.py index 8c3d253dec4..5b7e6e18244 100644 --- a/apps/useradmin/src/useradmin/test_ldap_deprecated.py +++ b/apps/useradmin/src/useradmin/test_ldap_deprecated.py @@ -17,29 +17,32 @@ # limitations under the License. 
from __future__ import absolute_import -import ldap -import pytest + import sys +from unittest.mock import MagicMock, Mock, patch +import ldap +import pytest from django.conf import settings from django.urls import reverse import desktop.conf -from desktop.lib.test_utils import grant_access from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.test_utils import grant_access from hadoop import pseudo_hdfs4 from hadoop.pseudo_hdfs4 import is_live_cluster -from useradmin.models import LdapGroup, UserProfile, get_profile, User, Group -from useradmin.views import sync_ldap_users, sync_ldap_groups, import_ldap_users, import_ldap_groups, \ - add_ldap_users, add_ldap_groups, sync_ldap_users_groups - from useradmin import ldap_access +from useradmin.models import Group, LdapGroup, User, UserProfile, get_profile from useradmin.tests import BaseUserAdminTests, LdapTestConnection, reset_all_groups, reset_all_users - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock +from useradmin.views import ( + add_ldap_groups, + add_ldap_users, + import_ldap_groups, + import_ldap_users, + sync_ldap_groups, + sync_ldap_users, + sync_ldap_users_groups, +) @pytest.mark.django_db @@ -92,7 +95,6 @@ def test_useradmin_ldap_user_group_membership_sync(self): finally: settings.MIDDLEWARE.remove('useradmin.middleware.LdapSynchronizationMiddleware') - def test_useradmin_ldap_suboordinate_group_integration(self): reset = [] @@ -161,7 +163,6 @@ def test_useradmin_ldap_suboordinate_group_integration(self): for finish in reset: finish() - def test_useradmin_ldap_nested_group_integration(self): reset = [] @@ -251,7 +252,6 @@ def test_useradmin_ldap_nested_group_integration(self): for finish in reset: finish() - def test_useradmin_ldap_suboordinate_posix_group_integration(self): reset = [] @@ -320,7 +320,6 @@ def test_useradmin_ldap_suboordinate_posix_group_integration(self): for finish in reset: finish() - def test_useradmin_ldap_nested_posix_group_integration(self): reset = [] @@ -401,7 +400,6 @@ def test_useradmin_ldap_nested_posix_group_integration(self): for finish in reset: finish() - def test_useradmin_ldap_user_integration(self): done = [] try: @@ -450,7 +448,6 @@ def test_useradmin_ldap_user_integration(self): for finish in done: finish() - @pytest.mark.integration def test_useradmin_ldap_case_sensitivity(self): if is_live_cluster(): @@ -487,7 +484,6 @@ def test_useradmin_ldap_case_sensitivity(self): for finish in done: finish() - def test_add_ldap_users(self): done = [] try: @@ -522,11 +518,11 @@ def test_add_ldap_users(self): assert b"Could not get LDAP details for users in pattern" in response.content, response # Removing this test because we are not running log listener - #response = c.get(reverse(desktop.views.log_view)) - #whitespaces_message = "{username}: Username must not contain whitespaces".format(username='user with space') - #if not isinstance(whitespaces_message, bytes): + # response = c.get(reverse(desktop.views.log_view)) + # whitespaces_message = "{username}: Username must not contain whitespaces".format(username='user with space') + # if not isinstance(whitespaces_message, bytes): # whitespaces_message = whitespaces_message.encode('utf-8') - #assert_true(whitespaces_message in response.content, response.content) + # assert_true(whitespaces_message in response.content, response.content) # Test dn with spaces in dn, but not username (should succeed) response = c.post( @@ -538,7 +534,6 @@ def 
test_add_ldap_users(self): for finish in done: finish() - @pytest.mark.integration def test_add_ldap_users_case_sensitivity(self): if is_live_cluster(): @@ -581,14 +576,12 @@ def test_add_ldap_users_case_sensitivity(self): for finish in done: finish() - def test_add_ldap_groups(self): URL = reverse('useradmin:useradmin.views.add_ldap_groups') # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection ldap_access.CACHED_LDAP_CONN = LdapTestConnection() - c = make_logged_in_client(username='test', is_superuser=True) assert c.get(URL) @@ -636,6 +629,7 @@ def test_ldap_exception_handling(self): ) assert b'There was an error when communicating with LDAP' in response.content, response + @pytest.mark.django_db @pytest.mark.requires_hadoop @pytest.mark.integration diff --git a/apps/useradmin/src/useradmin/tests.py b/apps/useradmin/src/useradmin/tests.py index 07b41387007..eaa5f1fe806 100644 --- a/apps/useradmin/src/useradmin/tests.py +++ b/apps/useradmin/src/useradmin/tests.py @@ -16,61 +16,59 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import object -import json -import ldap import re -import pytest import sys +import json import time -import urllib.request, urllib.parse, urllib.error - +import urllib.error +import urllib.parse +import urllib.request +from builtins import object from datetime import datetime +from unittest.mock import Mock, patch + +import ldap +import pytest from django.conf import settings from django.contrib.sessions.models import Session from django.db.models import Q -from django.urls import reverse from django.test import override_settings from django.test.client import Client +from django.urls import reverse import desktop.conf - +import libsaml.conf +import useradmin.conf +import useradmin.ldap_access from desktop import appmanager -from desktop.auth.backend import is_admin, create_user +from desktop.auth.backend import create_user, is_admin from desktop.conf import APP_BLACKLIST, ENABLE_ORGANIZATIONS, ENABLE_PROMETHEUS from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.lib.test_utils import grant_access from desktop.views import home, samlgroup_check from hadoop import pseudo_hdfs4 from hadoop.pseudo_hdfs4 import is_live_cluster - -import libsaml.conf -import useradmin.conf -import useradmin.ldap_access from useradmin.forms import UserChangeForm +from useradmin.hue_password_policy import reset_password_policy from useradmin.metrics import active_users, active_users_per_instance from useradmin.middleware import ConcurrentUserSessionMiddleware -from useradmin.models import HuePermission, GroupPermission, UserProfile, get_profile, get_default_user_group, User, Group -from useradmin.hue_password_policy import reset_password_policy +from useradmin.models import Group, GroupPermission, HuePermission, User, UserProfile, get_default_user_group, get_profile -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock class MockRequest(dict): pass + class MockUser(dict): def is_authenticated(self): return True + class MockSession(dict): pass + def reset_all_users(): """Reset to a clean state by deleting all users""" for user in User.objects.all(): @@ -293,9 +291,11 @@ def __init__(self): 'posix_members': ['posix_person2']}, } + def create_long_username(): 
return "A" * 151 + @pytest.mark.django_db def test_invalid_username(): BAD_NAMES = ('-foo', 'foo:o', 'foo o', ' foo') @@ -343,7 +343,6 @@ def test_get_profile(self): assert 1 == UserProfile.objects.filter(user=user).count() - @override_settings(AUTHENTICATION_BACKENDS=['desktop.auth.backend.AllowFirstUserDjangoBackend']) def test_get_and_update_profile(self): c = make_logged_in_client(username='test', password='test', is_superuser=False, recreate=True) @@ -387,15 +386,15 @@ def test_saml_group_conditions_check(self): assert not desktop.views.samlgroup_check(request) # mock saml response - userprofile.update_data({"saml_attributes":{"first_name":["test2"], - "last_name":["test2"], - "email":["test2@test.com"], - "groups":["aaa","bbb","ccc"]}}) + userprofile.update_data({"saml_attributes": {"first_name": ["test2"], + "last_name": ["test2"], + "email": ["test2@test.com"], + "groups": ["aaa", "bbb", "ccc"]}}) userprofile.save() # valid one or more valid required groups reset.append(libsaml.conf.REQUIRED_GROUPS_ATTRIBUTE.set_for_testing("groups")) - reset.append(libsaml.conf.REQUIRED_GROUPS.set_for_testing(["aaa","ddd"])) + reset.append(libsaml.conf.REQUIRED_GROUPS.set_for_testing(["aaa", "ddd"])) assert desktop.views.samlgroup_check(request) # invalid required group @@ -453,12 +452,10 @@ def setup_method(self): userprofile3.hostname = 'host2' userprofile3.save() - def teardown_method(self): reset_all_user_profile() super(TestUserAdminMetrics, self).teardown_method() - @override_settings(AUTHENTICATION_BACKENDS=['desktop.auth.backend.AllowFirstUserDjangoBackend']) def test_active_users(self): with patch('useradmin.metrics.get_localhost_name') as get_hostname: @@ -473,7 +470,6 @@ def test_active_users(self): assert 3 == metric['users.active.total']['value'] assert 2 == metric['users.active']['value'] - @override_settings(AUTHENTICATION_BACKENDS=['desktop.auth.backend.AllowFirstUserDjangoBackend']) def test_active_users_prometheus(self): if not ENABLE_PROMETHEUS.get(): @@ -599,7 +595,6 @@ def test_group_permissions(self): response = c1.get('/useradmin/users') assert b'You do not have permission to access the Useradmin application.' 
in response.content - def test_list_permissions(self): c1 = make_logged_in_client(username="nonadmin", is_superuser=False) grant_access('nonadmin', 'nonadmin', 'useradmin') @@ -609,7 +604,7 @@ def test_list_permissions(self): assert 200 == response.status_code perms = response.context[0]['permissions'] - assert perms.filter(app='beeswax').exists(), perms # Assumes beeswax is there + assert perms.filter(app='beeswax').exists(), perms # Assumes beeswax is there reset = APP_BLACKLIST.set_for_testing('beeswax') appmanager.DESKTOP_MODULES = [] @@ -618,14 +613,13 @@ def test_list_permissions(self): try: response = c1.get('/useradmin/permissions/') perms = response.context[0]['permissions'] - assert not perms.filter(app='beeswax').exists(), perms # beeswax is not there now + assert not perms.filter(app='beeswax').exists(), perms # beeswax is not there now finally: reset() appmanager.DESKTOP_MODULES = [] appmanager.DESKTOP_APPS = None appmanager.load_apps(APP_BLACKLIST.get()) - def test_list_users(self): c = make_logged_in_client(username="test", is_superuser=True) @@ -636,7 +630,6 @@ def test_list_users(self): assert b'Is active' in response.content - def test_default_group(self): resets = [ useradmin.conf.DEFAULT_USER_GROUP.set_for_testing('test_default') @@ -666,7 +659,6 @@ def test_default_group(self): for reset in resets: reset() - def test_group_admin(self): c = make_logged_in_client(username="test", is_superuser=True) response = c.get('/useradmin/groups') @@ -725,13 +717,12 @@ def test_group_admin(self): response = c.post('/useradmin/groups/new', dict(name="with space")) assert len(Group.objects.all()) == group_count + 1 - def test_user_admin_password_policy(self): # Set up password policy password_hint = password_error_msg = ("The password must be at least 8 characters long, " "and must contain both uppercase and lowercase letters, " "at least one number, and at least one special character.") - password_rule = "^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$" + password_rule = r"^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$" resets = [ useradmin.conf.PASSWORD_POLICY.IS_ENABLED.set_for_testing(True), @@ -848,7 +839,6 @@ def test_user_admin_password_policy(self): for reset in resets: reset() - def test_user_admin(self): FUNNY_NAME = 'أحمد@cloudera.com' FUNNY_NAME_QUOTED = urllib.parse.quote(FUNNY_NAME) @@ -896,7 +886,7 @@ def test_user_admin(self): assert b"You cannot change a username" in response.content # Now make sure that those were materialized response = c.get('/useradmin/users/edit/test') - assert smart_unicode("Inglés") == response.context[0]["form"].instance.first_name + assert smart_str("Inglés") == response.context[0]["form"].instance.first_name assert ("Español" if isinstance(response.content, str) else "Español".encode('utf-8')) in response.content # Shouldn't be able to demote to non-superuser response = c.post('/useradmin/users/edit/test', dict( @@ -971,7 +961,7 @@ def test_user_admin(self): group = get_default_user_group() response = c.get('/useradmin/users/new') assert response - assert (('' % (group.id, group.name)) in \ + assert (('' % (group.id, group.name)) in (response.content if isinstance(response.content, str) else response.content.decode())) # Create a new regular user (duplicate name) @@ -1132,17 +1122,16 @@ def test_list_for_autocomplete(self): 'user_test_list_for_autocomplete3', is_superuser=False, groupname='group_test_list_for_autocomplete_other_group' ) - # c1 users should list only 
'user_test_list_for_autocomplete2' and group should not list 'group_test_list_for_autocomplete_other_group' response = c1.get(reverse('useradmin_views_list_for_autocomplete')) content = json.loads(response.content) - users = [smart_unicode(user['username']) for user in content['users']] - groups = [smart_unicode(user['name']) for user in content['groups']] + users = [smart_str(user['username']) for user in content['users']] + groups = [smart_str(user['name']) for user in content['groups']] assert [u'user_test_list_for_autocomplete2'] == users assert u'group_test_list_for_autocomplete' in groups, groups - assert not u'group_test_list_for_autocomplete_other_group' in groups, groups + assert u'group_test_list_for_autocomplete_other_group' not in groups, groups reset = ENABLE_ORGANIZATIONS.set_for_testing(True) try: @@ -1155,19 +1144,19 @@ def test_list_for_autocomplete(self): response = c1.get(reverse('useradmin_views_list_for_autocomplete'), {'include_myself': True}) content = json.loads(response.content) - users = [smart_unicode(user['username']) for user in content['users']] - groups = [smart_unicode(user['name']) for user in content['groups']] + users = [smart_str(user['username']) for user in content['users']] + groups = [smart_str(user['name']) for user in content['groups']] assert [u'user_test_list_for_autocomplete', u'user_test_list_for_autocomplete2'] == users assert u'group_test_list_for_autocomplete' in groups, groups - assert not u'group_test_list_for_autocomplete_other_group' in groups, groups + assert u'group_test_list_for_autocomplete_other_group' not in groups, groups # c3 is alone response = c3_other_group.get(reverse('useradmin_views_list_for_autocomplete'), {'include_myself': True}) content = json.loads(response.content) - users = [smart_unicode(user['username']) for user in content['users']] - groups = [smart_unicode(user['name']) for user in content['groups']] + users = [smart_str(user['username']) for user in content['users']] + groups = [smart_str(user['name']) for user in content['groups']] assert [u'user_test_list_for_autocomplete3'] == users assert u'group_test_list_for_autocomplete_other_group' in groups, groups @@ -1178,7 +1167,7 @@ def test_list_for_autocomplete(self): response = c4_super_user.get('/desktop/api/users/autocomplete', {'include_myself': True, 'only_mygroups': True}) content = json.loads(response.content) - users = [smart_unicode(user['username']) for user in content['users']] + users = [smart_str(user['username']) for user in content['users']] assert ( [u'test', u'user_test_list_for_autocomplete', u'user_test_list_for_autocomplete2', u'user_test_list_for_autocomplete3'] == users) @@ -1190,8 +1179,8 @@ def test_list_for_autocomplete(self): response = c4_super_user.get('/desktop/api/users/autocomplete', {'include_myself': True, 'filter': 'Test_list_for_autocomplete'}) content = json.loads(response.content) - users = [smart_unicode(user['username']) for user in content['users']] - groups = [smart_unicode(user['name']) for user in content['groups']] + users = [smart_str(user['username']) for user in content['users']] + groups = [smart_str(user['name']) for user in content['groups']] assert [u'user_test_list_for_autocomplete', u'user_test_list_for_autocomplete2', u'user_test_list_for_autocomplete3'] == users assert [u'group_test_list_for_autocomplete', u'group_test_list_for_autocomplete_other_group'] == groups @@ -1210,7 +1199,7 @@ def test_language_preference(self): superuser = User.objects.get(username='test_super') response = 
other_client.get('/useradmin/users/edit/test') - assert not b"Language Preference" in response.content, response.content + assert b"Language Preference" not in response.content, response.content # Changing language preference will change language setting response = client.post('/useradmin/users/edit/test', dict(language='ko')) @@ -1227,12 +1216,10 @@ def test_edit_user_xss(self): language="en-us>" ) ) - if sys.version_info[0] < 3: - assert (b'Select a valid choice. en-us><script>alert('Hacked')</script> '\ - b'is not one of the available choices.' in response.content) - else: - assert (b'Select a valid choice. en-us><script>alert('Hacked')</script> '\ - b'is not one of the available choices.' in response.content) + assert ( + b'Select a valid choice. en-us><script>alert('Hacked')</script> ' + b'is not one of the available choices.' in response.content + ) # Hue 4 Admin response = edit_user.post('/useradmin/users/edit/admin', dict( username="admin", @@ -1254,12 +1241,10 @@ def test_edit_user_xss(self): language="en-us>" ) ) - if sys.version_info[0] < 3: - assert (b'Select a valid choice. en-us><script>alert('Hacked')</script> '\ - b'is not one of the available choices.' in response.content) - else: - assert (b'Select a valid choice. en-us><script>alert('Hacked')</script> '\ - b'is not one of the available choices.' in response.content) + assert ( + b'Select a valid choice. en-us><script>alert('Hacked')</script> ' + b'is not one of the available choices.' in response.content + ) # Hue 4, User with access to useradmin app response = edit_user.post('/useradmin/users/edit/edit_user', dict( username="edit_user", @@ -1270,6 +1255,7 @@ def test_edit_user_xss(self): content = json.loads(response.content) assert 'Select a valid choice. en-us>alert(\'Hacked\') is not one of the available choices.', content['errors'][0]['message'][0] + @pytest.mark.django_db @pytest.mark.requires_hadoop @pytest.mark.integration @@ -1437,7 +1423,6 @@ def test_last_activity(self): profile = UserProfile.objects.get(user__username='test') assert profile.last_activity != 0 - def test_idle_timeout(self): timeout = 5 reset = [ @@ -1512,7 +1497,7 @@ def test_concurrent_session_logout(self): now = datetime.now() # Session 1 is expired assert list(Session.objects.filter(Q(session_key=c.session.session_key)))[0].expire_date <= now - assert 302 == c.get('/editor', follow=False).status_code # Redirect to login page + assert 302 == c.get('/editor', follow=False).status_code # Redirect to login page # Session 2 is still active assert list(Session.objects.filter(Q(session_key=c2.session.session_key)))[0].expire_date > now diff --git a/apps/useradmin/src/useradmin/urls.py b/apps/useradmin/src/useradmin/urls.py index 856b8256e0b..a3de87e7b4c 100644 --- a/apps/useradmin/src/useradmin/urls.py +++ b/apps/useradmin/src/useradmin/urls.py @@ -17,15 +17,10 @@ import sys -from desktop.lib.django_util import get_username_re_rule, get_groupname_re_rule +from django.urls import re_path -from useradmin import views as useradmin_views -from useradmin import api as useradmin_api - -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from desktop.lib.django_util import get_groupname_re_rule, get_username_re_rule +from useradmin import api as useradmin_api, views as useradmin_views username_re = get_username_re_rule() groupname_re = get_groupname_re_rule() diff --git a/apps/useradmin/src/useradmin/views.py b/apps/useradmin/src/useradmin/views.py index ff7ac956b41..fa0ac4529db 100644 --- 
a/apps/useradmin/src/useradmin/views.py +++ b/apps/useradmin/src/useradmin/views.py @@ -15,58 +15,57 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from builtins import map -import pwd import grp +import pwd +import json import logging import subprocess -import sys -import json - -LOG = logging.getLogger() from axes.conf import settings from axes.models import AccessAttempt from axes.utils import reset -try: - import ldap -except ImportError: - LOG.warning('ldap module not found') - -from django.urls import reverse from django.forms import ValidationError from django.forms.utils import ErrorList from django.http import HttpResponse from django.shortcuts import redirect +from django.urls import reverse from django.utils.encoding import smart_str +from django.utils.translation import get_language, gettext as _ import desktop.conf from desktop.auth.backend import is_admin -from desktop.conf import LDAP, ENABLE_ORGANIZATIONS, ENABLE_CONNECTORS, ENABLE_SHARING +from desktop.conf import ENABLE_CONNECTORS, ENABLE_ORGANIZATIONS, ENABLE_SHARING, LDAP from desktop.lib.django_util import JsonResponse, render from desktop.lib.exceptions_renderable import PopupException from desktop.models import _get_apps from desktop.views import antixss, serve_403_error from hadoop.fs.exceptions import WebHdfsException - from useradmin import ldap_access -from useradmin.forms import SyncLdapUsersGroupsForm, AddLdapGroupsForm, AddLdapUsersForm, \ - PermissionsEditForm, GroupEditForm, SuperUserChangeForm, validate_username, validate_first_name, \ - validate_last_name, PasswordChangeForm +from useradmin.forms import ( + AddLdapGroupsForm, + AddLdapUsersForm, + GroupEditForm, + PasswordChangeForm, + PermissionsEditForm, + SuperUserChangeForm, + SyncLdapUsersGroupsForm, + validate_first_name, + validate_last_name, + validate_username, +) from useradmin.ldap_access import LdapBindException, LdapSearchException -from useradmin.models import HuePermission, UserProfile, LdapGroup, get_profile, get_default_user_group, User, Group, Organization +from useradmin.models import Group, HuePermission, LdapGroup, Organization, User, UserProfile, get_default_user_group, get_profile -if sys.version_info[0] > 2: - unicode = str - from django.utils.translation import get_language, gettext as _ -else: - from django.utils.translation import get_language, ugettext as _ +LOG = logging.getLogger() +try: + import ldap +except ImportError: + LOG.warning('ldap module not found') if ENABLE_ORGANIZATIONS.get(): - from useradmin.forms import OrganizationUserChangeForm as UserChangeForm, OrganizationSuperUserChangeForm as SuperUserChangeForm + from useradmin.forms import OrganizationSuperUserChangeForm as SuperUserChangeForm, OrganizationUserChangeForm as UserChangeForm else: - from useradmin.forms import UserChangeForm, SuperUserChangeForm + from useradmin.forms import SuperUserChangeForm, UserChangeForm def is_ldap_setup(): @@ -861,19 +860,20 @@ def ensure_home_directory(fs, user): home_directory = userprofile.home_directory.split('@')[0] if userprofile is not None and userprofile.home_directory: - if not isinstance(home_directory, unicode): + if not isinstance(home_directory, str): home_directory = home_directory.decode("utf-8") fs.do_as_user(username, fs.create_home_dir, home_directory) else: LOG.warning("Not creating home directory of %s as his profile is empty" % user) + def sync_unix_users_and_groups(min_uid, max_uid, min_gid, max_gid, 
check_shell): """ Syncs the Hue database with the underlying Unix system, by importing users and groups from 'getent passwd' and 'getent groups'. This should also pull in users who are accessible via NSS. """ - hadoop_groups = dict((group.gr_name, group) for group in grp.getgrall() \ + hadoop_groups = dict((group.gr_name, group) for group in grp.getgrall() if (group.gr_gid >= min_gid and group.gr_gid < max_gid) or group.gr_name == 'hadoop') user_groups = dict() @@ -896,7 +896,7 @@ def sync_unix_users_and_groups(min_uid, max_uid, min_gid, max_gid, check_shell): user_groups[member].append(hue_group) # Now let's import the users - hadoop_users = dict((user.pw_name, user) for user in pwd.getpwall() \ + hadoop_users = dict((user.pw_name, user) for user in pwd.getpwall() if (user.pw_uid >= min_uid and user.pw_uid < max_uid) or user.pw_name in grp.getgrnam('hadoop').gr_mem) for username, user in hadoop_users.items(): try: @@ -973,7 +973,6 @@ def _get_find_groups_filter(ldap_info, server=None): sanitized_dn = ldap.filter.escape_filter_chars(ldap_info['dn']).replace(r'\2a', r'*') sanitized_dn = sanitized_dn.replace(r'\5c,', r'\5c\2c') - if (group_member_attr.lower() == 'memberuid'): find_groups_filter = "(&" + group_filter + "(" + group_member_attr + "=" + ldap_info['username'] + "))" elif (group_member_attr.lower() == 'member' or group_member_attr.lower() == 'uniquemember'): @@ -1034,7 +1033,7 @@ def _import_ldap_users_info(connection, user_info, sync_groups=False, import_by_ group_ldap_info = connection.find_groups("*", group_filter=find_groups_filter) for group_info in group_ldap_info: if Group.objects.filter(name=group_info['name']).exists(): - # Add only if user isn't part of group. + # Add only if user isn't part of group. current_ldap_groups.add(Group.objects.get(name=group_info['name'])) if not user.groups.filter(name=group_info['name']).exists(): groups = import_ldap_groups( @@ -1093,7 +1092,7 @@ def _import_ldap_members(connection, group, ldap_info, count=0, max_count=1, fai LOG.warning('Found multiple groups for member %s.' % smart_str(group_info['dn'])) else: for group in groups: - _import_ldap_members(connection, group, group_info, count+1, max_count, failed_users=failed_users) + _import_ldap_members(connection, group, group_info, count + 1, max_count, failed_users=failed_users) for posix_member in posix_members: LOG.debug("Importing posix user %s into group %s" % (smart_str(posix_member), smart_str(group.name))) @@ -1145,7 +1144,7 @@ def _sync_ldap_members(connection, group, ldap_info, count=0, max_count=1, faile try: group = Group.objects.get(name=group_info['name']) - _sync_ldap_members(connection, group, group_info, count+1, max_count, failed_users=failed_users) + _sync_ldap_members(connection, group, group_info, count + 1, max_count, failed_users=failed_users) except Group.DoesNotExist: LOG.warning("Synchronizing group %s failed. Group does not exist." % smart_str(group.name)) diff --git a/apps/zookeeper/src/zookeeper/stats.py b/apps/zookeeper/src/zookeeper/stats.py index 8aef97ff229..e2a70284b33 100644 --- a/apps/zookeeper/src/zookeeper/stats.py +++ b/apps/zookeeper/src/zookeeper/stats.py @@ -15,54 +15,48 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from future import standard_library -standard_library.install_aliases() -from builtins import map -from builtins import object -import logging -import socket import re -import sys - -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from cStringIO import StringIO as string_io +import socket +import logging +from builtins import map, object +from io import StringIO as string_io LOG = logging.getLogger() + class Session(object): - class BrokenLine(Exception): pass + class BrokenLine(Exception): + pass def __init__(self, session): - m = re.search('/(\d+\.\d+\.\d+\.\d+):(\d+)\[(\d+)\]\((.*)\)', session) + m = re.search(r'/(\d+\.\d+\.\d+\.\d+):(\d+)\[(\d+)\]\((.*)\)', session) if m: - self.host = m.group(1) - self.port = m.group(2) - self.interest_ops = m.group(3) - for d in m.group(4).split(","): - k,v = d.split("=") - self.__dict__[k] = v + self.host = m.group(1) + self.port = m.group(2) + self.interest_ops = m.group(3) + for d in m.group(4).split(","): + k, v = d.split("=") + self.__dict__[k] = v else: - raise Session.BrokenLine() + raise Session.BrokenLine() class ZooKeeperStats(object): def __init__(self, host='localhost', port='2181', timeout=1): - self._address = (host, int(port)) - self._timeout = timeout - self._host = host + self._address = (host, int(port)) + self._timeout = timeout + self._host = host def get_stats(self): - """ Get ZooKeeper server stats as a map """ - data = self._send_cmd('mntr') - if data: - return self._parse(data) - else: - data = self._send_cmd('stat') - return self._parse_stat(data) + """ Get ZooKeeper server stats as a map """ + data = self._send_cmd('mntr') + if data: + return self._parse(data) + else: + data = self._send_cmd('stat') + return self._parse_stat(data) def get_clients(self): """ Get ZooKeeper server clients """ @@ -74,7 +68,7 @@ def get_clients(self): sio = string_io(stat) - #skip two lines + # skip two lines sio.readline() sio.readline() @@ -89,98 +83,99 @@ def get_clients(self): return clients def _create_socket(self): - return socket.socket() + return socket.socket() def _send_cmd(self, cmd): - """ Send a 4letter word command to the server """ - s = self._create_socket() - s.settimeout(self._timeout) - data = "" - try: - s.connect(self._address) - s.send(cmd) - data = s.recv(2048) - s.close() - except Exception as e: - LOG.error('Problem connecting to host %s, exception raised : %s' % (self._host, e)) - return data + """ Send a 4letter word command to the server """ + s = self._create_socket() + s.settimeout(self._timeout) + data = "" + try: + s.connect(self._address) + s.send(cmd) + data = s.recv(2048) + s.close() + except Exception as e: + LOG.error('Problem connecting to host %s, exception raised : %s' % (self._host, e)) + return data def _parse(self, data): - """ Parse the output from the 'mntr' 4letter word command """ - h = string_io(data) + """ Parse the output from the 'mntr' 4letter word command """ + h = string_io(data) - result = {} - for line in h.readlines(): - try: - key, value = self._parse_line(line) - result[key] = value - except ValueError: - pass # ignore broken lines + result = {} + for line in h.readlines(): + try: + key, value = self._parse_line(line) + result[key] = value + except ValueError: + pass # ignore broken lines - return result + return result def _parse_stat(self, data): - """ Parse the output from the 'stat' 4letter word command """ - - result = {} - if not data: - return result - h = string_io(data) - - version = h.readline() - if version: - result['zk_version'] = 
version[version.index(':')+1:].strip() - - # skip all lines until we find the empty one - while h.readline().strip(): pass - - for line in h.readlines(): - m = re.match('Latency min/avg/max: (\d+)/(\d+)/(\d+)', line) - if m is not None: - result['zk_min_latency'] = int(m.group(1)) - result['zk_avg_latency'] = int(m.group(2)) - result['zk_max_latency'] = int(m.group(3)) - continue - - m = re.match('Received: (\d+)', line) - if m is not None: - result['zk_packets_received'] = int(m.group(1)) - continue - - m = re.match('Sent: (\d+)', line) - if m is not None: - result['zk_packets_sent'] = int(m.group(1)) - continue - - m = re.match('Outstanding: (\d+)', line) - if m is not None: - result['zk_outstanding_requests'] = int(m.group(1)) - continue - - m = re.match('Mode: (.*)', line) - if m is not None: - result['zk_server_state'] = m.group(1) - continue - - m = re.match('Node count: (\d+)', line) - if m is not None: - result['zk_znode_count'] = int(m.group(1)) - continue + """ Parse the output from the 'stat' 4letter word command """ + result = {} + if not data: return result + h = string_io(data) + + version = h.readline() + if version: + result['zk_version'] = version[version.index(':') + 1:].strip() + + # skip all lines until we find the empty one + while h.readline().strip(): + pass + + for line in h.readlines(): + m = re.match(r'Latency min/avg/max: (\d+)/(\d+)/(\d+)', line) + if m is not None: + result['zk_min_latency'] = int(m.group(1)) + result['zk_avg_latency'] = int(m.group(2)) + result['zk_max_latency'] = int(m.group(3)) + continue + + m = re.match(r'Received: (\d+)', line) + if m is not None: + result['zk_packets_received'] = int(m.group(1)) + continue + + m = re.match(r'Sent: (\d+)', line) + if m is not None: + result['zk_packets_sent'] = int(m.group(1)) + continue + + m = re.match(r'Outstanding: (\d+)', line) + if m is not None: + result['zk_outstanding_requests'] = int(m.group(1)) + continue + + m = re.match('Mode: (.*)', line) + if m is not None: + result['zk_server_state'] = m.group(1) + continue + + m = re.match(r'Node count: (\d+)', line) + if m is not None: + result['zk_znode_count'] = int(m.group(1)) + continue + + return result def _parse_line(self, line): - try: - key, value = list(map(str.strip, line.split('\t'))) - except ValueError: - raise ValueError('Found invalid line: %s' % line) + try: + key, value = list(map(str.strip, line.split('\t'))) + except ValueError: + raise ValueError('Found invalid line: %s' % line) - if not key: - raise ValueError('The key is mandatory and should not be empty') + if not key: + raise ValueError('The key is mandatory and should not be empty') - try: - value = int(value) - except (TypeError, ValueError): - pass + try: + value = int(value) + except (TypeError, ValueError): + pass - return key, value + return key, value diff --git a/apps/zookeeper/src/zookeeper/urls.py b/apps/zookeeper/src/zookeeper/urls.py index 015d4c27fc7..516ce7c548f 100644 --- a/apps/zookeeper/src/zookeeper/urls.py +++ b/apps/zookeeper/src/zookeeper/urls.py @@ -17,12 +17,9 @@ import sys -from zookeeper import views as zookeeper_views +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from zookeeper import views as zookeeper_views urlpatterns = [ re_path(r'^$', zookeeper_views.index, name='index'), diff --git a/apps/zookeeper/src/zookeeper/views.py b/apps/zookeeper/src/zookeeper/views.py index a29fa3427a1..923514cd096 100644 --- a/apps/zookeeper/src/zookeeper/views.py +++ 
b/apps/zookeeper/src/zookeeper/views.py @@ -16,31 +16,24 @@ # limitations under the License. -from builtins import map +import sys import json import logging -import sys +from builtins import map from django.http import Http404 from django.urls import reverse +from django.utils.translation import gettext as _ +from desktop.auth.backend import is_admin from desktop.lib.django_util import JsonResponse, render from desktop.lib.exceptions_renderable import PopupException - -from zookeeper import settings -from zookeeper import stats +from zookeeper import settings, stats from zookeeper.conf import CLUSTERS from zookeeper.forms import CreateZNodeForm, EditZNodeForm from zookeeper.rest import ZooKeeper from zookeeper.utils import get_cluster_or_404 -from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - def _get_global_overview(): clusters = CLUSTERS.get() @@ -124,7 +117,17 @@ def tree(request, id, path): znode = zk.get(path) children = sorted(zk.get_children_paths(path)) - return render('tree.mako', request, {'cluster': cluster, 'path': path, 'znode': znode, 'children': children, 'clusters': CLUSTERS.get(),}) + return render( + 'tree.mako', + request, + { + 'cluster': cluster, + 'path': path, + 'znode': znode, + 'children': children, + 'clusters': CLUSTERS.get(), + }, + ) def delete(request, id, path): @@ -140,7 +143,7 @@ def delete(request, id, path): except ZooKeeper.NotFound: pass redir = { - 'redirect': reverse('zookeeper:tree', kwargs={'id':id, 'path': path[:path.rindex('/')] or '/'}) + 'redirect': reverse('zookeeper:tree', kwargs={'id': id, 'path': path[:path.rindex('/')] or '/'}) } return JsonResponse(redir) @@ -158,12 +161,12 @@ def create(request, id, path): full_path = ("%s/%s" % (path, form.cleaned_data['name'])).replace('//', '/') - zk.create(full_path, form.cleaned_data['data'], sequence = form.cleaned_data['sequence']) + zk.create(full_path, form.cleaned_data['data'], sequence=form.cleaned_data['sequence']) return tree(request, id, path) else: form = CreateZNodeForm() - return render('create.mako', request, {'cluster': cluster, 'path': path, 'form': form, 'clusters': CLUSTERS.get(),}) + return render('create.mako', request, {'cluster': cluster, 'path': path, 'form': form, 'clusters': CLUSTERS.get(), }) def edit_as_base64(request, id, path): @@ -182,11 +185,11 @@ def edit_as_base64(request, id, path): return tree(request, id, path) else: - form = EditZNodeForm(dict(\ + form = EditZNodeForm(dict( data=node.get('data64', ''), version=node.get('version', '-1'))) - return render('edit.mako', request, {'cluster': cluster, 'path': path, 'form': form, 'clusters': CLUSTERS.get(),}) + return render('edit.mako', request, {'cluster': cluster, 'path': path, 'form': form, 'clusters': CLUSTERS.get(), }) def edit_as_text(request, id, path): @@ -205,4 +208,4 @@ def edit_as_text(request, id, path): else: form = EditZNodeForm(dict(data=node.get('data64', '').decode('base64').strip(), version=node.get('version', '-1'))) - return render('edit.mako', request, {'cluster': cluster, 'path': path, 'form': form, 'clusters': CLUSTERS.get(),}) + return render('edit.mako', request, {'cluster': cluster, 'path': path, 'form': form, 'clusters': CLUSTERS.get(), }) diff --git a/desktop/core/src/desktop/api.py b/desktop/core/src/desktop/api.py index 5db3c9227b7..715b117d67d 100644 --- a/desktop/core/src/desktop/api.py +++ b/desktop/core/src/desktop/api.py @@ -15,26 +15,20 @@ # See the 
License for the specific language governing permissions and # limitations under the License. -import logging -import json import sys +import json import time - +import logging from collections import defaultdict from django.utils import html +from django.utils.translation import gettext as _ from django.views.decorators.http import require_GET, require_POST import desktop.conf from desktop.lib.django_util import JsonResponse from desktop.lib.i18n import force_unicode -from desktop.models import Document, DocumentTag, Document2, Directory - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from desktop.models import Directory, Document, Document2, DocumentTag LOG = logging.getLogger() @@ -89,14 +83,16 @@ def massaged_tags_for_json(docs, user): trash_tag = DocumentTag.objects.get_trash_tag(user) history_tag = DocumentTag.objects.get_history_tag(user) - tag_doc_mapping = defaultdict(set) # List of documents available in each tag + tag_doc_mapping = defaultdict(set) # List of documents available in each tag for doc in docs: for tag in doc.tags.all(): tag_doc_mapping[tag].add(doc) ts['trash'] = massaged_tags(trash_tag, tag_doc_mapping) ts['history'] = massaged_tags(history_tag, tag_doc_mapping) - tags = list(set(list(tag_doc_mapping.keys()) + [tag for tag in DocumentTag.objects.get_tags(user=user)])) # List of all personal and shared tags + + # List of all personal and shared tags + tags = list(set(list(tag_doc_mapping.keys()) + [tag for tag in DocumentTag.objects.get_tags(user=user)])) for tag in tags: massaged_tag = massaged_tags(tag, tag_doc_mapping) @@ -123,9 +119,10 @@ def massaged_tags(tag, tag_doc_mapping): 'id': tag.id, 'name': html.conditional_escape(tag.tag), 'owner': tag.owner.username, - 'docs': [doc.id for doc in tag_doc_mapping[tag]] # Could get with one request groupy + 'docs': [doc.id for doc in tag_doc_mapping[tag]] # Could get with one request groupy } + def massage_permissions(document): """ Returns the permissions for a given document as a dictionary @@ -135,15 +132,15 @@ def massage_permissions(document): return { 'perms': { 'read': { - 'users': [{'id': perm_user.id, 'username': perm_user.username} \ + 'users': [{'id': perm_user.id, 'username': perm_user.username} for perm_user in read_perms.users.all()], - 'groups': [{'id': perm_group.id, 'name': perm_group.name} \ + 'groups': [{'id': perm_group.id, 'name': perm_group.name} for perm_group in read_perms.groups.all()] }, 'write': { - 'users': [{'id': perm_user.id, 'username': perm_user.username} \ + 'users': [{'id': perm_user.id, 'username': perm_user.username} for perm_user in write_perms.users.all()], - 'groups': [{'id': perm_group.id, 'name': perm_group.name} \ + 'groups': [{'id': perm_group.id, 'name': perm_group.name} for perm_group in write_perms.groups.all()] } } @@ -179,13 +176,18 @@ def massaged_documents_for_json(documents, user): 'lastModified': '03/11/14 16:06:49', 'owner': 'admin', 'lastModifiedInMillis': 1394579209.0, 'isMine': true } }; - """ + """ # noqa: E501 docs = {} for document in documents: try: - url = document.content_object and hasattr(document.content_object, 'get_absolute_url') and document.content_object.get_absolute_url() or '' - except: + url = ( + document.content_object + and hasattr(document.content_object, 'get_absolute_url') + and document.content_object.get_absolute_url() + or '' + ) + except Exception: LOG.exception('failed to get absolute url') # If app of document is disabled url = '' @@ -225,7 
+227,7 @@ def massage_doc_for_json(document, user, url=''): 'name': html.conditional_escape(document.name), 'url': html.conditional_escape(url), 'description': html.conditional_escape(document.description), - 'tags': [{'id': tag.id, 'name': html.conditional_escape(tag.tag)} \ + 'tags': [{'id': tag.id, 'name': html.conditional_escape(tag.tag)} for tag in document.tags.all()], 'owner': document.owner.username, 'isMine': document.owner == user, diff --git a/desktop/core/src/desktop/api2.py b/desktop/core/src/desktop/api2.py index 90ef2e949c7..3939587f16e 100644 --- a/desktop/core/src/desktop/api2.py +++ b/desktop/core/src/desktop/api2.py @@ -17,7 +17,6 @@ import os import re -import sys import json import logging import zipfile @@ -33,12 +32,14 @@ from filebrowser.utils import parse_broker_url from collections import defaultdict from datetime import datetime +from io import StringIO as string_io from django.core import management from django.db import transaction from django.http import HttpResponse, JsonResponse from django.shortcuts import redirect from django.utils.html import escape +from django.utils.translation import gettext as _ from django.views.decorators.csrf import ensure_csrf_cookie from django.views.decorators.http import require_POST @@ -88,14 +89,6 @@ from notebook.connectors.base import Notebook from useradmin.models import Group, User -if sys.version_info[0] > 2: - from io import StringIO as string_io - - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from StringIO import StringIO as string_io - LOG = logging.getLogger() @@ -179,11 +172,10 @@ def recurse_conf(modules): conf['default'] = str(module.config.default) if module.config.secret or 'password' in module.config.key: conf['value'] = '*' * 10 - elif sys.version_info[0] > 2: - conf['value'] = str(module.get_raw()) else: - conf['value'] = str(module.get_raw()).decode('utf-8', 'replace') + conf['value'] = str(module.get_raw()) conf['value'] = re.sub('(.*)://(.*):(.*)@(.*)', r'\1://\2:**********@\4', conf['value']) + attrs.append(conf) return attrs @@ -956,54 +948,6 @@ def export_documents(request): return make_response(f.getvalue(), 'json', filename) -def topological_sort(docs): - - '''There is a bug in django 1.11 (https://code.djangoproject.com/ticket/26291) - and we are handling it via sorting the given documents in topological format. - - Hence this function is needed only if we are using Python2 based Hue as it uses django 1.11 - and python3 based Hue don't require this method as it uses django 3.2. 
- - input => docs: -> list of documents which needs to import in Hue - output => serialized_doc: -> list of sorted documents - (if document1 is dependent on document2 then document1 is listed after document2)''' - - size = len(docs) - graph = defaultdict(list) - for doc in docs: # creating a graph, assuming a document is a node of graph - dep_size = len(doc['fields']['dependencies']) - for i in range(dep_size): - graph[(doc['fields']['dependencies'])[i][0]].append(doc['fields']['uuid']) - - visited = {} - _doc = {} - for doc in docs: # making all the nodes of graph unvisited and capturing the doc in the dict with uuid as key - _doc[doc['fields']['uuid']] = doc - visited[doc['fields']['uuid']] = False - - stack = [] - for doc in docs: # calling _topological_sort function to sort the doc if node is not visited - if not visited[doc['fields']['uuid']]: - _topological_sort(doc['fields']['uuid'], visited, stack, graph) - - stack = stack[::-1] # list is in revered order so we are just reversing it - - serialized_doc = [] - for i in range(size): - serialized_doc.append(_doc[stack[i]]) - - return serialized_doc - - -def _topological_sort(vertex, visited, stack, graph): - visited[vertex] = True - for i in graph[vertex]: - if not visited[i]: - _topological_sort(i, visited, stack, graph) - - stack.append(vertex) - - @ensure_csrf_cookie def import_documents(request): def is_reserved_directory(doc): @@ -1059,11 +1003,6 @@ def is_reserved_directory(doc): doc['fields']['last_modified'] = datetime.now().replace(microsecond=0).isoformat() docs.append(doc) - if sys.version_info[0] < 3: - # In Django 1.11 loaddata cannot deserialize fixtures with forward references hence - # calling the topological_sort function to sort the document - docs = topological_sort(docs) - f = tempfile.NamedTemporaryFile(mode='w+', suffix='.json') f.write(json.dumps(docs)) f.flush() diff --git a/desktop/core/src/desktop/api_public_urls_v1.py b/desktop/core/src/desktop/api_public_urls_v1.py index 43c66cdc6a5..0ba0dd0e893 100644 --- a/desktop/core/src/desktop/api_public_urls_v1.py +++ b/desktop/core/src/desktop/api_public_urls_v1.py @@ -17,15 +17,11 @@ import sys +from django.urls import re_path + from desktop import api_public from desktop.lib.botserver import api as botserver_api -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path - - # "New" query API (i.e. connector based, lean arguments). # e.g. 
https://demo.gethue.com/api/query/execute/hive urlpatterns = [ diff --git a/desktop/core/src/desktop/app_template/src/app_name/urls.py b/desktop/core/src/desktop/app_template/src/app_name/urls.py index 89e4cf6fb91..b4cfe2c86be 100644 --- a/desktop/core/src/desktop/app_template/src/app_name/urls.py +++ b/desktop/core/src/desktop/app_template/src/app_name/urls.py @@ -19,10 +19,7 @@ from ${app_name} import views -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from django.urls import re_path urlpatterns = [ re_path(r'^$', views.index), diff --git a/desktop/core/src/desktop/app_template_proxy/src/app_name/urls.py b/desktop/core/src/desktop/app_template_proxy/src/app_name/urls.py index 356ac3acf8a..4603f4ec4b7 100644 --- a/desktop/core/src/desktop/app_template_proxy/src/app_name/urls.py +++ b/desktop/core/src/desktop/app_template_proxy/src/app_name/urls.py @@ -19,10 +19,7 @@ import ${app_name} -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from django.urls import re_path urlpatterns = [ re_path(r'^$', ${app_name}.views.index), diff --git a/desktop/core/src/desktop/appmanager.py b/desktop/core/src/desktop/appmanager.py index 2c697cb6a19..c5fa0bff705 100644 --- a/desktop/core/src/desktop/appmanager.py +++ b/desktop/core/src/desktop/appmanager.py @@ -15,24 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object import os -import logging import re import sys +import logging import traceback +from builtins import object + import pkg_resources +from django.utils.translation import gettext as _ import desktop - from desktop.lib.paths import get_desktop_root -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - # Directories where apps and libraries are to be found APP_DIRS = [ get_desktop_root('core-apps'), @@ -51,6 +46,7 @@ DESKTOP_APPS = None DESKTOP_MODULES = [] # Sum of APPS and LIBS + def _import_module_or_none(module): """Like import_module, but returns None if the module does not exist. This will properly handle nested ImportErrors in such a way that, if the @@ -67,14 +63,15 @@ def _import_module_or_none(module): # an import error itself. tb = sys.exc_info()[2] top_frame = traceback.extract_tb(tb)[-1] - err_file = re.sub(r'\.pyc','.py', top_frame[0]) - my_file = re.sub(r'\.pyc','.py', __file__) + err_file = re.sub(r'\.pyc', '.py', top_frame[0]) + my_file = re.sub(r'\.pyc', '.py', __file__) if err_file == my_file: return None else: LOG.error("Failed to import '%s'" % (module,)) raise + class DesktopModuleInfo(object): """ Desktop app, specified via module. 
@@ -213,6 +210,7 @@ def _submodule(self, name): def __str__(self): return "DesktopModule(%s: %s)" % (self.nice_name, self.module.__name__) + def get_apps(user): return [ app @@ -220,6 +218,7 @@ def get_apps(user): if user.has_hue_permission(action="access", app=app.display_name) ] + def get_apps_dict(user=None): if user is not None: apps = get_apps(user) @@ -228,6 +227,7 @@ def get_apps_dict(user=None): return dict([(app.name, app) for app in apps]) + def load_libs(): global DESKTOP_MODULES global DESKTOP_LIBS diff --git a/desktop/core/src/desktop/auth/api_authentications_tests.py b/desktop/core/src/desktop/auth/api_authentications_tests.py index 8b9616a3e97..f472f3abf24 100644 --- a/desktop/core/src/desktop/auth/api_authentications_tests.py +++ b/desktop/core/src/desktop/auth/api_authentications_tests.py @@ -15,26 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import json +from unittest.mock import MagicMock, Mock, patch + import pytest -import sys +from rest_framework import exceptions -from desktop.auth.backend import rewrite_user from desktop.auth.api_authentications import JwtAuthentication -from desktop.lib.django_test_util import make_logged_in_client +from desktop.auth.backend import rewrite_user from desktop.conf import AUTH - -from rest_framework import exceptions - +from desktop.lib.django_test_util import make_logged_in_client from useradmin.models import User -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - - @pytest.mark.django_db class TestJwtAuthentication(): @@ -56,7 +50,6 @@ def setup_method(self): } ) - def test_authenticate_existing_user(self): with patch('desktop.auth.api_authentications.jwt.decode') as jwt_decode: with patch('desktop.auth.api_authentications.requests.get'): @@ -78,7 +71,6 @@ def test_authenticate_existing_user(self): for reset in resets: reset() - def test_authenticate_new_user(self): with patch('desktop.auth.api_authentications.jwt.decode') as jwt_decode: with patch('desktop.auth.api_authentications.requests.get'): @@ -103,7 +95,6 @@ def test_authenticate_new_user(self): for reset in resets: reset() - def test_failed_authentication(self): with patch('desktop.auth.api_authentications.jwt.decode') as jwt_decode: with patch('desktop.auth.api_authentications.requests.get'): @@ -119,7 +110,6 @@ def test_failed_authentication(self): with pytest.raises(exceptions.AuthenticationFailed): JwtAuthentication().authenticate(self.request) - def test_check_user_token_storage(self): with patch('desktop.auth.api_authentications.jwt.decode') as jwt_decode: with patch('desktop.auth.api_authentications.requests.get'): @@ -139,7 +129,6 @@ def test_check_user_token_storage(self): for reset in resets: reset() - def test_check_token_verification_flag(self): with patch('desktop.auth.api_authentications.requests.get'): with patch('desktop.auth.api_authentications.jwt.algorithms.RSAAlgorithm.from_jwk'): @@ -170,7 +159,6 @@ def test_check_token_verification_flag(self): for reset in resets: reset() - def test_handle_public_key(self): with patch('desktop.auth.api_authentications.requests.get') as key_server_request: with patch('desktop.auth.api_authentications.jwt.decode') as jwt_decode: @@ -212,7 +200,7 @@ def test_handle_public_key(self): algorithms=['RS256'], audience='audience', issuer='issuer', - jwt=self.sample_token, + jwt=self.sample_token, key=b'-----BEGIN PUBLIC KEY-----\n' 
b'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwe9gTbRxHl4Ye9mY9abY\n' b'l/WHgx5QYZTwnHO5G5MX9gOiCbbxBqcOifVywX1/ienElksDIvjuQFL7zOSoXipu\n' @@ -228,7 +216,6 @@ def test_handle_public_key(self): for reset in resets: reset() - def test_handle_jku_ha(self): with patch('desktop.auth.api_authentications.requests.get') as requests_get: requests_get.return_value = Mock(status_code=200) @@ -257,6 +244,6 @@ def test_handle_jku_ha(self): try: jku = JwtAuthentication()._handle_jku_ha() - assert jku == None + assert jku is None finally: reset() diff --git a/desktop/core/src/desktop/auth/backend.py b/desktop/core/src/desktop/auth/backend.py index fd6686a8d57..6c13ba485a2 100644 --- a/desktop/core/src/desktop/auth/backend.py +++ b/desktop/core/src/desktop/auth/backend.py @@ -297,10 +297,7 @@ class AllowFirstUserDjangoBackend(django.contrib.auth.backends.ModelBackend): ModelBackend. """ def authenticate(self, *args, **kwargs): - if sys.version_info[0] > 2: - request = args[0] - else: - request = None + request = args[0] password = kwargs['password'] @@ -350,10 +347,7 @@ class ImpersonationBackend(django.contrib.auth.backends.ModelBackend): Does not support a multiple backends setup. """ def authenticate(self, *args, **kwargs): - if sys.version_info[0] > 2: - request = args[0] - else: - request = None + request = args[0] username = kwargs['username'] password = kwargs['password'] @@ -605,10 +599,7 @@ def authenticate(self, request=None, username=None, password=None, server=None): try: allowed_group = self.check_ldap_access_groups(server, username) if allowed_group: - if sys.version_info[0] > 2: - user = self._backend.authenticate(request, username=username, password=password) - else: - user = self._backend.authenticate(username=username, password=password) + user = self._backend.authenticate(request, username=username, password=password) else: LOG.warning("%s not in an allowed login group" % username) return None @@ -820,10 +811,7 @@ def get_user(self, user_id): class OIDCBackend(OIDCAuthenticationBackend): def authenticate(self, *args, **kwargs): - if sys.version_info[0] > 2: - self.request = args[0] - else: - self.request = None + self.request = args[0] if not self.request: return None diff --git a/desktop/core/src/desktop/auth/backend_tests.py b/desktop/core/src/desktop/auth/backend_tests.py index 9b8eb9ea044..a5d2fea63ec 100644 --- a/desktop/core/src/desktop/auth/backend_tests.py +++ b/desktop/core/src/desktop/auth/backend_tests.py @@ -15,20 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest import sys +from unittest.mock import Mock, patch + +import pytest + from desktop.auth.backend import LdapBackend, rewrite_user from desktop.lib.django_test_util import make_logged_in_client from useradmin.models import User -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock @pytest.mark.django_db class TestLdapBackend(): - + def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = rewrite_user(User.objects.get(username="test")) @@ -39,4 +38,4 @@ def test_authenticate(self): user = LdapBackend().authenticate(request=Mock(), username=Mock(), password=Mock(), server=Mock()) - assert user == None + assert user is None diff --git a/desktop/core/src/desktop/auth/decorators.py b/desktop/core/src/desktop/auth/decorators.py index 3c06bc1e436..8128f58f9e2 100644 --- a/desktop/core/src/desktop/auth/decorators.py +++ b/desktop/core/src/desktop/auth/decorators.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python +# !/usr/bin/env python # Licensed to Cloudera, Inc. under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,22 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import json import logging -import sys from django.utils.functional import wraps +from django.utils.translation import gettext as _ from desktop.auth.backend import is_admin, is_hue_admin from desktop.conf import ENABLE_ORGANIZATIONS from desktop.lib.exceptions_renderable import PopupException -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() diff --git a/desktop/core/src/desktop/auth/decorators_tests.py b/desktop/core/src/desktop/auth/decorators_tests.py index 031dbe73111..7ac08ce91bb 100644 --- a/desktop/core/src/desktop/auth/decorators_tests.py +++ b/desktop/core/src/desktop/auth/decorators_tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -## -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- # Licensed to Cloudera, Inc. under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,23 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest import sys import unittest +from unittest.mock import Mock, patch +import pytest from django.test import TestCase from desktop.auth.decorators import admin_required, hue_admin_required from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.exceptions_renderable import PopupException - -from useradmin.models import User, Group, Organization - - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock +from useradmin.models import Group, Organization, User class TestDecorator(TestCase): @@ -42,7 +36,6 @@ def setup_class(cls): cls.client1 = make_logged_in_client(username='admin', recreate=True, is_superuser=True) cls.client2 = make_logged_in_client(username='joe', recreate=True, is_superuser=False) - def test_admin_required(self): request = Mock(user=User.objects.get(username='admin')) hello_admin(request) @@ -51,7 +44,6 @@ def test_admin_required(self): with pytest.raises(PopupException): hello_admin(request) - def test_hue_admin_required(self): request = Mock(user=User.objects.get(username='admin')) hello_hue_admin(request) @@ -65,6 +57,7 @@ def test_hue_admin_required(self): def hello_admin(request, *args, **kwargs): return 'Hello' + @admin_required def hello_hue_admin(request, *args, **kwargs): return 'Hello' diff --git a/desktop/core/src/desktop/auth/forms.py b/desktop/core/src/desktop/auth/forms.py index 7771abcbb21..8b31c6efdc2 100644 --- a/desktop/core/src/desktop/auth/forms.py +++ b/desktop/core/src/desktop/auth/forms.py @@ -15,26 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime import logging -import sys +import datetime from django.conf import settings from django.contrib.auth import authenticate, get_backends from django.contrib.auth.forms import AuthenticationForm as DjangoAuthenticationForm, UserCreationForm as DjangoUserCreationForm -from django.forms import CharField, TextInput, PasswordInput, ChoiceField, ValidationError, Form -from django.utils.safestring import mark_safe +from django.forms import CharField, ChoiceField, Form, PasswordInput, TextInput, ValidationError from django.utils.encoding import smart_str +from django.utils.safestring import mark_safe +from django.utils.translation import gettext as _, gettext_lazy as _t from desktop import conf -from useradmin.hue_password_policy import hue_get_password_validators - from desktop.auth.backend import is_admin - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from useradmin.hue_password_policy import hue_get_password_validators if conf.ENABLE_ORGANIZATIONS.get(): from useradmin.models import User @@ -48,14 +42,17 @@ def get_backend_names(): return get_backends and [backend.__class__.__name__ for backend in get_backends()] + def is_active_directory(): return 'LdapBackend' in get_backend_names() and ( bool(conf.LDAP.NT_DOMAIN.get()) or bool(conf.LDAP.LDAP_SERVERS.get()) or conf.LDAP.LDAP_URL.get() is not None ) + def get_ldap_server_keys(): return [(ldap_server_record_key) for ldap_server_record_key in conf.LDAP.LDAP_SERVERS.get()] + def get_server_choices(): if conf.LDAP.LDAP_SERVERS.get(): auth_choices = [(ldap_server_record_key, ldap_server_record_key) for ldap_server_record_key in conf.LDAP.LDAP_SERVERS.get()] @@ -77,7 +74,7 @@ class AuthenticationForm(DjangoAuthenticationForm): 'inactive': _t("Account deactivated. 
Please contact an administrator."), } - username = CharField(label=_t("Username"), widget=TextInput(attrs={'maxlength': 150, 'placeholder': _t("Username"), 'autocomplete': 'off', 'autofocus': 'autofocus'})) + username = CharField(label=_t("Username"), widget=TextInput(attrs={'maxlength': 150, 'placeholder': _t("Username"), 'autocomplete': 'off', 'autofocus': 'autofocus'})) # noqa: E501 password = CharField(label=_t("Password"), widget=PasswordInput(attrs={'placeholder': _t("Password"), 'autocomplete': 'off'})) def authenticate(self): @@ -103,7 +100,9 @@ def clean(self): if not user.is_active: if settings.ADMINS: - raise ValidationError(mark_safe(_("Account deactivated. Please contact an administrator.") % settings.ADMINS[0][1])) + raise ValidationError( + mark_safe(_("Account deactivated. Please contact an administrator.") % settings.ADMINS[0][1]) + ) else: raise ValidationError(self.error_messages['inactive']) except User.DoesNotExist: @@ -124,7 +123,7 @@ class OrganizationAuthenticationForm(Form): } # username = None - email = CharField(label=_t("Email"), widget=TextInput(attrs={'maxlength': 150, 'placeholder': _t("Email"), 'autocomplete': 'off', 'autofocus': 'autofocus'})) + email = CharField(label=_t("Email"), widget=TextInput(attrs={'maxlength': 150, 'placeholder': _t("Email"), 'autocomplete': 'off', 'autofocus': 'autofocus'})) # noqa: E501 password = CharField(label=_t("Password"), widget=PasswordInput(attrs={'placeholder': _t("Password"), 'autocomplete': 'off'})) def __init__(self, request=None, *args, **kwargs): @@ -182,16 +181,18 @@ def get_invalid_login_error(self): class ImpersonationAuthenticationForm(AuthenticationForm): - login_as = CharField(label=_t("Login as"), max_length=30, widget=TextInput(attrs={'placeholder': _t("Login as username"), 'autocomplete': 'off'})) + login_as = CharField( + label=_t("Login as"), max_length=30, widget=TextInput(attrs={'placeholder': _t("Login as username"), 'autocomplete': 'off'}) + ) def authenticate(self): try: super(AuthenticationForm, self).clean() - except: + except Exception: # Expected to fail as login_as is nor provided by the parent Django AuthenticationForm, hence we redo it properly below. pass request = None - self.user_cache = authenticate(request, username=self.cleaned_data.get('username'), password=self.cleaned_data.get('password'), login_as=self.cleaned_data.get('login_as')) + self.user_cache = authenticate(request, username=self.cleaned_data.get('username'), password=self.cleaned_data.get('password'), login_as=self.cleaned_data.get('login_as')) # noqa: E501 return self.user_cache @@ -206,9 +207,7 @@ def __init__(self, *args, **kwargs): self.error_messages['invalid_login'] = _t("Invalid username or password, or your LDAP groups not allowed") def authenticate(self): - request = None - if sys.version_info[0] > 2: - request = self.request + request = self.request username = self.cleaned_data.get('username') or '' password = self.cleaned_data.get('password') server = self.cleaned_data.get('server') diff --git a/desktop/core/src/desktop/auth/views.py b/desktop/core/src/desktop/auth/views.py index a5ca6220cb0..7cc741f4bb3 100644 --- a/desktop/core/src/desktop/auth/views.py +++ b/desktop/core/src/desktop/auth/views.py @@ -14,53 +14,41 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import json -from future import standard_library -standard_library.install_aliases() try: import oauth2 as oauth -except: +except Exception: oauth = None import cgi import logging -import sys from datetime import datetime +from urllib.parse import urlencode as urllib_urlencode -from axes.decorators import axes_dispatch import django.contrib.auth.views -from django.core.exceptions import SuspiciousOperation -from django.contrib.auth import login, get_backends, authenticate +from axes.decorators import axes_dispatch +from django.contrib.auth import authenticate, get_backends, login from django.contrib.sessions.models import Session +from django.core.exceptions import SuspiciousOperation from django.http import HttpResponseRedirect from django.urls import reverse - -from hadoop.fs.exceptions import WebHdfsException -from notebook.connectors.base import get_api -from useradmin.models import get_profile, UserProfile, User, Group -from useradmin.views import ensure_home_directory, require_change_password +from django.utils.encoding import smart_str +from django.utils.translation import gettext as _ from desktop.auth import forms as auth_forms from desktop.auth.backend import OIDCBackend -from desktop.auth.forms import ImpersonationAuthenticationForm, OrganizationUserCreationForm, OrganizationAuthenticationForm -from desktop.conf import OAUTH, ENABLE_ORGANIZATIONS, SESSION +from desktop.auth.forms import ImpersonationAuthenticationForm, OrganizationAuthenticationForm, OrganizationUserCreationForm +from desktop.conf import ENABLE_ORGANIZATIONS, OAUTH, SESSION from desktop.lib import fsmanager -from desktop.lib.django_util import render, login_notrequired, JsonResponse +from desktop.lib.django_util import JsonResponse, login_notrequired, render from desktop.lib.exceptions_renderable import PopupException -from desktop.log.access import access_log, access_warn, last_access_map -from desktop.views import samlgroup_check, saml_login_headers +from desktop.log.access import access_warn, last_access_map from desktop.settings import LOAD_BALANCER_COOKIE -from django.utils.encoding import smart_str - - -if sys.version_info[0] > 2: - from urllib.parse import urlencode as urllib_urlencode - from django.utils.translation import gettext as _ -else: - from urllib import urlencode as urllib_urlencode - from django.utils.translation import ugettext as _ - +from desktop.views import saml_login_headers, samlgroup_check +from hadoop.fs.exceptions import WebHdfsException +from notebook.connectors.base import get_api +from useradmin.models import Group, User, UserProfile, get_profile +from useradmin.views import ensure_home_directory, require_change_password LOG = logging.getLogger() @@ -161,7 +149,7 @@ def dt_login(request, from_modal=False): userprofile.first_login = False userprofile.last_activity = datetime.now() - if userprofile.creation_method == UserProfile.CreationMethod.EXTERNAL: # This is to fix a bug in Hue 4.3 + if userprofile.creation_method == UserProfile.CreationMethod.EXTERNAL: # This is to fix a bug in Hue 4.3 userprofile.creation_method = UserProfile.CreationMethod.EXTERNAL.name userprofile.update_data({'auth_backend': user.backend}) try: @@ -209,7 +197,7 @@ def dt_login(request, from_modal=False): request.method == 'POST' and request.user.username != request.POST.get('username'): # local user login failed, give the right auth_form with 'server' field auth_form = auth_forms.LdapAuthenticationForm() - + if not from_modal and SESSION.ENABLE_TEST_COOKIE.get(): request.session.set_test_cookie() @@ 
-233,7 +221,7 @@ def dt_login(request, from_modal=False): }) if not request.user.is_authenticated: - response.delete_cookie(LOAD_BALANCER_COOKIE) # Note: might be re-balanced to another Hue on login. + response.delete_cookie(LOAD_BALANCER_COOKIE) # Note: might be re-balanced to another Hue on login. return response @@ -283,12 +271,13 @@ def profile(request): """ return render(None, request, _profile_dict(request.user)) + def _profile_dict(user): return dict( username=user.username, first_name=user.first_name, last_name=user.last_name, - last_login=str(user.last_login), # datetime object needs to be converted + last_login=str(user.last_login), # datetime object needs to be converted email=user.email ) diff --git a/desktop/core/src/desktop/auth/views_test.py b/desktop/core/src/desktop/auth/views_test.py index b77cf3539ec..09d23aa259b 100644 --- a/desktop/core/src/desktop/auth/views_test.py +++ b/desktop/core/src/desktop/auth/views_test.py @@ -15,44 +15,34 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import datetime -import pytest import sys +import datetime +from builtins import object +from unittest.mock import MagicMock, Mock, patch -from django_auth_ldap import backend as django_auth_ldap_backend -from django.db.utils import DataError +import pytest from django.conf import settings -from django.test.client import Client +from django.db.utils import DataError from django.test import TestCase - -from hadoop.test_base import PseudoHdfsTestBase -from hadoop import pseudo_hdfs4 -from useradmin import ldap_access -from useradmin.models import get_default_user_group, User, Group, get_profile -from useradmin.tests import LdapTestConnection -from useradmin.views import import_ldap_groups +from django.test.client import Client +from django_auth_ldap import backend as django_auth_ldap_backend from desktop import conf, middleware from desktop.auth import backend from desktop.auth.backend import create_user from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.test_utils import add_to_group - - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock +from hadoop import pseudo_hdfs4 +from hadoop.test_base import PseudoHdfsTestBase +from useradmin import ldap_access +from useradmin.models import Group, User, get_default_user_group, get_profile +from useradmin.tests import LdapTestConnection +from useradmin.views import import_ldap_groups def get_mocked_config(): - return { - 'mocked_ldap': { - 'users': {}, - 'groups': {} - } - } + return {'mocked_ldap': {'users': {}, 'groups': {}}} + @pytest.mark.django_db @pytest.mark.integration @@ -76,7 +66,7 @@ def teardown_class(cls): def setup_method(self): self.c = Client() - self.reset.append( conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.AllowFirstUserDjangoBackend']) ) + self.reset.append(conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.AllowFirstUserDjangoBackend'])) self.reset.append(conf.LDAP.SYNC_GROUPS_ON_LOGIN.set_for_testing(False)) def teardown_method(self): @@ -122,10 +112,14 @@ def test_login_home_creation_failure(self): assert not cluster.fs.exists("/user/%s" % self.test_username) fs.do_as_superuser(fs.create, "/user/%s" % self.test_username) - response = self.c.post('/hue/accounts/login/', { + response = self.c.post( + '/hue/accounts/login/', + { 'username': self.test_username, 'password': "test-hue-foo2", - }, follow=True) + }, + follow=True, 
+ ) assert 200 == response.status_code, "Expected ok status." assert '/about' in response.content, response.content @@ -133,10 +127,14 @@ def test_login_home_creation_failure(self): # 'Could not create home directory.' won't show up because the messages are consumed before def test_login_expiration(self): - response = self.c.post('/hue/accounts/login/', { + response = self.c.post( + '/hue/accounts/login/', + { 'username': self.test_username, 'password': "test-hue-foo2", - }, follow=True) + }, + follow=True, + ) assert 200 == response.status_code, "Expected ok status." self.reset.append(conf.AUTH.EXPIRES_AFTER.set_for_testing(10000)) @@ -147,10 +145,14 @@ def test_login_expiration(self): # Deactivate user old_settings = settings.ADMINS settings.ADMINS = [] - response = self.c.post('/hue/accounts/login/', { + response = self.c.post( + '/hue/accounts/login/', + { 'username': self.test_username, 'password': "test-hue-foo2", - }, follow=True) + }, + follow=True, + ) assert 200 == response.status_code, "Expected ok status." assert "Account deactivated. Please contact an administrator." in response.content, response.content settings.ADMINS = old_settings @@ -165,7 +167,6 @@ def test_login_expiration(self): @pytest.mark.django_db class TestLdapLogin(PseudoHdfsTestBase): - reset = [] test_username = 'test_ldap_login' @@ -196,7 +197,7 @@ def teardown_class(cls): def setup_method(self): self.c = Client() - self.reset.append( conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.LdapBackend']) ) + self.reset.append(conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.LdapBackend'])) self.reset.append(conf.LDAP.LDAP_URL.set_for_testing('does not matter')) self.reset.append(conf.LDAP.SYNC_GROUPS_ON_LOGIN.set_for_testing(False)) @@ -217,11 +218,7 @@ def test_login(self): assert 200 == response.status_code, "Expected ok status." assert not response.context[0]['first_login_ever'] - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username, - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username, 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." assert response.url == "/" @@ -289,11 +286,9 @@ def test_login_home_creation_failure(self): assert not self.cluster.fs.do_as_user(self.test_username, cluster.fs.exists, "/user/%s" % self.test_username) fs.do_as_superuser(fs.create, "/user/%s" % self.test_username) - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username, - 'password': "test-hue-ldap2", - 'server': "LDAP" - }, follow=True) + response = self.c.post( + '/hue/accounts/login/', {'username': self.test_username, 'password': "test-hue-ldap2", 'server': "LDAP"}, follow=True + ) assert 200 == response.status_code, "Expected ok status." assert '/about' in response.content, response.content # Custom login process should not do 'http-equiv="refresh"' but call the correct view @@ -302,22 +297,14 @@ def test_login_home_creation_failure(self): def test_login_ignore_case(self): self.reset.append(conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True)) - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username.upper(), - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username.upper(), 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." 
assert 1 == len(User.objects.all()) assert self.test_username == User.objects.all()[0].username self.c.logout() - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username, - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username, 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." assert 1 == len(User.objects.all()) assert self.test_username == User.objects.all()[0].username @@ -325,21 +312,13 @@ def test_login_ignore_case(self): def test_login_force_lower_case(self): self.reset.append(conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(True)) - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username.upper(), - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username.upper(), 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." assert 1 == len(User.objects.all()) self.c.logout() - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username, - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username, 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." assert 1 == len(User.objects.all()) assert self.test_username == User.objects.all()[0].username @@ -348,22 +327,14 @@ def test_login_force_lower_case_and_ignore_case(self): self.reset.append(conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True)) self.reset.append(conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(True)) - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username.upper(), - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username.upper(), 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." assert 1 == len(User.objects.all()) assert self.test_username == User.objects.all()[0].username self.c.logout() - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username, - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username, 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." 
assert 1 == len(User.objects.all()) assert self.test_username == User.objects.all()[0].username @@ -372,14 +343,19 @@ def test_import_groups_on_login(self): self.reset.append(conf.LDAP.SYNC_GROUPS_ON_LOGIN.set_for_testing(True)) ldap_access.CACHED_LDAP_CONN = LdapTestConnection() # Make sure LDAP groups exist or they won't sync - import_ldap_groups(ldap_access.CACHED_LDAP_CONN, 'TestUsers', import_members=False, import_members_recursive=False, sync_users=False, import_by_dn=False) - import_ldap_groups(ldap_access.CACHED_LDAP_CONN, 'Test Administrators', import_members=False, import_members_recursive=False, sync_users=False, import_by_dn=False) - - response = self.c.post('/hue/accounts/login/', { - 'username': "curly", - 'password': "ldap1", - 'server': "TestUsers" - }) + import_ldap_groups( + ldap_access.CACHED_LDAP_CONN, 'TestUsers', import_members=False, import_members_recursive=False, sync_users=False, import_by_dn=False + ) + import_ldap_groups( + ldap_access.CACHED_LDAP_CONN, + 'Test Administrators', + import_members=False, + import_members_recursive=False, + sync_users=False, + import_by_dn=False, + ) + + response = self.c.post('/hue/accounts/login/', {'username': "curly", 'password': "ldap1", 'server': "TestUsers"}) assert 302 == response.status_code, response.status_code assert 1 == len(User.objects.all()) # The two curly are a part of in LDAP and the default group. @@ -388,7 +364,6 @@ def test_import_groups_on_login(self): @pytest.mark.django_db class TestRemoteUserLogin(PseudoHdfsTestBase): - reset = [] test_username = "test_remote_user_login" @@ -410,8 +385,8 @@ def teardown_class(cls): settings.AUTHENTICATION_BACKENDS = cls.auth_backends def setup_method(self): - self.reset.append( conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.RemoteUserDjangoBackend']) ) - self.reset.append( conf.AUTH.REMOTE_USER_HEADER.set_for_testing('REMOTE_USER') ) # Set for middleware + self.reset.append(conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.RemoteUserDjangoBackend'])) + self.reset.append(conf.AUTH.REMOTE_USER_HEADER.set_for_testing('REMOTE_USER')) # Set for middleware self.c = Client() @@ -439,7 +414,7 @@ def test_normal(self): assert self.test_username == User.objects.all()[0].username def test_ignore_case(self): - self.reset.append( conf.AUTH.IGNORE_USERNAME_CASE.set_for_testing(True) ) + self.reset.append(conf.AUTH.IGNORE_USERNAME_CASE.set_for_testing(True)) response = self.c.get('/hue/accounts/login/') assert 200 == response.status_code, "Expected ok status." @@ -466,7 +441,7 @@ def test_ignore_case(self): assert "%s_%s" % (self.test_username, '2') == User.objects.all().order_by('username')[1].username def test_force_lower_case(self): - self.reset.append( conf.AUTH.FORCE_USERNAME_LOWERCASE.set_for_testing(True) ) + self.reset.append(conf.AUTH.FORCE_USERNAME_LOWERCASE.set_for_testing(True)) response = self.c.get('/hue/accounts/login/') assert 200 == response.status_code, "Expected ok status." 
@@ -482,7 +457,6 @@ def test_force_lower_case(self): assert 1 == len(User.objects.all()) assert self.test_username == User.objects.all()[0].username - def test_ignore_case_and_force_lower_case(self): reset = conf.AUTH.FORCE_USERNAME_LOWERCASE.set_for_testing(False) try: @@ -493,8 +467,8 @@ def test_ignore_case_and_force_lower_case(self): finally: reset() - self.reset.append( conf.AUTH.FORCE_USERNAME_LOWERCASE.set_for_testing(True) ) - self.reset.append( conf.AUTH.IGNORE_USERNAME_CASE.set_for_testing(True) ) + self.reset.append(conf.AUTH.FORCE_USERNAME_LOWERCASE.set_for_testing(True)) + self.reset.append(conf.AUTH.IGNORE_USERNAME_CASE.set_for_testing(True)) # Previously existing users should not be forced to lower case. response = self.c.post('/hue/accounts/login/', {}, **{"REMOTE_USER": self.test_username.upper()}) @@ -527,7 +501,7 @@ def setup_class(cls): # Override auth backend, settings are only loaded from conf at initialization so we can't use set_for_testing cls.auth_backends = settings.AUTHENTICATION_BACKENDS - settings.AUTHENTICATION_BACKENDS = ('desktop.auth.backend.LdapBackend','desktop.auth.backend.AllowFirstUserDjangoBackend') + settings.AUTHENTICATION_BACKENDS = ('desktop.auth.backend.LdapBackend', 'desktop.auth.backend.AllowFirstUserDjangoBackend') # Need to recreate LdapBackend class with new monkey patched base class reload(backend) @@ -542,7 +516,9 @@ def teardown_class(cls): def setup_method(self): self.c = Client() - self.reset.append( conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.LdapBackend','desktop.auth.backend.AllowFirstUserDjangoBackend'])) + self.reset.append( + conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.LdapBackend', 'desktop.auth.backend.AllowFirstUserDjangoBackend']) + ) self.reset.append(conf.LDAP.LDAP_URL.set_for_testing('does not matter')) def teardown_method(self): @@ -557,11 +533,7 @@ def teardown_method(self): def test_login_with_ldap(self): ldap_access.CACHED_LDAP_CONN = LdapTestConnection() - response = self.c.post('/hue/accounts/login/', { - 'username': "curly", - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': "curly", 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, response.status_code assert 1 == len(User.objects.all()) @@ -592,7 +564,7 @@ def setup_class(cls): # Override auth backend, settings are only loaded from conf at initialization so we can't use set_for_testing cls.auth_backends = settings.AUTHENTICATION_BACKENDS - settings.AUTHENTICATION_BACKENDS = (['desktop.auth.backend.LdapBackend', 'desktop.auth.backend.AllowFirstUserDjangoBackend']) + settings.AUTHENTICATION_BACKENDS = ['desktop.auth.backend.LdapBackend', 'desktop.auth.backend.AllowFirstUserDjangoBackend'] # Need to recreate LdapBackend class with new monkey patched base class reload(backend) @@ -607,7 +579,7 @@ def teardown_class(cls): def setup_method(self, method): self.c = Client() - self.reset.append( conf.AUTH.BACKEND.set_for_testing(['AllowFirstUserDjangoBackend', 'LdapBackend']) ) + self.reset.append(conf.AUTH.BACKEND.set_for_testing(['AllowFirstUserDjangoBackend', 'LdapBackend'])) self.reset.append(conf.LDAP.LDAP_URL.set_for_testing('does not matter')) def teardown_method(self, method): @@ -623,30 +595,22 @@ def test_login(self): assert 200 == response.status_code, "Expected ok status." 
assert response.context[0]['first_login_ever'] - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username, - 'password': "ldap1", - 'password1': "ldap1", - 'password2': "ldap1", - 'server': "Local" - }) + response = self.c.post( + '/hue/accounts/login/', + {'username': self.test_username, 'password': "ldap1", 'password1': "ldap1", 'password2': "ldap1", 'server': "Local"}, + ) assert 302 == response.status_code, "Expected ok redirect status." assert response.url == "/" self.c.get('/accounts/logout') - response = self.c.post('/hue/accounts/login/', { - 'username': self.test_username, - 'password': "ldap1", - 'server': "LDAP" - }) + response = self.c.post('/hue/accounts/login/', {'username': self.test_username, 'password': "ldap1", 'server': "LDAP"}) assert 302 == response.status_code, "Expected ok redirect status." assert response.url == "/" @pytest.mark.django_db class TestLogin(PseudoHdfsTestBase): - reset = [] test_username = "test_login" @@ -667,7 +631,7 @@ def teardown_class(cls): def setup_method(self): self.c = Client() - self.reset.append( conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.AllowFirstUserDjangoBackend']) ) + self.reset.append(conf.AUTH.BACKEND.set_for_testing(['desktop.auth.backend.AllowFirstUserDjangoBackend'])) def teardown_method(self): for finish in self.reset: @@ -679,7 +643,7 @@ def teardown_method(self): self.cluster.fs.do_as_superuser(self.cluster.fs.rmtree, "/user/%s" % self.test_username) def test_bad_first_user(self): - self.reset.append( conf.AUTH.BACKEND.set_for_testing(["desktop.auth.backend.AllowFirstUserDjangoBackend"]) ) + self.reset.append(conf.AUTH.BACKEND.set_for_testing(["desktop.auth.backend.AllowFirstUserDjangoBackend"])) response = self.c.get('/hue/accounts/login/') assert 200 == response.status_code, "Expected ok status." @@ -687,7 +651,7 @@ def test_bad_first_user(self): response = self.c.post('/hue/accounts/login/', dict(username="foo 1", password="foo")) assert 200 == response.status_code, "Expected ok status." - #assert_true('This value may contain only letters, numbers and @/./+/-/_ characters.' in response.content, response) + # assert_true('This value may contain only letters, numbers and @/./+/-/_ characters.' 
in response.content, response) assert 'This value may contain only ' in response.content, response def test_non_jframe_login(self): @@ -697,14 +661,14 @@ def test_non_jframe_login(self): # Login response = client.post('/hue/accounts/login/', dict(username=self.test_username, password="test"), follow=True) template = 'hue.mako' - assert any([template in _template.filename for _template in response.templates]), response.content # Go to superuser wizard + assert any([template in _template.filename for _template in response.templates]), response.content # Go to superuser wizard def test_login_expiration(self): - """ Expiration test without superusers """ + """Expiration test without superusers""" old_settings = settings.ADMINS - self.reset.append( conf.AUTH.BACKEND.set_for_testing(["desktop.auth.backend.AllowFirstUserDjangoBackend"]) ) - self.reset.append( conf.AUTH.EXPIRES_AFTER.set_for_testing(0) ) - self.reset.append( conf.AUTH.EXPIRE_SUPERUSERS.set_for_testing(False) ) + self.reset.append(conf.AUTH.BACKEND.set_for_testing(["desktop.auth.backend.AllowFirstUserDjangoBackend"])) + self.reset.append(conf.AUTH.EXPIRES_AFTER.set_for_testing(0)) + self.reset.append(conf.AUTH.EXPIRE_SUPERUSERS.set_for_testing(False)) client = make_logged_in_client(username=self.test_username, password="test") client.get('/accounts/logout') @@ -736,10 +700,10 @@ def test_login_expiration(self): settings.ADMINS = old_settings def test_login_expiration_with_superusers(self): - """ Expiration test with superusers """ - self.reset.append( conf.AUTH.BACKEND.set_for_testing(["desktop.auth.backend.AllowFirstUserDjangoBackend"]) ) - self.reset.append( conf.AUTH.EXPIRES_AFTER.set_for_testing(0) ) - self.reset.append( conf.AUTH.EXPIRE_SUPERUSERS.set_for_testing(True) ) + """Expiration test with superusers""" + self.reset.append(conf.AUTH.BACKEND.set_for_testing(["desktop.auth.backend.AllowFirstUserDjangoBackend"])) + self.reset.append(conf.AUTH.EXPIRES_AFTER.set_for_testing(0)) + self.reset.append(conf.AUTH.EXPIRE_SUPERUSERS.set_for_testing(True)) client = make_logged_in_client(username=self.test_username, password="test") client.get('/accounts/logout') @@ -757,8 +721,8 @@ def test_modal_login(self): assert b' % for (file_name, line_number, function_name, text) in traceback: - - - + + + % endfor diff --git a/desktop/core/src/desktop/templates/common_footer.mako b/desktop/core/src/desktop/templates/common_footer.mako index d80aa2c546b..79c58bf6a85 100644 --- a/desktop/core/src/desktop/templates/common_footer.mako +++ b/desktop/core/src/desktop/templates/common_footer.mako @@ -17,7 +17,7 @@ import sys from django.http import HttpRequest -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.views import login_modal if sys.version_info[0] > 2: @@ -29,7 +29,7 @@ else: <%namespace name="commonHeaderFooterComponents" file="/common_header_footer_components.mako" /> % if request is not None: -${ smart_unicode(login_modal(request).content) | n,unicode } +${ smart_str(login_modal(request).content) | n,unicode } % endif diff --git a/desktop/core/src/desktop/templates/common_footer_m.mako b/desktop/core/src/desktop/templates/common_footer_m.mako index 723eb7b11fd..19cf13b9b76 100644 --- a/desktop/core/src/desktop/templates/common_footer_m.mako +++ b/desktop/core/src/desktop/templates/common_footer_m.mako @@ -17,7 +17,6 @@ import sys from django.http import HttpRequest from django.template.defaultfilters import escape, escapejs -from desktop.lib.i18n import smart_unicode if sys.version_info[0] > 
2: from django.utils.translation import gettext as _ diff --git a/desktop/core/src/desktop/templates/common_header.mako b/desktop/core/src/desktop/templates/common_header.mako index 78dce8825c0..edae07ad5a4 100644 --- a/desktop/core/src/desktop/templates/common_header.mako +++ b/desktop/core/src/desktop/templates/common_header.mako @@ -23,7 +23,7 @@ from desktop import conf from desktop.auth.backend import is_admin from desktop.conf import USE_NEW_EDITOR from desktop.models import hue_version -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.webpack_utils import get_hue_bundles if sys.version_info[0] > 2: @@ -55,7 +55,7 @@ if USE_NEW_EDITOR.get(): <%def name="get_title(title)"> % if title: - - ${smart_unicode(title)} + - ${smart_str(title)} % endif diff --git a/desktop/core/src/desktop/templates/common_header_footer_components.mako b/desktop/core/src/desktop/templates/common_header_footer_components.mako index 5be3e6c6f80..14d3ceef759 100644 --- a/desktop/core/src/desktop/templates/common_header_footer_components.mako +++ b/desktop/core/src/desktop/templates/common_header_footer_components.mako @@ -19,7 +19,6 @@ import sys from django.template.defaultfilters import escape, escapejs from desktop import conf -from desktop.lib.i18n import smart_unicode from desktop.views import _ko from beeswax.conf import LIST_PARTITIONS_LIMIT diff --git a/desktop/core/src/desktop/templates/common_header_m.mako b/desktop/core/src/desktop/templates/common_header_m.mako index 257e8549763..9f0bb8a0b81 100644 --- a/desktop/core/src/desktop/templates/common_header_m.mako +++ b/desktop/core/src/desktop/templates/common_header_m.mako @@ -17,7 +17,7 @@ import sys from desktop import conf -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.webpack_utils import get_hue_bundles from metadata.conf import has_optimizer, OPTIMIZER @@ -47,7 +47,7 @@ if USE_NEW_EDITOR.get(): <%def name="get_title(title)"> % if title: - ${smart_unicode(title).upper()} + ${smart_str(title).upper()} % endif diff --git a/desktop/core/src/desktop/templates/common_notebook_ko_components.mako b/desktop/core/src/desktop/templates/common_notebook_ko_components.mako index eb48ff749c5..f154dbabd17 100644 --- a/desktop/core/src/desktop/templates/common_notebook_ko_components.mako +++ b/desktop/core/src/desktop/templates/common_notebook_ko_components.mako @@ -19,7 +19,6 @@ import logging import sys from desktop import conf -from desktop.lib.i18n import smart_unicode from desktop.views import _ko from beeswax.conf import DOWNLOAD_ROW_LIMIT, DOWNLOAD_BYTES_LIMIT diff --git a/desktop/core/src/desktop/templates/config_ko_components.mako b/desktop/core/src/desktop/templates/config_ko_components.mako index 3180a35580e..5661c05e351 100644 --- a/desktop/core/src/desktop/templates/config_ko_components.mako +++ b/desktop/core/src/desktop/templates/config_ko_components.mako @@ -17,7 +17,6 @@ <%! 
import sys from desktop import conf -from desktop.lib.i18n import smart_unicode from desktop.views import _ko if sys.version_info[0] > 2: diff --git a/desktop/core/src/desktop/templates/document_browser.mako b/desktop/core/src/desktop/templates/document_browser.mako index 08fae1c765d..64d3dd83ba0 100644 --- a/desktop/core/src/desktop/templates/document_browser.mako +++ b/desktop/core/src/desktop/templates/document_browser.mako @@ -18,7 +18,6 @@ import sys from desktop import conf -from desktop.lib.i18n import smart_unicode from desktop.views import _ko if sys.version_info[0] > 2: diff --git a/desktop/core/src/desktop/templates/error.mako b/desktop/core/src/desktop/templates/error.mako index 8981f00a972..53a1a5b3070 100644 --- a/desktop/core/src/desktop/templates/error.mako +++ b/desktop/core/src/desktop/templates/error.mako @@ -16,7 +16,7 @@ <%! import sys from desktop.views import commonheader, commonfooter -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop import conf from desktop.auth.backend import is_admin @@ -37,11 +37,11 @@ ${ commonheader(_('Error'), app_name, user, request, "40px") | n,unicode }

${ _('Error!') }
- ${ smart_unicode(error) }
+ ${ smart_str(error) }
%if traceback and is_admin(user): %endif diff --git a/desktop/core/src/desktop/templates/hue.mako b/desktop/core/src/desktop/templates/hue.mako index 50dee07a29c..c012831c5f5 100644 --- a/desktop/core/src/desktop/templates/hue.mako +++ b/desktop/core/src/desktop/templates/hue.mako @@ -20,7 +20,7 @@ from desktop import conf from desktop.auth.backend import is_admin from desktop.conf import ENABLE_HUE_5, has_multi_clusters - from desktop.lib.i18n import smart_unicode + from desktop.lib.i18n import smart_str from desktop.models import hue_version from desktop.views import _ko, commonshare, login_modal from desktop.webpack_utils import get_hue_bundles @@ -328,7 +328,7 @@ ${ hueAceAutocompleter.hueAceAutocompleter() } ${ commonHeaderFooterComponents.header_pollers(user, is_s3_enabled, apps) } % if request is not None: -${ smart_unicode(login_modal(request).content) | n,unicode } +${ smart_str(login_modal(request).content) | n,unicode } % endif diff --git a/desktop/core/src/desktop/templates/ko_editor.mako b/desktop/core/src/desktop/templates/ko_editor.mako index ddd96dd39d8..6296fec1639 100644 --- a/desktop/core/src/desktop/templates/ko_editor.mako +++ b/desktop/core/src/desktop/templates/ko_editor.mako @@ -17,7 +17,6 @@ <%! import sys from desktop import conf -from desktop.lib.i18n import smart_unicode from desktop.views import _ko if sys.version_info[0] > 2: diff --git a/desktop/core/src/desktop/templates/ko_metastore.mako b/desktop/core/src/desktop/templates/ko_metastore.mako index 8712a29f176..8c66d65d514 100644 --- a/desktop/core/src/desktop/templates/ko_metastore.mako +++ b/desktop/core/src/desktop/templates/ko_metastore.mako @@ -17,7 +17,6 @@ <%! import sys from desktop import conf -from desktop.lib.i18n import smart_unicode if sys.version_info[0] > 2: from django.utils.translation import gettext as _ else: diff --git a/desktop/core/src/desktop/templates/logs.mako b/desktop/core/src/desktop/templates/logs.mako index a059e8ebc2f..f9394d680c5 100644 --- a/desktop/core/src/desktop/templates/logs.mako +++ b/desktop/core/src/desktop/templates/logs.mako @@ -18,7 +18,7 @@ import re import sys from desktop.lib.conf import BoundConfig -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.views import commonheader, commonfooter if sys.version_info[0] > 2: @@ -101,7 +101,7 @@ ${ layout.menubar(section='log_view') }
% for l in log:
- ${ smart_unicode(l, errors='ignore') }
+ ${ smart_str(l, errors='ignore') }
% endfor
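
Every Mako template hunk in this stretch of the diff makes the same one-for-one substitution: a smart_unicode(...) call becomes smart_str(...) with the arguments left untouched. As a hedged sketch of why that swap is a drop-in on Python 3 (this helper is illustrative only, not the actual desktop.lib.i18n implementation):

    # Illustrative sketch, not Hue's real helper: on Python 3 one text-coercion
    # function can serve the old smart_unicode() call sites, decoding bytes and
    # stringifying everything else, so callers keep their arguments as-is.
    def smart_str(value, encoding='utf-8', errors='strict'):
        if isinstance(value, bytes):
            return value.decode(encoding, errors)
        return str(value)

    # Same call shapes as the template sites above:
    assert smart_str(b'caf\xc3\xa9') == 'café'
    assert smart_str(Exception('boom'), errors='ignore') == 'boom'
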
diff --git a/desktop/core/src/desktop/templates/popup_error.mako b/desktop/core/src/desktop/templates/popup_error.mako index eebe7d2c0cb..2b38c8338fd 100644 --- a/desktop/core/src/desktop/templates/popup_error.mako +++ b/desktop/core/src/desktop/templates/popup_error.mako @@ -18,7 +18,7 @@ import sys from desktop.views import commonheader, commonfooter -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.auth.backend import is_admin if sys.version_info[0] > 2: @@ -38,9 +38,9 @@ ${ commonheader(title, "", user, request, "40px") | n,unicode }

${ _('Error!') }

- ${ smart_unicode(message) }
+ ${ smart_str(message) }
% if detail:
- ${ smart_unicode(detail) }
+ ${ smart_str(detail) }
% endif
@@ -65,9 +65,9 @@ ${ commonheader(title, "", user, request, "40px") | n,unicode }
% for (file_name, line_number, function_name, text) in traceback: - - - + + + % endfor diff --git a/desktop/core/src/desktop/test_data/hue_5.10.db b/desktop/core/src/desktop/test_data/hue_5.10.db index b89f25024ea..cf0179c357a 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.10.db and b/desktop/core/src/desktop/test_data/hue_5.10.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.11.db b/desktop/core/src/desktop/test_data/hue_5.11.db index 9c5c99c5f14..f4ee9c6b5dd 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.11.db and b/desktop/core/src/desktop/test_data/hue_5.11.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.12.db b/desktop/core/src/desktop/test_data/hue_5.12.db index e380f0f20f7..7abc31a993d 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.12.db and b/desktop/core/src/desktop/test_data/hue_5.12.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.13.db b/desktop/core/src/desktop/test_data/hue_5.13.db index 1ae8fd78ca2..92599865759 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.13.db and b/desktop/core/src/desktop/test_data/hue_5.13.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.14.db b/desktop/core/src/desktop/test_data/hue_5.14.db index af0075caa63..fad3211eb6c 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.14.db and b/desktop/core/src/desktop/test_data/hue_5.14.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.15.db b/desktop/core/src/desktop/test_data/hue_5.15.db index ff640ec121e..14c1f0a1629 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.15.db and b/desktop/core/src/desktop/test_data/hue_5.15.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.7.db b/desktop/core/src/desktop/test_data/hue_5.7.db index 6ed7c12f57a..efa6a19caed 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.7.db and b/desktop/core/src/desktop/test_data/hue_5.7.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.8.db b/desktop/core/src/desktop/test_data/hue_5.8.db index f5d90beab0d..f8224c236e4 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.8.db and b/desktop/core/src/desktop/test_data/hue_5.8.db differ diff --git a/desktop/core/src/desktop/test_data/hue_5.9.db b/desktop/core/src/desktop/test_data/hue_5.9.db index 81f83f62768..f97346c362f 100644 Binary files a/desktop/core/src/desktop/test_data/hue_5.9.db and b/desktop/core/src/desktop/test_data/hue_5.9.db differ diff --git a/desktop/core/src/desktop/tests.py b/desktop/core/src/desktop/tests.py index 52b3a634105..e367d992110 100644 --- a/desktop/core/src/desktop/tests.py +++ b/desktop/core/src/desktop/tests.py @@ -1455,7 +1455,7 @@ def test_db_migrations_sqlite(): 'PASSWORD': '', 'HOST': '', 'PORT': '', - 'OPTIONS': {} if sys.version_info[0] > 2 else '', + 'OPTIONS': {}, 'ATOMIC_REQUESTS': True, 'CONN_MAX_AGE': 0, } diff --git a/desktop/core/src/desktop/views.py b/desktop/core/src/desktop/views.py index c033cacd2cb..7a14bd113dc 100644 --- a/desktop/core/src/desktop/views.py +++ b/desktop/core/src/desktop/views.py @@ -343,9 +343,9 @@ def download_log_view(request): # in case it is rather big. So we write it to a file line by line # and pass that file to zipfile, which might follow a more efficient path. 
tmp = tempfile.NamedTemporaryFile() - log_tmp = tempfile.NamedTemporaryFile("w+t") if sys.version_info[0] == 2 else tempfile.NamedTemporaryFile("w+t", encoding='utf-8') + log_tmp = tempfile.NamedTemporaryFile("w+t", encoding='utf-8') for line in buffer: - log_tmp.write(smart_str(l, errors='replace')) + log_tmp.write(smart_str(line, errors='replace')) # This is not just for show - w/out flush, we often get truncated logs log_tmp.flush() t = time.time() @@ -530,7 +530,7 @@ def serve_500_error(request, *args, **kwargs): else: tb = traceback.extract_tb(exc_info[2]) if is_ajax(request): - tb = '\n'.join(tb.format() if sys.version_info[0] > 2 else [str(t) for t in tb]) + tb = '\n'.join(tb.format()) return render("500.mako", request, {'traceback': tb}) else: # exc_info could be empty diff --git a/desktop/libs/aws/src/aws/conf.py b/desktop/libs/aws/src/aws/conf.py index f72158e3e9d..99a7906289b 100644 --- a/desktop/libs/aws/src/aws/conf.py +++ b/desktop/libs/aws/src/aws/conf.py @@ -16,21 +16,15 @@ import os import re -import sys import logging import requests +from django.utils.translation import gettext as _t, gettext_lazy as _ from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool, coerce_password_from_script from desktop.lib.idbroker import conf as conf_idbroker from hadoop.core_site import get_raz_api_url, get_raz_s3_default_bucket, get_s3a_access_key, get_s3a_secret_key, get_s3a_session_token -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _t, gettext_lazy as _ -else: - from django.utils.translation import ugettext as _t, ugettext_lazy as _ - - LOG = logging.getLogger() diff --git a/desktop/libs/aws/src/aws/conf_tests.py b/desktop/libs/aws/src/aws/conf_tests.py index f8ad9f99621..be23732095c 100644 --- a/desktop/libs/aws/src/aws/conf_tests.py +++ b/desktop/libs/aws/src/aws/conf_tests.py @@ -13,26 +13,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import import logging -import pytest -import sys -import unittest -from aws import conf +import pytest from django.test import TestCase +from aws import conf from desktop.conf import RAZ from desktop.lib.django_test_util import make_logged_in_client - from useradmin.models import User -if sys.version_info[0] > 2: - from unittest.mock import patch -else: - from mock import patch - LOG = logging.getLogger() diff --git a/desktop/libs/aws/src/aws/s3/s3connection.py b/desktop/libs/aws/src/aws/s3/s3connection.py index a5dba706683..5d6d6578d76 100644 --- a/desktop/libs/aws/src/aws/s3/s3connection.py +++ b/desktop/libs/aws/src/aws/s3/s3connection.py @@ -14,31 +14,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import boto import logging -import requests -import sys -import xml.sax - -if sys.version_info[0] > 2: - from urllib.parse import unquote, urlparse as lib_urlparse, parse_qs, urlencode -else: - from urllib import unquote, urlencode - from urlparse import urlparse as lib_urlparse, parse_qs +from urllib.parse import parse_qs, unquote, urlencode +import boto from boto.connection import HTTPRequest from boto.exception import BotoClientError from boto.regioninfo import connect from boto.resultset import ResultSet from boto.s3 import S3RegionInfo from boto.s3.bucket import Bucket, Key -from boto.s3.connection import S3Connection, NoHostProvided +from boto.s3.connection import NoHostProvided, S3Connection from boto.s3.prefix import Prefix from desktop.conf import RAZ from desktop.lib.raz.clients import S3RazClient - LOG = logging.getLogger() @@ -92,7 +83,6 @@ def __init__(self, username, aws_access_key_id=None, aws_secret_access_key=None, suppress_consec_slashes=suppress_consec_slashes, anon=anon, validate_certs=validate_certs, profile_name=profile_name) - def make_request(self, method, bucket='', key='', headers=None, data='', query_args=None, sender=None, override_num_retries=None, retry_handler=None): @@ -113,7 +103,7 @@ def make_request(self, method, bucket='', key='', headers=None, data='', if query_args: # Clean prefix to remove s3a%3A//[S3_BUCKET]/ for sending correct relative path to RAZ if 'prefix=s3a%3A//' in query_args: - qs_parsed = parse_qs(query_args) # all strings will be unquoted + qs_parsed = parse_qs(query_args) # all strings will be unquoted prefix_relative_path = qs_parsed['prefix'][0].partition(bucket + '/')[2] qs_parsed['prefix'][0] = prefix_relative_path @@ -149,13 +139,11 @@ def make_request(self, method, bucket='', key='', headers=None, data='', return self._mexe(http_request, sender, override_num_retries, retry_handler=retry_handler) - def get_signed_url(self, action='GET', url=None, headers=None, data=None): raz_client = S3RazClient(username=self.username) return raz_client.get_url(action, url, headers, data) - def _required_auth_capability(self): """ Force AnonAuthHandler when Raz is enabled diff --git a/desktop/libs/aws/src/aws/s3/s3connection_test.py b/desktop/libs/aws/src/aws/s3/s3connection_test.py index bdf573a1cda..0c958b0fdfc 100644 --- a/desktop/libs/aws/src/aws/s3/s3connection_test.py +++ b/desktop/libs/aws/src/aws/s3/s3connection_test.py @@ -14,22 +14,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging -import requests -import six import sys +import logging +from unittest.mock import Mock, patch -from desktop.conf import RAZ +import six +import requests from aws.client import _make_client from aws.s3.s3connection import RazS3Connection from aws.s3.s3test_utils import S3TestBase - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - +from desktop.conf import RAZ LOG = logging.getLogger() @@ -68,9 +63,7 @@ def test_list_buckets(self): raise SkipTest() # Incorrect in Py3 CircleCi assert 'GET' == http_request.method - assert ( - ('s3-us-west-1.amazonaws.com:443' if sys.version_info[0] > 2 else 's3-us-west-1.amazonaws.com') == - http_request.host) + assert 's3-us-west-1.amazonaws.com:443' == http_request.host assert '/' == http_request.path assert '/' == http_request.auth_path assert ({ diff --git a/desktop/libs/aws/src/aws/s3/s3fs.py b/desktop/libs/aws/src/aws/s3/s3fs.py index 2718e7f6403..c890c2a18c7 100644 --- a/desktop/libs/aws/src/aws/s3/s3fs.py +++ b/desktop/libs/aws/src/aws/s3/s3fs.py @@ -14,21 +14,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import - import os import re -import sys import time import logging import itertools import posixpath +import urllib.error +import urllib.request from builtins import object, str +from urllib.parse import urlparse as lib_urlparse from boto.exception import BotoClientError, S3ResponseError from boto.s3.connection import Location from boto.s3.key import Key from boto.s3.prefix import Prefix +from django.utils.translation import gettext as _ from aws import s3 from aws.conf import AWS_ACCOUNTS, PERMISSION_ACTION_S3, get_default_region, get_locations, is_raz_s3 @@ -36,18 +37,6 @@ from aws.s3.s3stat import S3Stat from filebrowser.conf import REMOTE_STORAGE_HOME -if sys.version_info[0] > 2: - import urllib.error - import urllib.request - from urllib.parse import quote as urllib_quote, urlparse as lib_urlparse - - from django.utils.translation import gettext as _ -else: - from urllib import quote as urllib_quote - - from django.utils.translation import ugettext as _ - from urlparse import urlparse as lib_urlparse - DEFAULT_READ_SIZE = 1024 * 1024 # 1MB BUCKET_NAME_PATTERN = re.compile( r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9_\-]*[a-zA-Z0-9])\.)*(?:[A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9_\-]*[A-Za-z0-9]))$") diff --git a/desktop/libs/aws/src/aws/s3/upload.py b/desktop/libs/aws/src/aws/s3/upload.py index 9e2edc34e22..e3df72d38a7 100644 --- a/desktop/libs/aws/src/aws/s3/upload.py +++ b/desktop/libs/aws/src/aws/s3/upload.py @@ -21,13 +21,13 @@ See http://docs.djangoproject.com/en/1.9/topics/http/file-uploads/ """ -import io -import sys import logging import unicodedata +from io import BytesIO as stream_io from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadhandler import FileUploadHandler, SkipFile, StopFutureHandlers, StopUpload, UploadFileException +from django.utils.translation import gettext as _ from aws.s3 import parse_uri from aws.s3.s3fs import S3FileSystemException @@ -36,16 +36,6 @@ from desktop.lib.fsmanager import get_client from filebrowser.utils import calculate_total_size, generate_chunks -if sys.version_info[0] > 2: - from io import BytesIO as stream_io -else: - from cStringIO import StringIO as stream_io - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext 
as _ - DEFAULT_WRITE_SIZE = 1024 * 1024 * 128 # TODO: set in configuration (currently 128 MiB) LOG = logging.getLogger() diff --git a/desktop/libs/azure/src/azure/abfs/abfs.py b/desktop/libs/azure/src/azure/abfs/abfs.py index 6c963e7dbc2..6a22f640a67 100644 --- a/desktop/libs/azure/src/azure/abfs/abfs.py +++ b/desktop/libs/azure/src/azure/abfs/abfs.py @@ -20,8 +20,6 @@ """ import os -import re -import sys import logging import threading import urllib.error @@ -585,9 +583,6 @@ def rename(self, old, new): Renames a file """ rename_source = Init_ABFS.strip_scheme(old) - if sys.version_info[0] < 3 and isinstance(rename_source, unicode): - rename_source = rename_source.encode('utf-8') - headers = {'x-ms-rename-source': '/' + urllib_quote(rename_source)} try: @@ -660,7 +655,7 @@ def _local_copy_file(self, local_src, remote_dst, chunk_size=UPLOAD_CHUCK_SIZE): offset += size chunk = src.read(chunk_size) self.flush(remote_dst, params={'position': offset}) - except: + except Exception: LOG.exception(_('Copying %s -> %s failed.') % (local_src, remote_dst)) raise finally: diff --git a/desktop/libs/azure/src/azure/abfs/upload.py b/desktop/libs/azure/src/azure/abfs/upload.py index 730e58a505f..f5c76390f5e 100644 --- a/desktop/libs/azure/src/azure/abfs/upload.py +++ b/desktop/libs/azure/src/azure/abfs/upload.py @@ -14,12 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import logging import unicodedata +from io import StringIO as string_io from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadhandler import FileUploadHandler, SkipFile, StopFutureHandlers, StopUpload, UploadFileException +from django.utils.translation import gettext as _ from azure.abfs.__init__ import parse_uri from azure.abfs.abfs import ABFSFileSystemException @@ -28,16 +29,6 @@ from desktop.lib.fsmanager import get_client from filebrowser.utils import calculate_total_size, generate_chunks -if sys.version_info[0] > 2: - from io import BytesIO, StringIO as string_io -else: - from cStringIO import StringIO as string_io - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - DEFAULT_WRITE_SIZE = 100 * 1024 * 1024 # As per Azure doc, maximum blob size is 100MB LOG = logging.getLogger() diff --git a/desktop/libs/azure/src/azure/conf.py b/desktop/libs/azure/src/azure/conf.py index 5bcdf051b5f..858f267bc13 100644 --- a/desktop/libs/azure/src/azure/conf.py +++ b/desktop/libs/azure/src/azure/conf.py @@ -18,15 +18,12 @@ import sys import logging +from django.utils.translation import gettext_lazy as _t + from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool, coerce_password_from_script from desktop.lib.idbroker import conf as conf_idbroker from hadoop import core_site -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - LOG = logging.getLogger() PERMISSION_ACTION_ABFS = "abfs_access" diff --git a/desktop/libs/dashboard/src/dashboard/api.py b/desktop/libs/dashboard/src/dashboard/api.py index 12f594ad557..2f94fc8970c 100644 --- a/desktop/libs/dashboard/src/dashboard/api.py +++ b/desktop/libs/dashboard/src/dashboard/api.py @@ -15,42 +15,39 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import filter -import hashlib import json -import logging -import sys import uuid +import hashlib +import logging + +from django.utils.encoding import force_str +from django.utils.translation import gettext as _ +from dashboard.conf import USE_GRIDSTER, get_engines +from dashboard.controller import can_edit_index +from dashboard.dashboard_api import get_engine +from dashboard.data_export import download as export_download +from dashboard.decorators import allow_viewer_only +from dashboard.facet_builder import _guess_gap, _new_range_facet, _zoom_range_facet +from dashboard.models import ( + COMPARE_FACET, + NESTED_FACET_FORM, + QUERY_FACET, + Collection2, + augment_solr_exception, + augment_solr_response, + extract_solr_exception_message, + pairwise2, +) from desktop.conf import ENABLE_DOWNLOAD from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions_renderable import PopupException from desktop.lib.rest.http_client import RestException from desktop.models import Document2 from desktop.views import serve_403_error - from libsolr.api import SolrApi - -from notebook.connectors.base import get_api -from notebook.dashboard_api import MockRequest from search.conf import SOLR_URL -from dashboard.conf import get_engines, USE_GRIDSTER -from dashboard.controller import can_edit_index -from dashboard.dashboard_api import get_engine -from dashboard.data_export import download as export_download -from dashboard.decorators import allow_viewer_only -from dashboard.facet_builder import _guess_gap, _zoom_range_facet, _new_range_facet -from dashboard.models import Collection2, augment_solr_response, pairwise2, augment_solr_exception,\ - NESTED_FACET_FORM, COMPARE_FACET, QUERY_FACET, extract_solr_exception_message - -if sys.version_info[0] > 2: - from django.utils.encoding import force_str - from django.utils.translation import gettext as _ -else: - from django.utils.encoding import force_unicode as force_str - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -122,12 +119,12 @@ def index_fields_dynamic(request): result['message'] = '' result['fields'] = [ - Collection2._make_field(name, properties) - for name, properties in dynamic_fields['fields'].items() if 'dynamicBase' in properties + Collection2._make_field(name, properties) for name, properties in dynamic_fields['fields'].items() if 'dynamicBase' in properties ] result['gridlayout_header_fields'] = [ - Collection2._make_gridlayout_header_field({'name': name, 'type': properties.get('type')}, True) - for name, properties in dynamic_fields['fields'].items() if 'dynamicBase' in properties + Collection2._make_gridlayout_header_field({'name': name, 'type': properties.get('type')}, True) + for name, properties in dynamic_fields['fields'].items() + if 'dynamicBase' in properties ] result['status'] = 0 except Exception as e: @@ -197,9 +194,9 @@ def update_document(request): if document['hasChanged']: edits = { - "id": doc_id, + "id": doc_id, } - version = None # If there is a version, use it to avoid potential concurrent update conflicts + version = None # If there is a version, use it to avoid potential concurrent update conflicts for field in document['details']: if field['hasChanged'] and field['key'] != '_version_': @@ -207,7 +204,9 @@ def update_document(request): if field['key'] == '_version_': version = field['value'] - result['update'] = SolrApi(SOLR_URL.get(), request.user).update(collection['name'], json.dumps([edits]), content_type='json', version=version) + result['update'] = 
SolrApi(SOLR_URL.get(), request.user).update( + collection['name'], json.dumps([edits]), content_type='json', version=version + ) result['message'] = _('Document successfully updated.') result['status'] = 0 else: @@ -216,7 +215,7 @@ def update_document(request): except RestException as e: try: result['message'] = json.loads(e.message)['error']['msg'] - except: + except Exception: LOG.exception('Failed to parse json response') result['message'] = force_str(e) except Exception as e: @@ -271,7 +270,7 @@ def get_terms(request): # maxcount } if analysis['terms']['prefix']: - properties['terms.regex'] = '.*%(prefix)s.*' % analysis['terms'] # Use regexp instead of case sensitive 'terms.prefix' + properties['terms.regex'] = '.*%(prefix)s.*' % analysis['terms'] # Use regexp instead of case sensitive 'terms.prefix' properties['terms.regex.flag'] = 'case_insensitive' result['terms'] = SolrApi(SOLR_URL.get(), request.user).terms(collection['name'], field, properties) @@ -380,7 +379,6 @@ def new_facet(request): widget_type = request.POST.get('widget_type') window_size = request.POST.get('window_size') - result['message'] = '' result['facet'] = _create_facet(collection, request.user, facet_id, facet_label, facet_field, widget_type, window_size) result['status'] = 0 @@ -400,35 +398,37 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t 'missing': False, 'isDate': False, 'slot': 0, - 'aggregate': {'function': 'unique', 'formula': '', 'plain_formula': '', 'percentile': 50} + 'aggregate': {'function': 'unique', 'formula': '', 'plain_formula': '', 'percentile': 50}, } template = { - "showFieldList": True, - "showGrid": False, - "showChart": True, - "chartSettings" : { - 'chartType': 'pie' if widget_type == 'pie2-widget' else ('timeline' if widget_type == 'timeline-widget' else ('gradientmap' if widget_type == 'gradient-map-widget' else 'bars')), - 'chartSorting': 'none', - 'chartScatterGroup': None, - 'chartScatterSize': None, - 'chartScope': 'world', - 'chartX': None, - 'chartYSingle': None, - 'chartYMulti': [], - 'chartData': [], - 'chartMapLabel': None, - 'chartSelectorType': 'bar' - }, - "fieldsAttributes": [], - "fieldsAttributesFilter": "", - "filteredAttributeFieldsAll": True, - "fields": [], - "fieldsSelected": [], - "leafletmap": {'latitudeField': None, 'longitudeField': None, 'labelField': None}, # Use own? - 'leafletmapOn': False, - 'isGridLayout': False, - "hasDataForChart": True, - "rows": 25, + "showFieldList": True, + "showGrid": False, + "showChart": True, + "chartSettings": { + 'chartType': 'pie' + if widget_type == 'pie2-widget' + else ('timeline' if widget_type == 'timeline-widget' else ('gradientmap' if widget_type == 'gradient-map-widget' else 'bars')), + 'chartSorting': 'none', + 'chartScatterGroup': None, + 'chartScatterSize': None, + 'chartScope': 'world', + 'chartX': None, + 'chartYSingle': None, + 'chartYMulti': [], + 'chartData': [], + 'chartMapLabel': None, + 'chartSelectorType': 'bar', + }, + "fieldsAttributes": [], + "fieldsAttributesFilter": "", + "filteredAttributeFieldsAll": True, + "fields": [], + "fieldsSelected": [], + "leafletmap": {'latitudeField': None, 'longitudeField': None, 'labelField': None}, # Use own? 
+ 'leafletmapOn': False, + 'isGridLayout': False, + "hasDataForChart": True, + "rows": 25, } if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'): facet_type = 'pivot' @@ -438,14 +438,27 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t properties['statementUuid'] = collection['selectedDocument'].get('uuid') doc = Document2.objects.get_by_uuid(user=user, uuid=collection['selectedDocument']['uuid'], perm_type='read') snippets = doc.data_dict.get('snippets', []) - properties['result'] = {'handle': {'statement_id': 0, 'statements_count': 1, 'previous_statement_hash': hashlib.sha224(str(uuid.uuid4())).hexdigest()}} + properties['result'] = { + 'handle': {'statement_id': 0, 'statements_count': 1, 'previous_statement_hash': hashlib.sha224(str(uuid.uuid4())).hexdigest()} + } if snippets: properties['engine'] = snippets[0]['type'] else: properties['statementUuid'] = '' properties['statement'] = '' properties['uuid'] = facet_field - properties['facets'] = [{'canRange': False, 'field': 'blank', 'limit': 10, 'mincount': 0, 'sort': 'desc', 'aggregate': {'function': 'count'}, 'isDate': False, 'type': 'field'}] + properties['facets'] = [ + { + 'canRange': False, + 'field': 'blank', + 'limit': 10, + 'mincount': 0, + 'sort': 'desc', + 'aggregate': {'function': 'count'}, + 'isDate': False, + 'type': 'field', + } + ] facet_type = 'statement' else: api = get_engine(user, collection) @@ -460,7 +473,15 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t else: facet_type = 'field' - if widget_type in ('bucket-widget', 'pie2-widget', 'timeline-widget', 'tree2-widget', 'text-facet-widget', 'hit-widget', 'gradient-map-widget'): + if widget_type in ( + 'bucket-widget', + 'pie2-widget', + 'timeline-widget', + 'tree2-widget', + 'text-facet-widget', + 'hit-widget', + 'gradient-map-widget', + ): # properties = {'canRange': False, 'stacked': False, 'limit': 10} # TODO: Lighter weight top nested facet properties['facets_form'] = NESTED_FACET_FORM @@ -546,7 +567,7 @@ def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_t 'properties': properties, # Hue 4+ 'template': template, - 'queryResult': {} + 'queryResult': {}, } @@ -564,7 +585,7 @@ def get_range_facet(request): if action == 'select': properties = _guess_gap(solr_api, collection, facet, facet['properties']['start'], facet['properties']['end']) else: - properties = _zoom_range_facet(solr_api, collection, facet) # Zoom out + properties = _zoom_range_facet(solr_api, collection, facet) # Zoom out result['properties'] = properties result['status'] = 0 diff --git a/desktop/libs/dashboard/src/dashboard/conf.py b/desktop/libs/dashboard/src/dashboard/conf.py index d262655b3fa..a8562ae8875 100644 --- a/desktop/libs/dashboard/src/dashboard/conf.py +++ b/desktop/libs/dashboard/src/dashboard/conf.py @@ -17,16 +17,12 @@ import sys -from desktop.lib.conf import Config, UnspecifiedConfigSection, ConfigSection, coerce_bool -from desktop.appmanager import get_apps_dict +from django.utils.translation import gettext as _, gettext_lazy as _t +from desktop.appmanager import get_apps_dict +from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool from notebook.conf import get_ordered_interpreters -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t - def is_enabled(): """Automatic when search is enabled.""" @@ -91,6 +87,7 @@ 
def is_enabled(): default=False ) + def get_properties(): if ENGINES.get(): engines = ENGINES.get() @@ -112,6 +109,7 @@ def get_properties(): }, } + def get_engines(user): engines = [] apps = get_apps_dict(user=user) @@ -139,7 +137,6 @@ def get_engines(user): return engines - ENGINES = UnspecifiedConfigSection( "engines", help="One entry for each type of snippet.", diff --git a/desktop/libs/dashboard/src/dashboard/decorators.py b/desktop/libs/dashboard/src/dashboard/decorators.py index 337dde06752..116d62caca8 100644 --- a/desktop/libs/dashboard/src/dashboard/decorators.py +++ b/desktop/libs/dashboard/src/dashboard/decorators.py @@ -15,21 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import json import sys +import json +import logging from django.utils.functional import wraps +from django.utils.translation import gettext as _ from desktop.conf import USE_NEW_EDITOR from desktop.lib.exceptions_renderable import PopupException from desktop.models import Document2 -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() diff --git a/desktop/libs/dashboard/src/dashboard/facet_builder.py b/desktop/libs/dashboard/src/dashboard/facet_builder.py index 42303889f2c..72aea6d2d4f 100644 --- a/desktop/libs/dashboard/src/dashboard/facet_builder.py +++ b/desktop/libs/dashboard/src/dashboard/facet_builder.py @@ -16,30 +16,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import division, print_function -from future import standard_library -standard_library.install_aliases() - -from builtins import str, range - -import logging +import re import math +import logging import numbers -import re -import sys -import urllib.request, urllib.parse, urllib.error - +import urllib.error +import urllib.parse +import urllib.request from datetime import datetime, timedelta -from math import ceil -from math import log +from math import ceil, log from time import mktime -from dateutil.relativedelta import * - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from dateutil.relativedelta import * +from django.utils.translation import gettext as _ LOG = logging.getLogger() @@ -59,17 +48,9 @@ 'DAYS': DAY_MS, 'WEEKS': WEEK_MS, 'MONTHS': MONTH_MS, - 'YEARS': YEAR_MS -} -TIME_INTERVALS_STARTING_VALUE = { - 'microsecond': 0, - 'second': 0, - 'minute': 0, - 'hour': 0, - 'day': 1, - 'month': 1, - 'year': 0 + 'YEARS': YEAR_MS, } +TIME_INTERVALS_STARTING_VALUE = {'microsecond': 0, 'second': 0, 'minute': 0, 'hour': 0, 'day': 1, 'month': 1, 'year': 0} TIME_INTERVAL_SORTED = ['microsecond', 'second', 'minute', 'hour', 'day', 'month'] TIME_INTERVALS = [ {'coeff': 1, 'unit': 'SECONDS'}, @@ -97,20 +78,24 @@ {'coeff': 1, 'unit': 'MONTHS'}, {'coeff': 3, 'unit': 'MONTHS'}, {'coeff': 6, 'unit': 'MONTHS'}, - {'coeff': 1, 'unit': 'YEARS'}]; + {'coeff': 1, 'unit': 'YEARS'}, +] for interval in TIME_INTERVALS: interval['ms'] = TIME_INTERVALS_MS[interval['unit']] * interval['coeff'] + def utf_quoter(what): return urllib.parse.quote(str(what).encode('utf-8'), safe='~@#$&()*!+=:;,.?/\'') -def _guess_range_facet(widget_type, solr_api, collection, facet_field, properties, start=None, end=None, gap=None, window_size=None, slot = 0): +def _guess_range_facet( + widget_type, solr_api, collection, 
facet_field, properties, start=None, end=None, gap=None, window_size=None, slot=0 +): try: stats_json = solr_api.stats(collection['name'], [facet_field]) stat_facet = stats_json['stats']['stats_fields'][facet_field] - _compute_range_facet(widget_type, stat_facet, properties, start, end, gap, window_size = window_size, SLOTS = slot) + _compute_range_facet(widget_type, stat_facet, properties, start, end, gap, window_size=window_size, SLOTS=slot) except Exception as e: print(e) LOG.info('Stats not supported on all the fields, like text: %s' % e) @@ -120,7 +105,7 @@ def _get_interval(domain_ms, SLOTS): biggest_interval = TIME_INTERVALS[len(TIME_INTERVALS) - 1] biggest_interval_is_too_small = math.floor(domain_ms / biggest_interval['ms']) > SLOTS if biggest_interval_is_too_small: - coeff = min(ceil(math.floor(domain_ms / SLOTS)), 100) # If we go over 100 years, something has gone wrong. + coeff = min(ceil(math.floor(domain_ms / SLOTS)), 100) # If we go over 100 years, something has gone wrong. return {'ms': YEAR_MS * coeff, 'coeff': coeff, 'unit': 'YEARS'} for i in range(len(TIME_INTERVALS) - 2, 0, -1): @@ -130,11 +115,13 @@ def _get_interval(domain_ms, SLOTS): return TIME_INTERVALS[0] + def _format_interval(interval): return '+' + str(interval['coeff']) + interval['unit'] + def _get_interval_duration(text): - regex = re.search('.*-(\d*)(.*)', text) + regex = re.search(r'.*-(\d*)(.*)', text) if regex: groups = regex.groups() @@ -142,6 +129,7 @@ def _get_interval_duration(text): return TIME_INTERVALS_MS[groups[1]] * int(groups[0]) return 0 + def _clamp_date(interval, time): gap_duration_lower = interval['unit'].lower() gap_duration_lowers = gap_duration_lower[:-1] # Removes 's' @@ -153,6 +141,7 @@ def _clamp_date(interval, time): break return time + def _get_next_interval(interval, start_time, do_at_least_once): time = start_time if interval.get('start_relative'): @@ -161,107 +150,119 @@ def _get_next_interval(interval, start_time, do_at_least_once): gap_duration_lower = interval['unit'].lower() gap_duration_lowers = gap_duration_lower[:-1] # Removes 's' gap_duration = int(interval['coeff']) - while (getattr(time, gap_duration_lowers) - TIME_INTERVALS_STARTING_VALUE[gap_duration_lowers]) % gap_duration or (do_at_least_once and time == start_time): # Do while + while (getattr(time, gap_duration_lowers) - TIME_INTERVALS_STARTING_VALUE[gap_duration_lowers]) % gap_duration or ( + do_at_least_once and time == start_time + ): # Do while kwargs = {gap_duration_lower: 1} time = time + relativedelta(**kwargs) return time + def _remove_duration(interval, nb_slot, time): gap_duration_lower = interval['unit'].lower() gap_duration = int(interval['coeff']) * nb_slot kwargs = {gap_duration_lower: -1 * gap_duration} return time + relativedelta(**kwargs) + def _compute_range_facet(widget_type, stat_facet, properties, start=None, end=None, gap=None, SLOTS=0, window_size=None): - if SLOTS == 0: - if widget_type == 'pie-widget' or widget_type == 'pie2-widget': - SLOTS = 5 - elif widget_type == 'facet-widget' or widget_type == 'text-facet-widget' or widget_type == 'histogram-widget' or widget_type == 'bar-widget' or widget_type == 'bucket-widget' or widget_type == 'timeline-widget': - if window_size: - SLOTS = math.floor(int(window_size) / 75) # Value is determined as the thinnest space required to display a timestamp on x axis - else: - SLOTS = 10 + if SLOTS == 0: + if widget_type == 'pie-widget' or widget_type == 'pie2-widget': + SLOTS = 5 + elif ( + widget_type == 'facet-widget' + or widget_type == 
'text-facet-widget' + or widget_type == 'histogram-widget' + or widget_type == 'bar-widget' + or widget_type == 'bucket-widget' + or widget_type == 'timeline-widget' + ): + if window_size: + SLOTS = math.floor(int(window_size) / 75) # Value is determined as the thinnest space required to display a timestamp on x axis else: - SLOTS = 100 - - is_date = widget_type == 'timeline-widget' - - if isinstance(stat_facet['min'], numbers.Number): - stats_min = int(stat_facet['min']) # Cast floats to int currently - stats_max = int(stat_facet['max']) - if start is None: - if widget_type == 'line-widget': - start, _ = _round_thousand_range(stats_min) - else: - start, _ = _round_number_range(stats_min) + SLOTS = 10 + else: + SLOTS = 100 + + is_date = widget_type == 'timeline-widget' + + if isinstance(stat_facet['min'], numbers.Number): + stats_min = int(stat_facet['min']) # Cast floats to int currently + stats_max = int(stat_facet['max']) + if start is None: + if widget_type == 'line-widget': + start, _ = _round_thousand_range(stats_min) else: - start = int(start) - if end is None: - if widget_type == 'line-widget': - _, end = _round_thousand_range(stats_max) - else: - _, end = _round_number_range(stats_max) + start, _ = _round_number_range(stats_min) + else: + start = int(start) + if end is None: + if widget_type == 'line-widget': + _, end = _round_thousand_range(stats_max) else: - end = int(end) - - if gap is None: - gap = int(math.floor((end - start) / SLOTS)) - if gap < 1: - gap = 1 - - end = max(end, stats_max) - elif re.match('\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d\d?\d?)?Z', stat_facet['min']): - is_date = True - stats_min = stat_facet['min'] - stats_max = stat_facet['max'] - start_was_none = False - if start is None: - start_was_none = True - start = stats_min - start = re.sub('\.\d\d?\d?Z$', 'Z', start) - try: - start_ts = datetime.strptime(start, '%Y-%m-%dT%H:%M:%SZ') - start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900 - except Exception as e: - LOG.error('Bad date: %s' % e) - start_ts = datetime.strptime('1970-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ') - - if end is None: - end = stats_max - end = re.sub('\.\d\d?\d?Z$', 'Z', end) - try: - end_ts = datetime.strptime(end, '%Y-%m-%dT%H:%M:%SZ') - end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900 - except Exception as e: - LOG.error('Bad date: %s' % e) - end_ts = datetime.strptime('2050-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ') - end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') - domain_ms = (mktime(end_ts.timetuple()) - mktime(start_ts.timetuple())) * 1000 - interval = _get_interval(domain_ms, SLOTS) + _, end = _round_number_range(stats_max) + else: + end = int(end) + + if gap is None: + gap = int(math.floor((end - start) / SLOTS)) + if gap < 1: + gap = 1 + + end = max(end, stats_max) + elif re.match(r'\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d\d?\d?)?Z', stat_facet['min']): + is_date = True + stats_min = stat_facet['min'] + stats_max = stat_facet['max'] + start_was_none = False + if start is None: + start_was_none = True + start = stats_min + start = re.sub(r'\.\d\d?\d?Z$', 'Z', start) + try: + start_ts = datetime.strptime(start, '%Y-%m-%dT%H:%M:%SZ') + start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900 + except Exception as e: + LOG.error('Bad date: %s' % e) + start_ts = datetime.strptime('1970-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ') + + if end is None: + end = stats_max + end = re.sub(r'\.\d\d?\d?Z$', 'Z', end) + try: + end_ts = datetime.strptime(end, '%Y-%m-%dT%H:%M:%SZ') + 
end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') # Check for dates before 1900 + except Exception as e: + LOG.error('Bad date: %s' % e) + end_ts = datetime.strptime('2050-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ') + end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') + domain_ms = (mktime(end_ts.timetuple()) - mktime(start_ts.timetuple())) * 1000 + interval = _get_interval(domain_ms, SLOTS) + start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') + gap = _format_interval(interval) + if start_was_none: + start_ts = _clamp_date(interval, start_ts) start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') - gap = _format_interval(interval) - if start_was_none: - start_ts = _clamp_date(interval, start_ts) - start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') - stats_max = end - stats_min = start - else: - start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') - elif stat_facet['max'] == 'NOW': - is_date = True - domain_ms = _get_interval_duration(stat_facet['min']) - interval = _get_interval(domain_ms, SLOTS) - nb_slot = math.floor(domain_ms / interval['ms']) - gap = _format_interval(interval) - end_ts = datetime.utcnow() - end_ts_clamped = _clamp_date(interval, end_ts) - end_ts = _get_next_interval(interval, end_ts_clamped, end_ts_clamped != end_ts) - start_ts = _remove_duration(interval, nb_slot, end_ts) - stats_max = end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') - stats_min = start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') - - properties.update({ + stats_max = end + stats_min = start + else: + start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') + elif stat_facet['max'] == 'NOW': + is_date = True + domain_ms = _get_interval_duration(stat_facet['min']) + interval = _get_interval(domain_ms, SLOTS) + nb_slot = math.floor(domain_ms / interval['ms']) + gap = _format_interval(interval) + end_ts = datetime.utcnow() + end_ts_clamped = _clamp_date(interval, end_ts) + end_ts = _get_next_interval(interval, end_ts_clamped, end_ts_clamped != end_ts) + start_ts = _remove_duration(interval, nb_slot, end_ts) + stats_max = end = end_ts.strftime('%Y-%m-%dT%H:%M:%SZ') + stats_min = start = start_ts.strftime('%Y-%m-%dT%H:%M:%SZ') + + properties.update( + { 'min': stats_min, 'max': stats_max, 'start': start, @@ -270,13 +271,11 @@ def _compute_range_facet(widget_type, stat_facet, properties, start=None, end=No 'slot': SLOTS, 'canRange': True, 'isDate': is_date, - }) + } + ) - if widget_type == 'histogram-widget': - properties.update({ - 'timelineChartType': 'bar', - 'enableSelection': True - }) + if widget_type == 'histogram-widget': + properties.update({'timelineChartType': 'bar', 'enableSelection': True}) def _round_date_range(tm): @@ -284,33 +283,38 @@ def _round_date_range(tm): end = start + timedelta(seconds=60) return start, end + def _round_number_range(n): if n <= 10: return n, n + 1 else: i = int(log(n, 10)) end = int(round(n, -i)) - start = end - 10 ** i + start = end - 10**i return start, end + def _round_thousand_range(n): if n <= 10: return 0, 0 else: i = int(log(n, 10)) - start = 10 ** i + start = 10**i end = 10 ** (i + 1) return start, end + def _guess_gap(solr_api, collection, facet, start=None, end=None): properties = {} - _guess_range_facet(facet['widgetType'], solr_api, collection, facet['field'], properties, start=start, end=end, slot = facet.get('properties', facet)['slot']) + _guess_range_facet( + facet['widgetType'], solr_api, collection, facet['field'], properties, start=start, end=end, slot=facet.get('properties', facet)['slot'] + ) return properties def _new_range_facet(solr_api, collection, facet_field, widget_type, window_size): properties 
= {} - _guess_range_facet(widget_type, solr_api, collection, facet_field, properties, window_size = window_size) + _guess_range_facet(widget_type, solr_api, collection, facet_field, properties, window_size=window_size) return properties diff --git a/desktop/libs/dashboard/src/dashboard/models.py b/desktop/libs/dashboard/src/dashboard/models.py index d0a241876a7..754574d5c22 100644 --- a/desktop/libs/dashboard/src/dashboard/models.py +++ b/desktop/libs/dashboard/src/dashboard/models.py @@ -17,33 +17,25 @@ from __future__ import division -from builtins import next -from builtins import str -from builtins import zip -from builtins import object -import collections -import datetime -import dateutil -import itertools +import re +import sys import json import logging import numbers -import re -import sys +import datetime +import itertools +import collections +from builtins import next, object, str, zip +import dateutil from django.urls import reverse from django.utils.html import escape - -from desktop.lib.i18n import smart_unicode, smart_str, force_unicode -from desktop.models import get_data_link, Document2 -from notebook.connectors.base import Notebook, _get_snippet_name +from django.utils.translation import gettext as _ from dashboard.dashboard_api import get_engine - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from desktop.lib.i18n import force_unicode, smart_str +from desktop.models import Document2, get_data_link +from notebook.connectors.base import Notebook, _get_snippet_name LOG = logging.getLogger() @@ -142,11 +134,11 @@ def get_props(self, user): for facet in props['collection']['facets']: properties = facet['properties'] - if 'gap' in properties and not 'initial_gap' in properties: + if 'gap' in properties and 'initial_gap' not in properties: properties['initial_gap'] = properties['gap'] - if 'start' in properties and not 'initial_start' in properties: + if 'start' in properties and 'initial_start' not in properties: properties['initial_start'] = properties['start'] - if 'end' in properties and not 'initial_end' in properties: + if 'end' in properties and 'initial_end' not in properties: properties['initial_end'] = properties['end'] if 'domain' not in properties: properties['domain'] = {'blockParent': [], 'blockChildren': []} @@ -192,7 +184,7 @@ def get_default(self, user, name, engine='solr', source='data'): if id_field: id_field = id_field[0] else: - id_field = '' # Schemaless might not have an id + id_field = '' # Schemaless might not have an id if source == 'query': nb_doc = Document2.objects.document(user=user, doc_id=name) @@ -332,6 +324,7 @@ def get_field_list(cls, collection): else: return ['*'] + def get_facet_field(category, field, facets): if category in ('nested', 'function'): id_pattern = '%(id)s' @@ -345,6 +338,7 @@ def get_facet_field(category, field, facets): else: return None + def pairwise2(field, fq_filter, iterable): pairs = [] selected_values = [f['value'] for f in fq_filter] @@ -359,11 +353,12 @@ def pairwise2(field, fq_filter, iterable): }) return pairs + def range_pair(field, cat, fq_filter, iterable, end, collection_facet): # e.g. 
counts":["0",17430,"1000",1949,"2000",671,"3000",404,"4000",243,"5000",165],"gap":1000,"start":0,"end":6000} pairs = [] selected_values = [f['value'] for f in fq_filter] - is_single_unit_gap = re.match('^[\+\-]?1[A-Za-z]*$', str(collection_facet['properties']['gap'])) is not None + is_single_unit_gap = re.match(r'^[\+\-]?1[A-Za-z]*$', str(collection_facet['properties']['gap'])) is not None is_up = collection_facet['properties']['sort'] == 'asc' if collection_facet['properties']['sort'] == 'asc' and ( @@ -422,7 +417,7 @@ def range_pair2(facet_field, cat, fq_filter, iterable, end, facet, collection_fa # e.g. counts":["0",17430,"1000",1949,"2000",671,"3000",404,"4000",243,"5000",165],"gap":1000,"start":0,"end":6000} pairs = [] selected_values = [f['value'] for f in fq_filter] - is_single_unit_gap = re.match('^[\+\-]?1[A-Za-z]*$', str(facet['gap'])) is not None + is_single_unit_gap = re.match(r'^[\+\-]?1[A-Za-z]*$', str(facet['gap'])) is not None is_up = facet['sort'] == 'asc' if facet['sort'] == 'asc' and facet['type'] == 'range-up': @@ -541,7 +536,7 @@ def augment_solr_response(response, collection, query): if response and response.get('facets'): for facet in collection['facets']: category = facet['type'] - name = facet['id'] # Nested facets can only have one name + name = facet['id'] # Nested facets can only have one name if category == 'function' and name in response['facets']: collection_facet = get_facet_field(category, name, collection['facets']) @@ -614,7 +609,7 @@ def augment_solr_response(response, collection, query): legend = agg_keys[0].split(':', 2)[1] column = agg_keys[0] else: - legend = facet['field'] # 'count(%s)' % legend + legend = facet['field'] # 'count(%s)' % legend agg_keys = [column] _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows) @@ -670,9 +665,9 @@ def augment_solr_response(response, collection, query): agg_keys.insert(0, 'count') counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows) - #_convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2) + # _convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2) dimension = len(facet_fields) - elif len(collection_facet['properties']['facets']) == 1 or (len(collection_facet['properties']['facets']) == 2 and \ + elif len(collection_facet['properties']['facets']) == 1 or (len(collection_facet['properties']['facets']) == 2 and collection_facet['properties']['facets'][1]['aggregate']['function'] != 'count'): # Dimension 1 with 1 count or agg dimension = 1 @@ -713,7 +708,7 @@ def augment_solr_response(response, collection, query): 'counts': counts, 'extraSeries': extraSeries, 'dimension': dimension, - 'response': {'response': {'start': 0, 'numFound': num_bucket}}, # Todo * nested buckets + offsets + 'response': {'response': {'start': 0, 'numFound': num_bucket}}, # Todo * nested buckets + offsets 'docs': [dict(list(zip(cols, row))) for row in rows], 'fieldsAttributes': [ Collection2._make_gridlayout_header_field( @@ -738,6 +733,7 @@ def augment_solr_response(response, collection, query): return augmented + def _get_agg_keys(counts): for count in counts: keys = [key for key, value in list(count.items()) if key.lower().startswith('agg_') or key.lower().startswith('dim_')] @@ -745,6 +741,7 @@ def _get_agg_keys(counts): return keys return [] + def augment_response(collection, query, response): # HTML escaping if not query.get('download'): @@ -762,18 +759,18 @@ def augment_response(collection, query, 
response): for field, value in doc.items(): if isinstance(value, numbers.Number): escaped_value = value - elif field == '_childDocuments_': # Nested documents + elif field == '_childDocuments_': # Nested documents escaped_value = value - elif isinstance(value, list): # Multivalue field - escaped_value = [smart_unicode(escape(val), errors='replace') for val in value] + elif isinstance(value, list): # Multivalue field + escaped_value = [smart_str(escape(val), errors='replace') for val in value] else: - value = smart_unicode(value, errors='replace') + value = smart_str(value, errors='replace') escaped_value = escape(value) doc[field] = escaped_value doc['externalLink'] = link doc['details'] = [] - doc['hueId'] = smart_unicode(doc.get(id_field, '')) + doc['hueId'] = smart_str(doc.get(id_field, '')) if 'moreLikeThis' in response and response['moreLikeThis'][doc['hueId']].get('numFound'): _doc = response['moreLikeThis'][doc['hueId']] doc['_childDocuments_'] = _doc['docs'] @@ -785,14 +782,14 @@ def augment_response(collection, query, response): id_field = collection.get('idField') if id_field: for doc in response['response']['docs']: - if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields: - highlighting = response['highlighting'][smart_unicode(doc[id_field])] + if id_field in doc and smart_str(doc[id_field]) in highlighted_fields: + highlighting = response['highlighting'][smart_str(doc[id_field])] if highlighting: escaped_highlighting = {} for field, hls in highlighting.items(): _hls = [ - escape(smart_unicode(hl, errors='replace')).replace('<em>', '').replace('</em>', '') + escape(smart_str(hl, errors='replace')).replace('<em>', '').replace('</em>', '') for hl in hls ] escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls @@ -857,7 +854,7 @@ def __augment_stats_2d(counts, label, fq_fields, fq_values, fq_filter, _selected count = bucket['count'] dim_row = [val] - _fq_fields = fq_fields + _fields[0:1] # Pick dim field if there is one + _fq_fields = fq_fields + _fields[0:1] # Pick dim field if there is one _fq_values = fq_values + [val] for agg_key in agg_keys: @@ -866,18 +863,18 @@ def __augment_stats_2d(counts, label, fq_fields, fq_values, fq_filter, _selected augmented.append(_get_augmented(count, val, label, _fq_values, _fq_fields, fq_filter, _selected_values)) elif agg_key.startswith('agg_'): label = fq_values[0] if len(_fq_fields) >= 2 else agg_key.split(':', 2)[1] - if agg_keys.index(agg_key) == 0: # One count by dimension + if agg_keys.index(agg_key) == 0: # One count by dimension dim_row.append(count) - if not agg_key in bucket: # No key if value is 0 + if agg_key not in bucket: # No key if value is 0 bucket[agg_key] = 0 dim_row.append(bucket[agg_key]) augmented.append(_get_augmented(bucket[agg_key], val, label, _fq_values, _fq_fields, fq_filter, _selected_values)) else: - augmented.append(_get_augmented(count, val, label, _fq_values, _fq_fields, fq_filter, _selected_values)) # Needed? + augmented.append(_get_augmented(count, val, label, _fq_values, _fq_fields, fq_filter, _selected_values)) # Needed? 
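# Illustrative aside on the r'' prefixes adopted above in range_pair()/range_pair2()
# (a sketch, not part of the patch): without the raw-string prefix, escapes such
# as "\d" or "\+" inside regex pattern literals raise "invalid escape sequence"
# warnings on Python 3.6+.
import re

GAP_RE = re.compile(r'^[\+\-]?1[A-Za-z]*$')  # same pattern as is_single_unit_gap

assert GAP_RE.match('+1DAYS') is not None   # single-unit gap
assert GAP_RE.match('1000') is None         # numeric gap, not single-unit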
# List nested fields _agg_keys = [] - if agg_key in bucket and bucket[agg_key]['buckets']: # Protect against empty buckets + if agg_key in bucket and bucket[agg_key]['buckets']: # Protect against empty buckets for key, value in list(bucket[agg_key]['buckets'][0].items()): if key.lower().startswith('agg_') or key.lower().startswith('dim_'): _agg_keys.append(key) @@ -904,7 +901,7 @@ def __augment_stats_2d(counts, label, fq_fields, fq_values, fq_filter, _selected new_rows.append(dim_row + row) dim_row = new_rows - if dim_row and type(dim_row[0]) == list: + if dim_row and type(dim_row[0]) is list: rows.extend(dim_row) else: rows.append(dim_row) @@ -997,7 +994,6 @@ def augment_solr_exception(response, collection): }) - def extract_solr_exception_message(e): response = {} diff --git a/desktop/libs/dashboard/src/dashboard/urls.py b/desktop/libs/dashboard/src/dashboard/urls.py index a034c9191f9..c05b4224849 100644 --- a/desktop/libs/dashboard/src/dashboard/urls.py +++ b/desktop/libs/dashboard/src/dashboard/urls.py @@ -17,13 +17,9 @@ import sys -from dashboard import views as dashboard_views -from dashboard import api as dashboard_api +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from dashboard import api as dashboard_api, views as dashboard_views urlpatterns = [ re_path(r'^$', dashboard_views.index, name='index'), diff --git a/desktop/libs/dashboard/src/dashboard/views.py b/desktop/libs/dashboard/src/dashboard/views.py index 2a66a5d212e..08411b6e809 100644 --- a/desktop/libs/dashboard/src/dashboard/views.py +++ b/desktop/libs/dashboard/src/dashboard/views.py @@ -15,60 +15,53 @@ # See the License for the specific language governing permissions and # limitations under the License. 
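# The import hunk below applies the translation-import cleanup used throughout
# this patch: the Python-2 fallback to ugettext is dropped because modern Django
# only ships gettext / gettext_lazy.  Minimal sketch of the pattern
# (illustrative; the function name is hypothetical):
from django.utils.translation import gettext as _

def search_page_title():
    # call sites keep using the same "_" alias; only the import simplifies
    return _('Search')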
-import json -import logging import re import sys +import json +import logging from django.urls import reverse from django.utils.html import escape +from django.utils.translation import gettext as _ +from dashboard.conf import get_engines +from dashboard.controller import DashboardController, can_edit_index +from dashboard.dashboard_api import get_engine +from dashboard.decorators import allow_owner_only +from dashboard.models import Collection2 from desktop.conf import USE_NEW_EDITOR from desktop.lib.django_util import JsonResponse, render from desktop.lib.exceptions_renderable import PopupException -from desktop.models import Document2, Document +from desktop.models import Document, Document2 from desktop.views import antixss - -from search.conf import LATEST from indexer.views import importer - -from dashboard.dashboard_api import get_engine -from dashboard.decorators import allow_owner_only -from dashboard.conf import get_engines -from dashboard.controller import DashboardController, can_edit_index -from dashboard.models import Collection2 - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from search.conf import LATEST LOG = logging.getLogger() DEFAULT_LAYOUT = [ - {"size":2,"rows":[{"widgets":[]}],"drops":["temp"],"klass":"card card-home card-column span2"}, - {"size":10,"rows":[{"widgets":[ - {"size":12,"name":"Filter Bar","widgetType":"filter-widget", "id":"99923aef-b233-9420-96c6-15d48293532b", - "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}, - {"widgets":[ - {"size":12,"name":"Grid Results","widgetType":"resultset-widget", "id":"14023aef-b233-9420-96c6-15d48293532b", - "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}], - "drops":["temp"],"klass":"card card-home card-column span10"}, + {"size": 2, "rows": [{"widgets": []}], "drops": ["temp"], "klass": "card card-home card-column span2"}, + {"size": 10, "rows": [{"widgets": [ + {"size": 12, "name": "Filter Bar", "widgetType": "filter-widget", "id": "99923aef-b233-9420-96c6-15d48293532b", + "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}, + {"widgets": [ + {"size": 12, "name": "Grid Results", "widgetType": "resultset-widget", "id": "14023aef-b233-9420-96c6-15d48293532b", + "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}], + "drops": ["temp"], "klass": "card card-home card-column span10"}, ] REPORT_LAYOUT = [ - {u'klass': u'card card-home card-column span12', u'rows': [{"widgets":[]}], u'id': u'7e0c0a45-ae90-43a6-669a-2a852ef4a449', u'drops': [u'temp'], u'size': 12} + {u'klass': u'card card-home card-column span12', u'rows': [{"widgets": []}], u'id': u'7e0c0a45-ae90-43a6-669a-2a852ef4a449', u'drops': [u'temp'], u'size': 12} # noqa: E501 ] QUERY_BUILDER_LAYOUT = [ {u'klass': u'card card-home card-column span12', u'rows': [ {u'widgets': [ - {u'name': u'Filter Bar', u'widgetType': u'filter-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'abe50df3-a5a0-408a-8122-019d779b4354', u'size': 12}], + {u'name': u'Filter Bar', u'widgetType': u'filter-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'abe50df3-a5a0-408a-8122-019d779b4354', u'size': 12}], # noqa: E501 u'id': u'22532a0a-8e43-603a-daa9-77d5d233fd7f', u'columns': []}, {u'widgets': [], u'id': 
u'ebb7fe4d-64c5-c660-bdc0-02a77ff8321e', u'columns': []}, - {u'widgets': [{u'name': u'Grid Results', u'widgetType': u'resultset-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'14023aef-b233-9420-96c6-15d48293532b', u'size': 12}], + {u'widgets': [{u'name': u'Grid Results', u'widgetType': u'resultset-widget', u'properties': {}, u'isLoading': False, u'offset': 0, u'klass': u'card card-widget span12', u'id': u'14023aef-b233-9420-96c6-15d48293532b', u'size': 12}], # noqa: E501 u'id': u'2bfa8b4b-f7f3-1491-4de0-282130c6ab61', u'columns': []} ], u'id': u'7e0c0a45-ae90-43a6-669a-2a852ef4a449', u'drops': [u'temp'], u'size': 12 @@ -76,19 +69,19 @@ ] TEXT_SEARCH_LAYOUT = [ - {"size":12,"rows":[{"widgets":[ - {"size":12,"name":"Filter Bar","widgetType":"filter-widget", "id":"99923aef-b233-9420-96c6-15d48293532b", - "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}, - {"widgets":[ - {"size":12,"name":"HTML Results","widgetType":"html-resultset-widget", "id":"14023aef-b233-9420-96c6-15d48293532b", - "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}], - "drops":["temp"],"klass":"card card-home card-column span12"}, + {"size": 12, "rows": [{"widgets": [ + {"size": 12, "name": "Filter Bar", "widgetType": "filter-widget", "id": "99923aef-b233-9420-96c6-15d48293532b", + "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}, + {"widgets": [ + {"size": 12, "name": "HTML Results", "widgetType": "html-resultset-widget", "id": "14023aef-b233-9420-96c6-15d48293532b", + "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}], + "drops": ["temp"], "klass": "card card-home card-column span12"}, ] def index(request, is_mobile=False): engine = request.GET.get('engine', 'solr') - cluster = request.POST.get('cluster','""') + cluster = request.POST.get('cluster', '""') collection_id = request.GET.get('collection') collections = get_engine(request.user, engine, cluster=cluster).datasets() if engine != 'report' else ['default'] @@ -140,12 +133,14 @@ def index(request, is_mobile=False): 'is_report': collection.data['collection'].get('engine') == 'report' }) + def index_m(request): return index(request, True) + def new_search(request): engine = request.GET.get('engine', 'solr') - cluster = request.POST.get('cluster','""') + cluster = request.POST.get('cluster', '""') collections = get_engine(request.user, engine, cluster=cluster).datasets() if engine != 'report' else ['default'] @@ -190,12 +185,13 @@ def new_search(request): 'is_report': engine == 'report' }) + def browse(request, name, is_mobile=False): engine = request.GET.get('engine', 'solr') source = request.GET.get('source', 'data') if engine == 'solr': - name = re.sub('^default\.', '', name) + name = re.sub(r'^default\.', '', name) database = name.split('.', 1)[0] collections = get_engine(request.user, engine, source=source).datasets(database=database) @@ -217,10 +213,10 @@ def browse(request, name, is_mobile=False): 'autoLoad': True, 'collections': collections, 'layout': [ - {"size":12,"rows":[{"widgets":[ - {"size":12,"name":"Grid Results","id":"52f07188-f30f-1296-2450-f77e02e1a5c0","widgetType":"resultset-widget", - "properties":{},"offset":0,"isLoading":True,"klass":"card card-widget span12"}]}], - "drops":["temp"],"klass":"card card-home card-column span10"} + {"size": 12, "rows": [{"widgets": [ + {"size": 12, "name": "Grid Results", "id": 
"52f07188-f30f-1296-2450-f77e02e1a5c0", "widgetType": "resultset-widget", + "properties": {}, "offset": 0, "isLoading": True, "klass": "card card-widget span12"}]}], + "drops": ["temp"], "klass": "card card-home card-column span10"} ], 'qb_layout': QUERY_BUILDER_LAYOUT, 'text_search_layout': TEXT_SEARCH_LAYOUT, @@ -252,8 +248,12 @@ def save(request): if collection['id']: dashboard_doc = Document2.objects.get(id=collection['id']) else: - dashboard_doc = Document2.objects.create(name=collection['name'], uuid=collection['uuid'], type='search-dashboard', owner=request.user, description=collection['label']) - Document.objects.link(dashboard_doc, owner=request.user, name=collection['name'], description=collection['label'], extra='search-dashboard') + dashboard_doc = Document2.objects.create( + name=collection['name'], uuid=collection['uuid'], type='search-dashboard', owner=request.user, description=collection['label'] + ) + Document.objects.link( + dashboard_doc, owner=request.user, name=collection['name'], description=collection['label'], extra='search-dashboard' + ) dashboard_doc.update_data({ 'collection': collection, diff --git a/desktop/libs/hadoop/src/hadoop/conf.py b/desktop/libs/hadoop/src/hadoop/conf.py index 3b2a8efbabb..ba0509349bc 100644 --- a/desktop/libs/hadoop/src/hadoop/conf.py +++ b/desktop/libs/hadoop/src/hadoop/conf.py @@ -15,19 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import fnmatch -import logging import os import sys +import fnmatch +import logging -from desktop.conf import default_ssl_validate, has_connectors -from desktop.lib.conf import Config, UnspecifiedConfigSection, ConfigSection, coerce_bool - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t +from django.utils.translation import gettext_lazy as _t +from desktop.conf import default_ssl_validate, has_connectors +from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_bool LOG = logging.getLogger() DEFAULT_NN_HTTP_PORT = 50070 @@ -94,13 +90,13 @@ def get_hadoop_conf_dir_default(): ), NN_KERBEROS_PRINCIPAL=Config( "nn_kerberos_principal", - help="Kerberos principal for NameNode", # Unused + help="Kerberos principal for NameNode", # Unused default="hdfs", type=str ), DN_KERBEROS_PRINCIPAL=Config( "dn_kerberos_principal", - help="Kerberos principal for DataNode", # Unused + help="Kerberos principal for DataNode", # Unused default="hdfs", type=str ), @@ -124,8 +120,7 @@ def get_hadoop_conf_dir_default(): HADOOP_CONF_DIR=Config( key="hadoop_conf_dir", dynamic_default=get_hadoop_conf_dir_default, - help= - "Directory of the Hadoop configuration. Defaults to the environment variable HADOOP_CONF_DIR when set, " + help="Directory of the Hadoop configuration. 
Defaults to the environment variable HADOOP_CONF_DIR when set, " "or '/etc/hadoop/conf'.", type=str ), @@ -182,7 +177,7 @@ def get_hadoop_conf_dir_default(): help="Whether Hue should use this cluster to run jobs", default=True, type=coerce_bool - ), # True here for backward compatibility + ), # True here for backward compatibility ) ) ) @@ -196,6 +191,7 @@ def get_spark_history_server_from_cm(): return ManagerApi().get_spark_history_server_url() return None + def get_spark_history_server_url(): """ Try to get Spark history server URL from Cloudera Manager API, otherwise give default URL @@ -203,6 +199,7 @@ def get_spark_history_server_url(): url = get_spark_history_server_from_cm() return url if url else 'http://localhost:18088' + def get_spark_history_server_security_enabled(): """ Try to get Spark history server URL from Cloudera Manager API, otherwise give default URL @@ -234,7 +231,7 @@ def get_spark_history_server_security_enabled(): SECURITY_ENABLED=Config("security_enabled", help="Is running with Kerberos authentication", default=False, type=coerce_bool), SUBMIT_TO=Config('submit_to', help="Whether Hue should use this cluster to run jobs", - default=False, type=coerce_bool), # False here for backward compatibility + default=False, type=coerce_bool), # False here for backward compatibility IS_YARN=Config("is_yarn", help="Attribute set only on YARN clusters and not MR1 ones.", default=True, type=coerce_bool), RESOURCE_MANAGER_API_URL=Config("resourcemanager_api_url", @@ -309,7 +306,7 @@ def test_spark_configuration(user): try: spark_hs_api.get_history_server_api().applications() status = 'OK' - except: + except Exception: LOG.exception('failed to get spark history server status') return status @@ -319,7 +316,7 @@ def test_yarn_configurations(user): result = [] try: - from jobbrowser.api import get_api # Required for cluster HA testing + from jobbrowser.api import get_api # Required for cluster HA testing except Exception as e: LOG.warning('Jobbrowser is disabled, skipping test_yarn_configurations') return result diff --git a/desktop/libs/hadoop/src/hadoop/core_site.py b/desktop/libs/hadoop/src/hadoop/core_site.py index 3e075f2ca19..e2b03b79893 100644 --- a/desktop/libs/hadoop/src/hadoop/core_site.py +++ b/desktop/libs/hadoop/src/hadoop/core_site.py @@ -15,21 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
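# The core_site.py hunk below removes the Python-2 shim "open_file = file";
# the builtin open() now reads the XML directly.  Hedged sketch of the same
# idea (the path here is hypothetical):
def read_core_site(path='/etc/hadoop/conf/core-site.xml'):
    with open(path, 'r') as f:  # "with" also closes the handle
        return f.read()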
-from __future__ import absolute_import +import re import errno import logging -import re -import sys - -from hadoop import conf -from hadoop import confparse from desktop.lib.paths import get_config_root_hadoop - -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file +from hadoop import confparse __all = ['get_conf', 'get_trash_interval', 'get_s3a_access_key', 'get_s3a_secret_key'] @@ -63,6 +54,7 @@ _CNF_SECURITY = 'hadoop.security.authentication' + def reset(): """Reset the cached conf""" global _CORE_SITE_DICT @@ -85,7 +77,7 @@ def _parse_core_site(): try: _CORE_SITE_PATH = get_config_root_hadoop('core-site.xml') - data = open_file(_CORE_SITE_PATH, 'r').read() + data = open(_CORE_SITE_PATH, 'r').read() except IOError as err: if err.errno != errno.ENOENT: LOG.error('Cannot read from "%s": %s' % (_CORE_SITE_PATH, err)) @@ -104,6 +96,7 @@ def get_trash_interval(): """ return get_conf().get(_CNF_TRASH_INTERVAL, 0) + def get_s3a_access_key(): """ Get S3A AWS access key ID @@ -111,6 +104,7 @@ def get_s3a_access_key(): """ return get_conf().get(_CNF_S3A_ACCESS_KEY) + def get_s3a_secret_key(): """ Get S3A AWS secret key @@ -118,6 +112,7 @@ def get_s3a_secret_key(): """ return get_conf().get(_CNF_S3A_SECRET_KEY) + def get_s3a_session_token(): return get_conf().get(_CNF_S3A_SESSION_TOKEN) @@ -134,12 +129,14 @@ def get_raz_api_url(): return s3a_raz_url or adls_raz_url + def get_raz_cluster_name(): """ Get the name of the Cluster where Raz is running. """ return get_conf().get(_CNF_S3A_RAZ_CLUSTER_NAME, '') or get_conf().get(_CNF_ADLS_RAZ_CLUSTER_NAME, '') + def get_raz_s3_default_bucket(): """ Get the name of the default S3 bucket of Raz @@ -153,9 +150,11 @@ def get_raz_s3_default_bucket(): 'bucket': match.group('bucket') } + def get_default_fs(): return get_conf().get(_CNF_DEFAULT_FS) + def get_adls_client_id(): """ Get ADLS client id @@ -163,6 +162,7 @@ def get_adls_client_id(): """ return get_conf().get(_CNF_ADLS_CLIENT_ID) + def get_adls_authentication_code(): """ Get ADLS secret key @@ -170,6 +170,7 @@ def get_adls_authentication_code(): """ return get_conf().get(_CNF_ADLS_AUTHENTICATION_CODE) + def get_adls_refresh_url(): """ Get ADLS secret key @@ -177,6 +178,7 @@ def get_adls_refresh_url(): """ return get_conf().get(_CNF_ADLS_REFRESH_URL) + def get_adls_grant_type(): """ Get ADLS provider type @@ -184,14 +186,18 @@ def get_adls_grant_type(): """ return get_conf().get(_CNF_ADLS_GRANT_TYPE) + def is_kerberos_enabled(): return get_conf().get(_CNF_SECURITY) == 'kerberos' + def get_azure_client_id(): return get_conf().get(_CNF_AZURE_CLIENT_ID) + def get_azure_client_secret(): return get_conf().get(_CNF_AZURE_CLIENT_SECRET) + def get_azure_client_endpoint(): return get_conf().get(_CNF_AZURE_CLIENT_ENDPOINT) diff --git a/desktop/libs/hadoop/src/hadoop/core_site_tests.py b/desktop/libs/hadoop/src/hadoop/core_site_tests.py index f7f0d82ac92..d48d05e0fda 100644 --- a/desktop/libs/hadoop/src/hadoop/core_site_tests.py +++ b/desktop/libs/hadoop/src/hadoop/core_site_tests.py @@ -15,21 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import absolute_import - import os -import sys import logging import tempfile from desktop.models import get_remote_home_storage from hadoop import conf, core_site -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file - LOG = logging.getLogger() @@ -78,7 +70,7 @@ def test_core_site(): """ - open_file(os.path.join(hadoop_home, 'core-site.xml'), 'w').write(xml) + open(os.path.join(hadoop_home, 'core-site.xml'), 'w').write(xml) finish = ( conf.HDFS_CLUSTERS.set_for_testing({'default': {}}), diff --git a/desktop/libs/hadoop/src/hadoop/fs/__init__.py b/desktop/libs/hadoop/src/hadoop/fs/__init__.py index 1a560131a27..1c7513def3a 100644 --- a/desktop/libs/hadoop/src/hadoop/fs/__init__.py +++ b/desktop/libs/hadoop/src/hadoop/fs/__init__.py @@ -29,35 +29,25 @@ When possible, the interfaces here have fidelity to the native python interfaces. """ -from __future__ import division -from future import standard_library -from functools import reduce -standard_library.install_aliases() -from builtins import map -from builtins import range -from builtins import object -import errno -import grp -import logging -import math import os -import posixpath -import pwd import re -import shutil +import grp +import pwd +import math import stat -import sys - -if sys.version_info[0] > 2: - from builtins import open as builtins_open -else: - from __builtin__ import open as builtins_open +import errno +import shutil +import logging +import posixpath +from builtins import map, object, open as builtins_open, range +from functools import reduce SEEK_SET, SEEK_CUR, SEEK_END = os.SEEK_SET, os.SEEK_CUR, os.SEEK_END # The web (and POSIX) always uses forward slash as a separator -LEADING_DOUBLE_SEPARATORS = re.compile("^" + posixpath.sep*2) +LEADING_DOUBLE_SEPARATORS = re.compile("^" + posixpath.sep * 2) + def normpath(path): """ @@ -73,6 +63,7 @@ def normpath(path): class IllegalPathException(Exception): pass + class LocalSubFileSystem(object): """ Facade around normal python filesystem calls, for a temporary/local @@ -106,7 +97,7 @@ def __init__(self, root): self.root = root self.name = "file://%s" % self.root if not os.path.isdir(root): - logging.fatal("Root(%s) not found." % root + + logging.fatal("Root(%s) not found." % root + " Perhaps you need to run manage.py create_test_fs") def _resolve_path(self, path): @@ -162,6 +153,7 @@ def open(self, name, mode="r"): paths = [0] # complicated way of taking the intersection of three lists. assert not reduce(set.intersection, list(map(set, [paths, users, groups]))) + def wrapped(*args): self = args[0] newargs = list(args[1:]) @@ -172,7 +164,7 @@ def wrapped(*args): for i in groups: newargs[i] = grp.getgrnam(newargs[i]).gr_gid - if f == builtins_open and sys.version_info[0] > 2: + if f == builtins_open: return f(*newargs, encoding='utf-8') return f(*newargs) @@ -185,7 +177,7 @@ def wrapped(*args): mkdir = _wrap(os.mkdir) rmdir = _wrap(os.rmdir) listdir = _wrap(os.listdir) - rename = _wrap(os.rename, paths=[0,1]) + rename = _wrap(os.rename, paths=[0, 1]) exists = _wrap(os.path.exists) isfile = _wrap(os.path.isfile) isdir = _wrap(os.path.isdir) @@ -235,6 +227,7 @@ def listdir_stats(self, path): def __repr__(self): return "LocalFileSystem(%s)" % repr(self.root) + class FakeStatus(object): """ A fake implementation of HDFS health RPCs. 
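# Sketch of the _wrap() simplification above: with Python 2 gone, the builtin
# open() can always be given an explicit encoding, so the version check around
# "encoding='utf-8'" disappears.  (Illustrative; the file name is made up.)
import os
import tempfile

tmp = os.path.join(tempfile.mkdtemp(), 'greeting.txt')
with open(tmp, 'w', encoding='utf-8') as f:
    f.write('olá')  # non-ASCII round-trips regardless of the locale
with open(tmp, encoding='utf-8') as f:
    assert f.read() == 'olá'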
@@ -246,16 +239,16 @@ class FakeStatus(object): def get_messages(self): """Warnings/lint checks.""" return [ - dict(type="WARNING",message="All your base belong to us."), + dict(type="WARNING", message="All your base belong to us."), dict(type="INFO", message="Hamster Dance!") ] def get_health(self): o = dict() - GB = 1024*1024*1024 - o["bytesTotal"] = 5*GB - o["bytesUsed"] = math.floor(5*GB / 2) - o["bytesRemaining"] = 2*GB + GB = 1024 * 1024 * 1024 + o["bytesTotal"] = 5 * GB + o["bytesUsed"] = math.floor(5 * GB / 2) + o["bytesRemaining"] = 2 * GB o["bytesNonDfs"] = math.floor(GB / 2) o["liveDataNodes"] = 13 o["deadDataNodes"] = 2 @@ -269,8 +262,8 @@ def get_datanode_report(self): dinfo["name"] = "fake-%d" % i dinfo["storageID"] = "fake-id-%d" % i dinfo["host"] = "fake-host-%d" % i - dinfo["capacity"] = 123456789 - dinfo["dfsUsed"] = 23456779 + dinfo["capacity"] = 123456789 + dinfo["dfsUsed"] = 23456779 dinfo["remaining"] = 100000010 dinfo["xceiverCount"] = 3 dinfo["state"] = "NORMAL_STATE" @@ -280,8 +273,8 @@ def get_datanode_report(self): dinfo["name"] = "fake-dead-%d" % i dinfo["storageID"] = "fake-dead-id-%d" % i dinfo["host"] = "fake-dead-host-%d" % i - dinfo["capacity"] = 523456789 - dinfo["dfsUsed"] = 23456779 + dinfo["capacity"] = 523456789 + dinfo["dfsUsed"] = 23456779 dinfo["remaining"] = 500000010 dinfo["xceiverCount"] = 3 dinfo["state"] = "DECOMISSION_INPROGRESS" diff --git a/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py b/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py index c762066047b..8912e46c646 100644 --- a/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py +++ b/desktop/libs/hadoop/src/hadoop/fs/hadoopfs.py @@ -22,47 +22,36 @@ Interfaces for Hadoop filesystem access via the HADOOP-4707 Thrift APIs. """ -from __future__ import division -from past.builtins import cmp -from future import standard_library -standard_library.install_aliases() -from builtins import object -import codecs +import os +import sys +import math import errno +import codecs +import random import logging -import math -import os import posixpath -import random import subprocess -import sys - -from django.utils.encoding import smart_str +from builtins import object +from urllib.parse import urlsplit as lib_urlsplit -from desktop.lib import i18n +from django.utils.encoding import force_str, smart_str +from django.utils.translation import gettext as _ +from past.builtins import cmp import hadoop.conf -from hadoop.fs import normpath, SEEK_SET, SEEK_CUR, SEEK_END +from desktop.lib import i18n +from hadoop.fs import SEEK_CUR, SEEK_END, SEEK_SET, normpath from hadoop.fs.exceptions import PermissionDeniedException -if sys.version_info[0] > 2: - from django.utils.encoding import force_str - from urllib.parse import urlsplit as lib_urlsplit - from django.utils.translation import gettext as _ -else: - from django.utils.encoding import force_unicode as force_str - from urlparse import urlsplit as lib_urlsplit - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() DEFAULT_USER = "webui" # The number of bytes to read if not specified -DEFAULT_READ_SIZE = 1024*1024 # 1MB +DEFAULT_READ_SIZE = 1024 * 1024 # 1MB # The buffer size of the pipe to hdfs -put during upload -WRITE_BUFFER_SIZE = 128*1024 # 128K +WRITE_BUFFER_SIZE = 128 * 1024 # 128K # Class that we translate into PermissionDeniedException HADOOP_ACCESSCONTROLEXCEPTION = "org.apache.hadoop.security.AccessControlException" @@ -78,10 +67,12 @@ textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f}) is_binary_string = 
lambda bytes: bool(bytes.translate(None, textchars)) + def encode_fs_path(path): """encode_fs_path(path) -> byte string in utf8""" return smart_str(path, HDFS_ENCODING, errors='strict') + def decode_fs_path(path): """decode_fs_path(bytestring) -> unicode path""" return force_str(path, HDFS_ENCODING, errors='strict') @@ -158,7 +149,7 @@ def urlsplit(url): if schema not in ('hdfs', 'viewfs'): # Default to standard for non-hdfs return lib_urlsplit(url) - url = url[i+3:] + url = url[i + 3:] i = url.find('/') if i == -1: # Everything is netloc. Assume path is root. @@ -185,9 +176,9 @@ def create_home_dir(self, home_path=None): if home_path is None: home_path = self.get_home_dir() + from desktop.conf import DEFAULT_HDFS_SUPERUSER from hadoop.hdfs_site import get_umask_mode from useradmin.conf import HOME_DIR_PERMISSIONS, USE_HOME_DIR_PERMISSIONS - from desktop.conf import DEFAULT_HDFS_SUPERUSER mode = int(HOME_DIR_PERMISSIONS.get(), 8) if USE_HOME_DIR_PERMISSIONS.get() else (0o777 & (0o1777 ^ get_umask_mode())) if not self.exists(home_path): user = self.user @@ -242,7 +233,7 @@ def _copy_binary_file(self, local_src, remote_dst, chunk_size): self.append(remote_dst, chunk) chunk = src.read(chunk_size) LOG.info(_('Copied %s -> %s.') % (local_src, remote_dst)) - except: + except Exception: LOG.exception(_('Copying %s -> %s failed.') % (local_src, remote_dst)) raise finally: @@ -251,10 +242,8 @@ def _copy_binary_file(self, local_src, remote_dst, chunk_size): def _copy_non_binary_file(self, local_src, remote_dst, chunk_size): for data_format in ("ascii", "utf-8", "latin-1", "iso-8859"): src_copied = False - if sys.version_info[0] > 2: - src = open(local_src, encoding=data_format) - else: - src = codecs.open(local_src, encoding=data_format) + src = open(local_src, encoding=data_format) + try: self.create(remote_dst, permission=0o755) chunk = src.read(chunk_size) @@ -262,7 +251,7 @@ def _copy_non_binary_file(self, local_src, remote_dst, chunk_size): self.append(remote_dst, chunk) chunk = src.read(chunk_size) src_copied = True - except: + except Exception: LOG.exception(_('Copying %s -> %s failed with %s encoding format') % (local_src, remote_dst, data_format)) self.remove(remote_dst) finally: @@ -295,7 +284,6 @@ def _copy_file(self, local_src, remote_dst, chunk_size=1024 * 1024 * 64): else: LOG.info(_('Skipping %s (not a file).') % local_src) - @_coerce_exceptions def mktemp(self, subdir='', prefix='tmp', basedir=None): """ @@ -350,9 +338,6 @@ def listdir_stats(self): raise NotImplementedError(_("%(function)s has not been implemented.") % {'function': 'listdir_stats'}) - - - def require_open(func): """ Decorator that ensures that the file instance isn't closed when the @@ -365,8 +350,6 @@ def wrapper(self, *args, **kwargs): return wrapper - - class File(object): """ Represents an open file on HDFS. """ @@ -378,7 +361,7 @@ def __init__(self, fs, path, mode="r", buffering=False): self._block_cache = BlockCache() if buffering or mode != "r": - raise Exception("buffering and write support not yet implemented") # NYI + raise Exception("buffering and write support not yet implemented") # NYI stat = self._stat() @@ -386,7 +369,7 @@ def __init__(self, fs, path, mode="r", buffering=False): raise IOError(errno.ENOENT, "No such file or directory: '%s'" % path) if stat.isDir: raise IOError(errno.EISDIR, "Is a directory: '%s'" % path) - #TODO(todd) somehow we need to check permissions here - maybe we need an access() call? + # TODO(todd) somehow we need to check permissions here - maybe we need an access() call? 
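# Why the bare "except:" clauses above became "except Exception:": a bare
# except also swallows SystemExit and KeyboardInterrupt.  Minimal sketch
# (the helper is hypothetical; log_exception stands in for LOG.exception):
def copy_with_logging(do_copy, log_exception=print):
    try:
        do_copy()
    except Exception:        # Ctrl-C and interpreter exit still propagate
        log_exception('copy failed')
        raise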
# Minimal context manager implementation. # See: http://www.python.org/doc/2.5.2/lib/typecontextmanager.html @@ -395,7 +378,7 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close() - return False # don't supress exceptions. + return False # don't supress exceptions. @require_open def seek(self, offset, whence=0): @@ -413,7 +396,6 @@ def seek(self, offset, whence=0): def tell(self): return self.pos - def _get_block(self, pos): """Return the Block instance that contains the given offset""" cached_block = self._block_cache.find_block(pos) @@ -421,7 +403,7 @@ def _get_block(self, pos): return cached_block # Cache "miss" - fetch ahead 500MB worth of blocks - new_blocks = self.fs._get_blocks(self.path, pos, 500*1024*1024) + new_blocks = self.fs._get_blocks(self.path, pos, 500 * 1024 * 1024) self._block_cache.insert_new_blocks(new_blocks) result = self._block_cache.find_block(pos) if not result: @@ -463,7 +445,7 @@ def read(self, length=DEFAULT_READ_SIZE): read_so_far = 0 while read_so_far < length: this_data = self._read_in_block(length - read_so_far) - if this_data == "": # eof + if this_data == "": # eof break read_so_far += len(this_data) result.append(this_data) @@ -515,6 +497,7 @@ def __init__(self, fs, path, mode="w", block_size=None): close_fds=True, env=self.subprocess_env, bufsize=WRITE_BUFFER_SIZE) + @require_open def write(self, data): """May raise IOError, particularly EPIPE""" diff --git a/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py b/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py index 8bbd213940f..19c58039379 100644 --- a/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py +++ b/desktop/libs/hadoop/src/hadoop/fs/test_webhdfs.py @@ -16,25 +16,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
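# The test assertions below drop Python-2 branches such as
#   b"hello" if sys.version_info[0] > 2 else "hello"
# because WebHDFS reads return bytes on Python 3.  Quick sketch of the
# distinction the tests rely on (literals are illustrative):
data = b"hello"
assert data == "hello".encode('utf-8')
assert data != "hello"              # bytes never compare equal to str on Python 3
assert data.decode('utf-8') == "hello"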
-from builtins import map -from builtins import zip -from builtins import range -from builtins import object -import logging -import pytest import os -import random import sys +import random +import logging import threading -import unittest +from builtins import map, object, range, zip +from functools import reduce + +import pytest from django.test import TestCase from hadoop import pseudo_hdfs4 from hadoop.fs.exceptions import WebHdfsException from hadoop.fs.hadoopfs import Hdfs from hadoop.pseudo_hdfs4 import is_live_cluster -from functools import reduce - LOG = logging.getLogger() @@ -65,7 +61,7 @@ def test_webhdfs(self): try: f.write("hello") f.close() - assert (b"hello" if sys.version_info[0] > 2 else "hello") == fs.open(test_file).read() + assert (b"hello") == fs.open(test_file).read() assert 5 == fs.stats(test_file)["size"] assert fs.isfile(test_file) assert not fs.isfile("/") @@ -96,14 +92,14 @@ def test_seek(self): f = fs.open(test_file, "r") f.seek(0, os.SEEK_SET) - assert (b"he" if sys.version_info[0] > 2 else "he") == f.read(2) + assert (b"he") == f.read(2) f.seek(1, os.SEEK_SET) - assert (b"el" if sys.version_info[0] > 2 else "el") == f.read(2) + assert (b"el") == f.read(2) f.seek(-1, os.SEEK_END) - assert (b"o" if sys.version_info[0] > 2 else "o") == f.read() + assert (b"o") == f.read() f.seek(0, os.SEEK_SET) f.seek(2, os.SEEK_CUR) - assert (b"ll" if sys.version_info[0] > 2 else "ll") == f.read(2) + assert (b"ll") == f.read(2) finally: fs.remove(test_file) @@ -122,14 +118,14 @@ def test_seek_across_blocks(self): f.close() for i in range(1, 10): - f = fs.open(test_file, "rt" if sys.version_info[0] > 2 else "r") + f = fs.open(test_file, "rt") for j in range(1, 100): offset = random.randint(0, len(data) - 1) f.seek(offset, os.SEEK_SET) - t = data[offset:offset+50] - if sys.version_info[0] > 2: - t = t.encode('utf-8') + t = data[offset:offset + 50] + t = t.encode('utf-8') + assert t == f.read(50) f.close() @@ -191,7 +187,6 @@ def test_umask(self): finally: fs._umask = fs_umask - def test_umask_overriden(self): fs = self.cluster.fs @@ -211,7 +206,6 @@ def test_umask_overriden(self): finally: fs._umask = fs_umask - def test_umask_without_sticky(self): fs = self.cluster.fs @@ -231,7 +225,6 @@ def test_umask_without_sticky(self): finally: fs._umask = fs_umask - def test_copy_remote_dir(self): fs = self.cluster.fs @@ -245,7 +238,7 @@ def test_copy_remote_dir(self): f2.close() new_owner = 'testcopy' - new_owner_dir = self.prefix + '/' + new_owner + '/test-copy' + new_owner_dir = self.prefix + '/' + new_owner + '/test-copy' fs.copy_remote_dir(src_dir, new_owner_dir, dir_mode=0o755, owner=new_owner) @@ -310,7 +303,7 @@ def check_existence(name, parent, present=True): if present: assert name in listing, f"{name} should be in {listing}" else: - assert not name in listing, f"{name} should not be in {listing}" + assert name not in listing, f"{name} should not be in {listing}" name = u'''pt-Olá_ch-你好_ko-안녕_ru-Здравствуйте%20,.<>~`!@$%^&()_-+='"''' prefix = self.prefix + '/tmp/i18n' @@ -547,8 +540,10 @@ def test_trash_users(self): class test_local(object): def __getattribute__(self, name): return object.__getattribute__(self, name) + def __setattr__(self, name, value): return object.__setattr__(self, name, value) + def __delattr__(self, name): return object.__delattr__(self, name) @@ -587,13 +582,11 @@ def __delattr__(self, name): def test_check_access(self): # Set user to owner self.cluster.fs.setuser('test') - assert ((b'' if sys.version_info[0] > 2 else '') == - 
self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content + assert ((b'') == self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content # Set user to superuser self.cluster.fs.setuser(self.cluster.superuser) - assert ((b'' if sys.version_info[0] > 2 else '') == - self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content + assert ((b'') == self.cluster.fs.check_access(path='/user/test', aclspec='rw-')) # returns zero-length content # Set user to non-authorized, non-superuser user self.cluster.fs.setuser('nonadmin') diff --git a/desktop/libs/hadoop/src/hadoop/fs/upload.py b/desktop/libs/hadoop/src/hadoop/fs/upload.py index 75a169b769b..32c0627fa7e 100644 --- a/desktop/libs/hadoop/src/hadoop/fs/upload.py +++ b/desktop/libs/hadoop/src/hadoop/fs/upload.py @@ -35,6 +35,7 @@ from builtins import object from django.core.files.uploadhandler import FileUploadHandler, SkipFile, StopFutureHandlers, StopUpload, UploadFileException +from django.utils.translation import gettext as _ import hadoop.cluster from desktop.lib import fsmanager @@ -44,11 +45,6 @@ from hadoop.conf import UPLOAD_CHUNK_SIZE from hadoop.fs.exceptions import WebHdfsException -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() UPLOAD_SUBDIR = 'hue-uploads' diff --git a/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py b/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py index 537775f5402..cb5f2d698c9 100644 --- a/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py +++ b/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py @@ -19,45 +19,34 @@ Interfaces for Hadoop filesystem access via HttpFs/WebHDFS """ -from future import standard_library -standard_library.install_aliases() -from builtins import oct -from builtins import object +import stat +import time import errno import logging import posixpath -import stat -import sys import threading -import time -import urllib.request, urllib.error +import urllib.error +import urllib.request +from builtins import object, oct +from urllib.parse import unquote as urllib_unquote, urlparse from django.utils.encoding import smart_str +from django.utils.translation import gettext as _ +from past.builtins import long import hadoop.conf import desktop.conf - from desktop.lib.rest import http_client, resource -from past.builtins import long -from hadoop.fs import normpath as fs_normpath, SEEK_SET, SEEK_CUR, SEEK_END -from hadoop.fs.hadoopfs import Hdfs +from hadoop.fs import SEEK_CUR, SEEK_END, SEEK_SET, normpath as fs_normpath from hadoop.fs.exceptions import WebHdfsException -from hadoop.fs.webhdfs_types import WebHdfsStat, WebHdfsContentSummary -from hadoop.hdfs_site import get_nn_sentry_prefixes, get_umask_mode, get_supergroup, get_webhdfs_ssl - -if sys.version_info[0] > 2: - from urllib.parse import unquote as urllib_unquote, urlparse - from django.utils.translation import gettext as _ -else: - from urllib import unquote as urllib_unquote - from urlparse import urlparse - from django.utils.translation import ugettext as _ - +from hadoop.fs.hadoopfs import Hdfs +from hadoop.fs.webhdfs_types import WebHdfsContentSummary, WebHdfsStat +from hadoop.hdfs_site import get_nn_sentry_prefixes, get_supergroup, get_umask_mode, get_webhdfs_ssl DEFAULT_HDFS_SUPERUSER = desktop.conf.DEFAULT_HDFS_SUPERUSER.get() # The number of bytes to read if not specified -DEFAULT_READ_SIZE = 1024 * 1024 # 1MB +DEFAULT_READ_SIZE = 
1024 * 1024 # 1MB LOG = logging.getLogger() @@ -244,7 +233,7 @@ def normpath(self, path): @staticmethod def norm_path(path): path = fs_normpath(path) - #fs_normpath clears scheme:/ to scheme: which doesn't make sense + # fs_normpath clears scheme:/ to scheme: which doesn't make sense split = urlparse(path) if not split.path: path = split._replace(path="/").geturl() @@ -296,7 +285,6 @@ def get_content_summary(self, path): json = self._root.get(path, params, headers) return WebHdfsContentSummary(json['ContentSummary']) - def _stats(self, path): """This version of stats returns None if the entry is not found""" path = self.strip_normpath(path) @@ -374,7 +362,6 @@ def _trash(self, path, recursive=False): self.mkdir(self.dirname(trash_path)) self.rename(path, trash_path) - def _delete(self, path, recursive=False): """ _delete(path, recursive=False) @@ -515,7 +502,6 @@ def chown(self, path, user=None, group=None, recursive=False): else: self._root.put(path, params, headers=headers) - def chmod(self, path, mode, recursive=False): """ chmod(path, mode, recursive=False) @@ -533,7 +519,6 @@ def chmod(self, path, mode, recursive=False): else: self._root.put(path, params, headers=headers) - def get_home_dir(self): """get_home_dir() -> Home directory for the current user""" params = self._getparams() @@ -595,7 +580,6 @@ def read(self, path, offset, length, bufsize=None): return "" raise ex - def open(self, path, mode='r'): """ DEPRECATED! @@ -606,15 +590,12 @@ def open(self, path, mode='r'): """ return File(self, path, mode) - def getDefaultFilePerms(self): return 0o666 & (0o1777 ^ self._umask) - def getDefaultDirPerms(self): return 0o1777 & (0o1777 ^ self._umask) - def create(self, path, overwrite=False, blocksize=None, replication=None, permission=None, data=None): """ create(path, overwrite=False, blocksize=None, replication=None, permission=None) @@ -636,7 +617,6 @@ def create(self, path, overwrite=False, blocksize=None, replication=None, permis headers = self._getheaders() self._invoke_with_redirect('PUT', path, params, data, headers) - def append(self, path, data): """ append(path, data) @@ -649,7 +629,6 @@ def append(self, path, data): headers = self._getheaders() self._invoke_with_redirect('POST', path, params, data, headers) - # e.g. 
ACLSPEC = user:joe:rwx,user::rw- def modify_acl_entries(self, path, aclspec): path = self.strip_normpath(path) @@ -659,7 +638,6 @@ def modify_acl_entries(self, path, aclspec): headers = self._getheaders() return self._root.put(path, params, headers=headers) - def remove_acl_entries(self, path, aclspec): path = self.strip_normpath(path) params = self._getparams() @@ -668,7 +646,6 @@ def remove_acl_entries(self, path, aclspec): headers = self._getheaders() return self._root.put(path, params, headers=headers) - def remove_default_acl(self, path): path = self.strip_normpath(path) params = self._getparams() @@ -676,7 +653,6 @@ def remove_default_acl(self, path): headers = self._getheaders() return self._root.put(path, params, headers=headers) - def remove_acl(self, path): path = self.strip_normpath(path) params = self._getparams() @@ -684,7 +660,6 @@ def remove_acl(self, path): headers = self._getheaders() return self._root.put(path, params, headers=headers) - def set_acl(self, path, aclspec): path = self.strip_normpath(path) params = self._getparams() @@ -693,7 +668,6 @@ def set_acl(self, path, aclspec): headers = self._getheaders() return self._root.put(path, params, headers=headers) - def get_acl_status(self, path): path = self.strip_normpath(path) params = self._getparams() @@ -701,7 +675,6 @@ def get_acl_status(self, path): headers = self._getheaders() return self._root.get(path, params, headers=headers) - def check_access(self, path, aclspec='rw-'): path = self.strip_normpath(path) params = self._getparams() @@ -758,7 +731,6 @@ def copyfile(self, src, dst, skip_header=False): offset += cnt - def copy_remote_dir(self, source, destination, dir_mode=None, owner=None): if owner is None: owner = self.DEFAULT_USER @@ -777,7 +749,6 @@ def copy_remote_dir(self, source, destination, dir_mode=None, owner=None): else: self.do_as_user(owner, self.copyfile, source_file, destination_file) - def copy(self, src, dest, recursive=False, dir_mode=None, owner=None): """ Copy file, or directory, in HDFS to another location in HDFS. @@ -840,16 +811,13 @@ def copy(self, src, dest, recursive=False, dir_mode=None, owner=None): else: self.copyfile(src, dest) - @staticmethod def urlsplit(url): return Hdfs.urlsplit(url) - def get_hdfs_path(self, path): return posixpath.join(self.fs_defaultfs, path.lstrip('/')) - def _invoke_with_redirect(self, method, path, params=None, data=None, headers=None): """ Issue a request, and expect a redirect, and then submit the data to @@ -879,7 +847,6 @@ def _invoke_with_redirect(self, method, path, params=None, data=None, headers=No headers["Content-Type"] = 'application/octet-stream' return resource.Resource(client).invoke(method, data=data, headers=headers) - def _get_redirect_url(self, webhdfs_ex): """Retrieve the redirect url from an exception object""" try: @@ -909,7 +876,6 @@ def get_delegation_token(self, renewer): res = self._root.get(params=params, headers=headers) return res['Token'] and res['Token']['urlString'] - def do_as_user(self, username, fn, *args, **kwargs): prev_user = self.user try: @@ -918,11 +884,9 @@ def do_as_user(self, username, fn, *args, **kwargs): finally: self.setuser(prev_user) - def do_as_superuser(self, fn, *args, **kwargs): return self.do_as_user(self.superuser, fn, *args, **kwargs) - def do_recursively(self, fn, path, *args, **kwargs): for stat in self.listdir_stats(path): try: @@ -1021,7 +985,7 @@ def safe_octal(octal_value): This correctly handles octal values specified as a string or as a numeric. 
""" try: - return oct(octal_value).replace('o', '') # fix futurized octal value with 0o prefix + return oct(octal_value).replace('o', '') # fix futurized octal value with 0o prefix except TypeError: return str(octal_value).replace('o', '') diff --git a/desktop/libs/hadoop/src/hadoop/mini_cluster.py b/desktop/libs/hadoop/src/hadoop/mini_cluster.py index 4ca7e385512..7f0f723fce7 100644 --- a/desktop/libs/hadoop/src/hadoop/mini_cluster.py +++ b/desktop/libs/hadoop/src/hadoop/mini_cluster.py @@ -17,15 +17,15 @@ ####################################################### -## WARNING!!! ## -## This file is stale. Hadoop 0.23 and CDH4 ## -## do not support minicluster. This is replaced ## -## by webhdfs.py, to set up a running cluster. ## +# WARNING!!! ## +# This file is stale. Hadoop 0.23 and CDH4 ## +# do not support minicluster. This is replaced ## +# by webhdfs.py, to set up a running cluster. ## ####################################################### # A Python-side driver for MiniHadoopClusterManager -# +# # See README.testing for hints on how to use this, # and also look for other examples. # @@ -37,64 +37,51 @@ # echo "GET /" | nc -w 1 localhost $p # done -from __future__ import print_function -from future import standard_library -standard_library.install_aliases() -from builtins import object -import atexit -import subprocess import os import pwd -import logging import sys -import signal -import shutil +import json import time +import atexit +import shutil +import signal +import logging import tempfile -import json +import subprocess +from urllib.error import URLError as lib_URLError +from urllib.request import Request as lib_Request, urlopen as lib_urlopen + import lxml.etree +import hadoop.cluster from desktop.lib import python_util from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches -import hadoop.cluster - -if sys.version_info[0] > 2: - from urllib.request import Request as lib_Request - from urllib.error import URLError as lib_URLError - from urllib.request import urlopen as lib_urlopen - open_file = open -else: - from urllib2 import Request as lib_Request - from urllib2 import URLError as lib_URLError - from urllib2 import urlopen as lib_urlopen - open_file = file - # Starts mini cluster suspended until a debugger attaches to it. -DEBUG_HADOOP=False +DEBUG_HADOOP = False # Redirects mini cluster stderr to stderr. (Default is to put it in a file.) -USE_STDERR=os.environ.get("MINI_CLUSTER_USE_STDERR", False) +USE_STDERR = os.environ.get("MINI_CLUSTER_USE_STDERR", False) # Whether to clean up temp dir at exit -CLEANUP_TMP_DIR=os.environ.get("MINI_CLUSTER_CLEANUP", True) +CLEANUP_TMP_DIR = os.environ.get("MINI_CLUSTER_CLEANUP", True) # How long to wait for cluster to start up. (seconds) MAX_CLUSTER_STARTUP_TIME = 120.0 # List of classes to be used as plugins for the JT of the cluster. CLUSTER_JT_PLUGINS = 'org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin' # MR Task Scheduler. By default use the FIFO scheduler -CLUSTER_TASK_SCHEDULER='org.apache.hadoop.mapred.JobQueueTaskScheduler' +CLUSTER_TASK_SCHEDULER = 'org.apache.hadoop.mapred.JobQueueTaskScheduler' # MR queue names -CLUSTER_QUEUE_NAMES='default' +CLUSTER_QUEUE_NAMES = 'default' -STARTUP_CONFIGS={} +STARTUP_CONFIGS = {} # users and their groups which are used in Hue tests. 
TEST_USER_GROUP_MAPPING = { - 'test': ['test','users','supergroup'], 'chown_test': ['chown_test'], + 'test': ['test', 'users', 'supergroup'], 'chown_test': ['chown_test'], 'notsuperuser': ['notsuperuser'], 'gamma': ['gamma'], 'webui': ['webui'], 'hue': ['supergroup'] } -LOGGER=logging.getLogger() +LOGGER = logging.getLogger() class MiniHadoopCluster(object): @@ -126,7 +113,7 @@ def tmppath(filename): os.mkdir(in_conf_dir) self.log_dir = tmppath("logs") os.mkdir(self.log_dir) - f = open_file(os.path.join(in_conf_dir, "hadoop-metrics.properties"), "w") + f = open(os.path.join(in_conf_dir, "hadoop-metrics.properties"), "w") try: f.write(""" dfs.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext @@ -155,15 +142,26 @@ def tmppath(filename): 'mapred.queue.names': CLUSTER_QUEUE_NAMES}, tmppath('in-conf/mapred-site.xml')) - hadoop_policy_keys = ['client', 'client.datanode', 'datanode', 'inter.datanode', 'namenode', 'inter.tracker', 'job.submission', 'task.umbilical', 'refresh.policy', 'admin.operations'] + hadoop_policy_keys = [ + 'client', + 'client.datanode', + 'datanode', + 'inter.datanode', + 'namenode', + 'inter.tracker', + 'job.submission', + 'task.umbilical', + 'refresh.policy', + 'admin.operations', + ] hadoop_policy_config = {} for policy in hadoop_policy_keys: hadoop_policy_config['security.' + policy + '.protocol.acl'] = '*' write_config(hadoop_policy_config, tmppath('in-conf/hadoop-policy.xml')) - details_file = open_file(tmppath("details.json"), "w+") + details_file = open(tmppath("details.json"), "w+") try: - args = [ os.path.join(hadoop.conf.HADOOP_MR1_HOME.get(), 'bin', 'hadoop'), + args = [os.path.join(hadoop.conf.HADOOP_MR1_HOME.get(), 'bin', 'hadoop'), "jar", hadoop.conf.HADOOP_TEST_JAR.get(), "minicluster", @@ -193,7 +191,7 @@ def tmppath(filename): "-D", "hadoop.policy.file=%s/hadoop-policy.xml" % in_conf_dir, ] - for key,value in extra_configs.items(): + for key, value in extra_configs.items(): args.append("-D") args.append(key + "=" + value) @@ -229,13 +227,13 @@ def tmppath(filename): env["HADOOP_OPTS"] = env.get("HADOOP_OPTS", "") + " -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=9999" if USE_STDERR: - stderr=sys.stderr + stderr = sys.stderr else: - stderr=open_file(tmppath("stderr"), "w") + stderr = open(tmppath("stderr"), "w") LOGGER.debug("Starting minicluster: %s env: %s" % (repr(args), repr(env))) self.clusterproc = subprocess.Popen( args=args, - stdout=open_file(tmppath("stdout"), "w"), + stdout=open(tmppath("stdout"), "w"), stderr=stderr, env=env) @@ -251,9 +249,9 @@ def tmppath(filename): except ValueError: pass if self.clusterproc.poll() is not None or (not DEBUG_HADOOP and (time.time() - start) > MAX_CLUSTER_STARTUP_TIME): - LOGGER.debug("stdout:" + open_file(tmppath("stdout")).read()) + LOGGER.debug("stdout:" + open(tmppath("stdout")).read()) if not USE_STDERR: - LOGGER.debug("stderr:" + open_file(tmppath("stderr")).read()) + LOGGER.debug("stderr:" + open(tmppath("stderr")).read()) self.stop() raise Exception("Cluster process quit or is taking too long to start. Aborting.") finally: @@ -267,10 +265,10 @@ def tmppath(filename): # Parse the configuration using XPath and place into self.config. 
config = lxml.etree.parse(tmppath("config.xml")) - self.config = dict( (property.find("./name").text, property.find("./value").text) + self.config = dict((property.find("./name").text, property.find("./value").text) for property in config.xpath("/configuration/property")) - # Write out Hadoop-style configuration directory, + # Write out Hadoop-style configuration directory, # which can, in turn, be used for /bin/hadoop. self.config_dir = tmppath("conf") os.mkdir(self.config_dir) @@ -280,11 +278,13 @@ def tmppath(filename): write_config(self.config, tmppath("conf/core-site.xml"), ["fs.defaultFS", "jobclient.completion.poll.interval", "dfs.namenode.checkpoint.period", "dfs.namenode.checkpoint.dir", - 'hadoop.proxyuser.'+self.superuser+'.groups', 'hadoop.proxyuser.'+self.superuser+'.hosts']) - write_config(self.config, tmppath("conf/hdfs-site.xml"), ["fs.defaultFS", "dfs.namenode.http-address", "dfs.namenode.secondary.http-address"]) + 'hadoop.proxyuser.' + self.superuser + '.groups', 'hadoop.proxyuser.' + self.superuser + '.hosts']) + write_config( + self.config, tmppath("conf/hdfs-site.xml"), ["fs.defaultFS", "dfs.namenode.http-address", "dfs.namenode.secondary.http-address"] + ) # mapred.job.tracker isn't written out into self.config, so we fill # that one out more manually. - write_config({ 'mapred.job.tracker': 'localhost:%d' % self.jobtracker_port }, + write_config({'mapred.job.tracker': 'localhost:%d' % self.jobtracker_port}, tmppath("conf/mapred-site.xml")) write_config(hadoop_policy_config, tmppath('conf/hadoop-policy.xml')) @@ -299,8 +299,8 @@ def tmppath(filename): self.secondary_proc = subprocess.Popen( args=args, - stdout=open_file(tmppath("stdout.2nn"), "w"), - stderr=open_file(tmppath("stderr.2nn"), "w"), + stdout=open(tmppath("stdout.2nn"), "w"), + stderr=open(tmppath("stderr.2nn"), "w"), env=env) while True: @@ -310,9 +310,9 @@ def tmppath(filename): except lib_URLError: # If we should abort startup. if self.secondary_proc.poll() is not None or (not DEBUG_HADOOP and (time.time() - start) > MAX_CLUSTER_STARTUP_TIME): - LOGGER.debug("stdout:" + open_file(tmppath("stdout")).read()) + LOGGER.debug("stdout:" + open(tmppath("stdout")).read()) if not USE_STDERR: - LOGGER.debug("stderr:" + open_file(tmppath("stderr")).read()) + LOGGER.debug("stderr:" + open(tmppath("stderr")).read()) self.stop() raise Exception("2nn process quit or is taking too long to start. Aborting.") break @@ -326,7 +326,6 @@ def tmppath(filename): LOGGER.debug("Successfully started 2NN") - def stop(self): """ Kills the cluster ungracefully. @@ -356,8 +355,8 @@ def jt(self): @property def superuser(self): """ - Returns the "superuser" of this cluster. - + Returns the "superuser" of this cluster. + This is essentially the user that the cluster was started with. """ @@ -400,6 +399,7 @@ def dump_ini(self, fd=sys.stdout): # Shared global cluster returned by shared_cluster context manager. _shared_cluster = None + def shared_cluster(conf=False): """ Use a shared cluster that is initialized on demand, @@ -412,7 +412,7 @@ def shared_cluster(conf=False): done with the shared cluster. """ cluster = shared_cluster_internal() - closers = [ ] + closers = [] if conf: closers.extend([ hadoop.conf.HDFS_CLUSTERS["default"].NN_HOST.set_for_testing("localhost"), @@ -433,18 +433,19 @@ def finish(): x() # We don't run the cluster's real stop method, - # because a shared cluster should be shutdown at + # because a shared cluster should be shutdown at # exit. 
cluster.shutdown = finish return cluster + def write_config(config, path, variables=None): """ Minimal utility to write Hadoop-style configuration from a configuration map (config), into a new file called path. """ - f = open_file(path, "w") + f = open(path, "w") try: f.write(""" @@ -461,18 +462,20 @@ def write_config(config, path, variables=None): finally: f.close() + def _write_static_group_mapping(user_group_mapping, path): """ Create a Java-style .properties file to contain the static user -> group mapping used by tests. """ - f = open_file(path, 'w') + f = open(path, 'w') try: for user, groups in user_group_mapping.items(): f.write('%s = %s\n' % (user, ','.join(groups))) finally: f.close() + def shared_cluster_internal(): """ Manages _shared_cluster. @@ -484,6 +487,7 @@ def shared_cluster_internal(): atexit.register(_shared_cluster.stop) return _shared_cluster + if __name__ == '__main__': """ It's poor form to write tests for tests (the world-wide stack diff --git a/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py b/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py index c070f92d309..f40f74aa29e 100755 --- a/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py +++ b/desktop/libs/hadoop/src/hadoop/pseudo_hdfs4.py @@ -15,36 +15,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function -from builtins import object -import atexit -import getpass -import logging import os +import time +import atexit import shutil import signal -import subprocess import socket -import sys +import getpass +import logging import tempfile import textwrap -import time +import subprocess +import hadoop from desktop.lib.paths import get_run_root from desktop.lib.python_util import find_unused_port from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches - -import hadoop from hadoop import cluster from hadoop.mini_cluster import write_config -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file - _shared_cluster = None + LOG = logging.getLogger() @@ -56,11 +48,13 @@ def is_live_cluster(): return os.environ.get('LIVE_CLUSTER', 'false').lower() == 'true' + def get_fs_prefix(fs): prefix = '/tmp/hue_tests_%s' % str(time.time()) fs.mkdir(prefix, 0o777) return prefix + def get_db_prefix(name='hive'): if is_live_cluster(): return 'hue_test_%s_%s' % (name, str(time.time()).replace('.', '')) @@ -72,7 +66,7 @@ class LiveHdfs(object): def __init__(self): self.fs = cluster.get_hdfs('default') # Assumes /tmp exists and is 1777 - self.jt = None # Deprecated + self.jt = None # Deprecated self.fs_prefix = get_fs_prefix(self.fs) LOG.info('Using %s as FS root' % self.fs_prefix) @@ -210,7 +204,6 @@ def _kill_proc(name, proc): if self.shutdown_hook is not None: self.shutdown_hook() - def _tmppath(self, filename): return os.path.join(self._tmpdir, filename) @@ -257,7 +250,7 @@ def start(self): if "JAVA_HOME" in os.environ: env['JAVA_HOME'] = os.environ['JAVA_HOME'] - LOG.debug("Hadoop Environment:\n" + "\n".join([ str(x) for x in sorted(env.items()) ])) + LOG.debug("Hadoop Environment:\n" + "\n".join([str(x) for x in sorted(env.items())])) # Format HDFS self._format(self.hadoop_conf_dir, env) @@ -305,13 +298,12 @@ def start(self): self.fs_prefix = get_fs_prefix(self.fs) - def _start_mr2(self, env): LOG.info("Starting MR2") self._mr2_env = env.copy() - LOG.debug("MR2 Environment:\n" + "\n".join([ str(x) for x in sorted(self.mr2_env.items()) ])) + LOG.debug("MR2 Environment:\n" + "\n".join([str(x) for x in 
sorted(self.mr2_env.items())])) # Run YARN self._rm_proc = self._start_daemon('resourcemanager', self.hadoop_conf_dir, self.mr2_env, self._get_yarn_bin(self.mr2_env)) @@ -347,8 +339,8 @@ def _format(self, conf_dir, env): def _log_exit(self, proc_name, exit_code): LOG.info('%s exited with %s' % (proc_name, exit_code)) - LOG.debug('--------------------- STDOUT:\n' + open_file(self._logpath(proc_name + '.stdout')).read()) - LOG.debug('--------------------- STDERR:\n' + open_file(self._logpath(proc_name + '.stderr')).read()) + LOG.debug('--------------------- STDOUT:\n' + open(self._logpath(proc_name + '.stdout')).read()) + LOG.debug('--------------------- STDERR:\n' + open(self._logpath(proc_name + '.stderr')).read()) def _is_hdfs_ready(self, env): if self._nn_proc.poll() is not None: @@ -376,7 +368,6 @@ def _is_hdfs_ready(self, env): LOG.debug('Waiting for DN to come up .................\n%s' % (report_out,)) return False - def _is_mr2_ready(self, env): if self._rm_proc.poll() is not None: self._log_exit('resourcemanager', self._rm_proc.poll()) @@ -388,7 +379,6 @@ def _is_mr2_ready(self, env): self._log_exit('historyserver', self._hs_proc.poll()) return False - # Run a `hadoop job -list all' list_all = subprocess.Popen( (self._get_mapred_bin(env), 'job', '-list', 'all'), @@ -403,7 +393,6 @@ def _is_mr2_ready(self, env): LOG.debug('MR2 not ready yet.\n%s\n%s' % (list_all.stderr.read(), list_all.stderr.read())) return False - def _start_daemon(self, proc_name, conf_dir, env, hadoop_bin=None): if hadoop_bin is None: hadoop_bin = self._get_hadoop_bin(env) @@ -411,8 +400,8 @@ def _start_daemon(self, proc_name, conf_dir, env, hadoop_bin=None): args = (hadoop_bin, '--config', conf_dir, proc_name) LOG.info('Starting Hadoop cluster daemon: %s' % (args,)) - stdout = open_file(self._logpath(proc_name + ".stdout"), 'w') - stderr = open_file(self._logpath(proc_name + ".stderr"), 'w') + stdout = open(self._logpath(proc_name + ".stdout"), 'w') + stderr = open(self._logpath(proc_name + ".stderr"), 'w') return subprocess.Popen(args=args, stdout=stdout, stderr=stderr, env=env) @@ -450,7 +439,7 @@ def _write_hdfs_site(self): 'dfs.namenode.safemode.extension': 1, 'dfs.namenode.safemode.threshold-pct': 0, 'dfs.datanode.address': '%s:0' % self._fqdn, - 'dfs.datanode.http.address': '0.0.0.0:0', # Work around webhdfs redirect bug -- bind to all interfaces + 'dfs.datanode.http.address': '0.0.0.0:0', # Work around webhdfs redirect bug -- bind to all interfaces 'dfs.datanode.ipc.address': '%s:0' % self._fqdn, 'dfs.replication': 1, 'dfs.safemode.min.datanodes': 1, @@ -503,7 +492,7 @@ def _write_yarn_site(self): 'yarn.nodemanager.local-dirs': self._local_dir, 'yarn.nodemanager.log-dirs': self._logpath('yarn-logs'), 'yarn.nodemanager.remote-app-log-dir': '/var/log/hadoop-yarn/apps', - 'yarn.nodemanager.localizer.address' : '%s:%s' % (self._fqdn, self._nm_port,), + 'yarn.nodemanager.localizer.address': '%s:%s' % (self._fqdn, self._nm_port,), 'yarn.nodemanager.aux-services': 'mapreduce_shuffle', 'yarn.nodemanager.aux-services.mapreduce.shuffle.class': 'org.apache.hadoop.mapred.ShuffleHandler', 'yarn.nodemanager.webapp.address': '%s:%s' % (self._fqdn, self._nm_webapp_port,), @@ -520,7 +509,6 @@ def _write_yarn_site(self): self._yarn_site = self._tmppath('conf/yarn-site.xml') write_config(yarn_configs, self._tmppath('conf/yarn-site.xml')) - def _write_mapred_site(self): self._jh_port = find_unused_port() self._jh_web_port = find_unused_port() @@ -538,7 +526,7 @@ def _write_mapred_site(self): write_config(mapred_configs, 
self._tmppath('conf/mapred-site.xml')) def _write_hadoop_metrics_conf(self, conf_dir): - f = open_file(os.path.join(conf_dir, "hadoop-metrics.properties"), "w") + f = open(os.path.join(conf_dir, "hadoop-metrics.properties"), "w") try: f.write(textwrap.dedent(""" dfs.class=org.apache.hadoop.metrics.spi.NoEmitMetricsContext @@ -568,13 +556,29 @@ def shared_cluster(): closers = [ hadoop.conf.HDFS_CLUSTERS['default'].FS_DEFAULTFS.set_for_testing(cluster.fs_default_name), hadoop.conf.HDFS_CLUSTERS['default'].WEBHDFS_URL.set_for_testing(webhdfs_url), - hadoop.conf.YARN_CLUSTERS['default'].HOST.set_for_testing(fqdn), hadoop.conf.YARN_CLUSTERS['default'].PORT.set_for_testing(cluster._rm_port), - - hadoop.conf.YARN_CLUSTERS['default'].RESOURCE_MANAGER_API_URL.set_for_testing('http://%s:%s' % (cluster._fqdn, cluster._rm_webapp_port,)), - hadoop.conf.YARN_CLUSTERS['default'].PROXY_API_URL.set_for_testing('http://%s:%s' % (cluster._fqdn, cluster._rm_webapp_port,)), - hadoop.conf.YARN_CLUSTERS['default'].HISTORY_SERVER_API_URL.set_for_testing('%s:%s' % (cluster._fqdn, cluster._jh_web_port,)), + hadoop.conf.YARN_CLUSTERS['default'].RESOURCE_MANAGER_API_URL.set_for_testing( + 'http://%s:%s' + % ( + cluster._fqdn, + cluster._rm_webapp_port, + ) + ), + hadoop.conf.YARN_CLUSTERS['default'].PROXY_API_URL.set_for_testing( + 'http://%s:%s' + % ( + cluster._fqdn, + cluster._rm_webapp_port, + ) + ), + hadoop.conf.YARN_CLUSTERS['default'].HISTORY_SERVER_API_URL.set_for_testing( + '%s:%s' + % ( + cluster._fqdn, + cluster._jh_web_port, + ) + ), ] old_caches = clear_sys_caches() @@ -591,7 +595,6 @@ def restore_config(): return _shared_cluster - """ Manual start from the Hue shell. @@ -604,6 +607,8 @@ def restore_config(): > exit() # To shutdown cleanly """ + + def main(): logging.basicConfig(level=logging.DEBUG) diff --git a/desktop/libs/hadoop/src/hadoop/ssl_client_site.py b/desktop/libs/hadoop/src/hadoop/ssl_client_site.py index 07703374dbf..d58a63ebf00 100644 --- a/desktop/libs/hadoop/src/hadoop/ssl_client_site.py +++ b/desktop/libs/hadoop/src/hadoop/ssl_client_site.py @@ -15,19 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import absolute_import
+import sys
 import errno
 import logging
 import os.path
-import sys
-from hadoop import conf
-from hadoop import confparse
-
-if sys.version_info[0] > 2:
-  open_file = open
-else:
-  open_file = file
+from hadoop import conf, confparse
 _SSL_SITE_PATH = None # Path to ssl-client.xml
 _SSL_SITE_DICT = None # A dictionary of name/value config options
@@ -35,6 +28,7 @@
 _CNF_TRUSTORE_LOCATION = 'ssl.client.truststore.location'
 _CNF_TRUSTORE_PASSWORD = 'ssl.client.truststore.password'
+
 LOG = logging.getLogger()
@@ -56,7 +50,7 @@ def _parse_ssl_client_site():
   for indentifier in conf.HDFS_CLUSTERS.get():
     try:
       _SSL_SITE_PATH = os.path.join(conf.HDFS_CLUSTERS[indentifier].HADOOP_CONF_DIR.get(), 'ssl-client.xml')
-      data = open_file(_SSL_SITE_PATH, 'r').read()
+      data = open(_SSL_SITE_PATH, 'r').read()
       break
     except KeyError:
       data = ""
@@ -75,4 +69,4 @@ def get_trustore_location():
 def get_trustore_password():
-  return get_conf().get(_CNF_TRUSTORE_PASSWORD)
\ No newline at end of file
+  return get_conf().get(_CNF_TRUSTORE_PASSWORD)
diff --git a/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py b/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py
index 0d93c455b31..2b7ad65328a 100644
--- a/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py
+++ b/desktop/libs/hadoop/src/hadoop/test_hdfs_site.py
@@ -15,19 +15,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
-from hadoop import conf
-import logging
 import os
-import sys
+import logging
 import tempfile
-from hadoop import hdfs_site
-
-if sys.version_info[0] > 2:
-  open_file = open
-else:
-  open_file = file
+from hadoop import conf, hdfs_site
 LOG = logging.getLogger()
@@ -51,7 +43,7 @@ def test_hdfs_site():
     """
-    open_file(os.path.join(hadoop_home, 'hdfs-site.xml'), 'w').write(xml)
+    open(os.path.join(hadoop_home, 'hdfs-site.xml'), 'w').write(xml)
     finish = conf.HDFS_CLUSTERS['default'].HADOOP_CONF_DIR.set_for_testing(hadoop_home)
     hdfs_site.reset()
diff --git a/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py b/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py
index 20e48335bcc..408f22f31ad 100644
--- a/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py
+++ b/desktop/libs/hadoop/src/hadoop/test_ssl_client_site.py
@@ -15,19 +15,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import
-from hadoop import conf
-import logging
+
 import os
-import sys
+import logging
 import tempfile
-from hadoop import ssl_client_site
-
-if sys.version_info[0] > 2:
-  open_file = open
-else:
-  open_file = file
+from hadoop import conf, ssl_client_site
 LOG = logging.getLogger()
@@ -59,7 +52,7 @@ def test_ssl_client_site():
     """
-    open_file(os.path.join(hadoop_home, 'ssl-client.xml'), 'w').write(xml)
+    open(os.path.join(hadoop_home, 'ssl-client.xml'), 'w').write(xml)
     finish = conf.HDFS_CLUSTERS['default'].HADOOP_CONF_DIR.set_for_testing(hadoop_home)
     ssl_client_site.reset()
diff --git a/desktop/libs/hadoop/src/hadoop/tests.py b/desktop/libs/hadoop/src/hadoop/tests.py
index d83a2060aee..02195085bbc 100644
--- a/desktop/libs/hadoop/src/hadoop/tests.py
+++ b/desktop/libs/hadoop/src/hadoop/tests.py
@@ -15,27 +15,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from future import standard_library -standard_library.install_aliases() import os -import pytest -import sys +from io import BytesIO as string_io +import pytest import desktop.conf as desktop_conf - -from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.test_utils import clear_sys_caches, restore_sys_caches +from hadoop import cluster, conf, confparse, pseudo_hdfs4 -from hadoop import cluster -from hadoop import conf -from hadoop import confparse -from hadoop import pseudo_hdfs4 - -if sys.version_info[0] > 2: - from io import BytesIO as string_io -else: - from cStringIO import StringIO as string_io def test_confparse(): data = """ @@ -69,9 +58,9 @@ def test_confparse(): assert cp['fs.default.name'] == 'hdfs://localhost:8020' assert cp.get('with_description') == 'bar' assert cp.get('not_in_xml', 'abc') == 'abc' - assert cp.getbool('boolean_true') == True - assert cp.getbool('boolean_false') == False - assert cp.getbool('not_in_xml', True) == True + assert cp.getbool('boolean_true') is True + assert cp.getbool('boolean_false') is False + assert cp.getbool('not_in_xml', True) is True try: cp['bogus'] @@ -82,14 +71,13 @@ def test_confparse(): cp_empty = confparse.ConfParse("") assert cp_empty.get('whatever', 'yes') == 'yes' + def test_tricky_confparse(): """ We found (experimentally) that dealing with a file sometimes triggered the wrong results here. """ - cp_data = confparse.ConfParse(open(os.path.join(os.path.dirname(__file__), - "test_data", - "sample_conf.xml"), 'rb')) + cp_data = confparse.ConfParse(open(os.path.join(os.path.dirname(__file__), "test_data", "sample_conf.xml"), 'rb')) assert "org.apache.hadoop.examples.SleepJob" == cp_data["mapred.mapper.class"] @@ -128,10 +116,10 @@ def test_config_validator_more(): try: resp = cli.get('/debug/check_config') - assert not 'Failed to access filesystem root' in resp.content - assert not 'Failed to create' in resp.content - assert not 'Failed to chown' in resp.content - assert not 'Failed to delete' in resp.content + assert 'Failed to access filesystem root' not in resp.content + assert 'Failed to create' not in resp.content + assert 'Failed to chown' not in resp.content + assert 'Failed to delete' not in resp.content finally: restore_sys_caches(old_caches) @@ -141,8 +129,8 @@ def test_non_default_cluster(): NON_DEFAULT_NAME = 'non_default' old_caches = clear_sys_caches() reset = ( - conf.HDFS_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }), - conf.MR_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }), + conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}), + conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}), ) try: # This is indeed the only hdfs/mr cluster @@ -160,25 +148,28 @@ def test_non_default_cluster(): def test_hdfs_ssl_validate(): for desktop_kwargs, conf_kwargs, expected in [ - ({'present': False}, {'present': False}, True), - ({'present': False}, {'data': False}, False), - ({'present': False}, {'data': True}, True), - - ({'data': False}, {'present': False}, False), - ({'data': False}, {'data': False}, False), - ({'data': False}, {'data': True}, True), - - ({'data': True}, {'present': False}, True), - ({'data': True}, {'data': False}, False), - ({'data': True}, {'data': True}, True), - ]: + ({'present': False}, {'present': False}, True), + ({'present': False}, {'data': False}, False), + ({'present': False}, {'data': True}, True), + ({'data': False}, {'present': False}, False), + ({'data': False}, {'data': False}, 
False), + ({'data': False}, {'data': True}, True), + ({'data': True}, {'present': False}, True), + ({'data': True}, {'data': False}, False), + ({'data': True}, {'data': True}, True), + ]: resets = [ desktop_conf.SSL_VALIDATE.set_for_testing(**desktop_kwargs), conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.set_for_testing(**conf_kwargs), ] try: - assert conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % (desktop_kwargs, conf_kwargs, expected, conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get()) + assert conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % ( + desktop_kwargs, + conf_kwargs, + expected, + conf.HDFS_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get(), + ) finally: for reset in resets: reset() @@ -186,18 +177,16 @@ def test_hdfs_ssl_validate(): def test_yarn_ssl_validate(): for desktop_kwargs, conf_kwargs, expected in [ - ({'present': False}, {'present': False}, True), - ({'present': False}, {'data': False}, False), - ({'present': False}, {'data': True}, True), - - ({'data': False}, {'present': False}, False), - ({'data': False}, {'data': False}, False), - ({'data': False}, {'data': True}, True), - - ({'data': True}, {'present': False}, True), - ({'data': True}, {'data': False}, False), - ({'data': True}, {'data': True}, True), - ]: + ({'present': False}, {'present': False}, True), + ({'present': False}, {'data': False}, False), + ({'present': False}, {'data': True}, True), + ({'data': False}, {'present': False}, False), + ({'data': False}, {'data': False}, False), + ({'data': False}, {'data': True}, True), + ({'data': True}, {'present': False}, True), + ({'data': True}, {'data': False}, False), + ({'data': True}, {'data': True}, True), + ]: resets = [ conf.YARN_CLUSTERS.set_for_testing({'default': {}}), desktop_conf.SSL_VALIDATE.set_for_testing(**desktop_kwargs), @@ -205,7 +194,12 @@ def test_yarn_ssl_validate(): ] try: - assert conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % (desktop_kwargs, conf_kwargs, expected, conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get()) + assert conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get() == expected, 'desktop:%s conf:%s expected:%s got:%s' % ( + desktop_kwargs, + conf_kwargs, + expected, + conf.YARN_CLUSTERS['default'].SSL_CERT_CA_VERIFY.get(), + ) finally: for reset in resets: reset() diff --git a/desktop/libs/hadoop/src/hadoop/yarn/clients.py b/desktop/libs/hadoop/src/hadoop/yarn/clients.py index d70f4809f7f..4fab52c676e 100644 --- a/desktop/libs/hadoop/src/hadoop/yarn/clients.py +++ b/desktop/libs/hadoop/src/hadoop/yarn/clients.py @@ -15,25 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from future import standard_library -standard_library.install_aliases() -from builtins import next +import time +import heapq import logging -import sys import threading -import time import urllib.parse -import heapq +from urllib.parse import urlsplit as lib_urlsplit from desktop.lib.rest.http_client import HttpClient - from hadoop import cluster -if sys.version_info[0] > 2: - from urllib.parse import urlsplit as lib_urlsplit -else: - from urlparse import urlsplit as lib_urlsplit - LOG = logging.getLogger() MAX_HEAP_SIZE = 20 diff --git a/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py b/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py index 15dc97020fe..442074402ec 100644 --- a/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py +++ b/desktop/libs/hadoop/src/hadoop/yarn/resource_manager_api.py @@ -15,27 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object +import sys import json import logging import posixpath -import sys import threading +from builtins import object + +from django.utils.translation import gettext as _ from desktop.conf import DEFAULT_USER from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import smart_str from desktop.lib.rest.http_client import HttpClient from desktop.lib.rest.resource import Resource - from hadoop import cluster -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() _API_VERSION = 'v1' @@ -55,11 +50,13 @@ def get_resource_manager(username=None): yarn_cluster = cluster.get_cluster_conf_for_job_submission() if yarn_cluster is None: raise PopupException(_('No Resource Manager are available.')) - API_CACHE = ResourceManagerApi(yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get()) + API_CACHE = ResourceManagerApi( + yarn_cluster.RESOURCE_MANAGER_API_URL.get(), yarn_cluster.SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get() + ) finally: API_CACHE_LOCK.release() - API_CACHE.setuser(username) # Set the correct user + API_CACHE.setuser(username) # Set the correct user return API_CACHE @@ -71,7 +68,7 @@ def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False): self._client = HttpClient(self._url, logger=LOG) self._root = Resource(self._client) self._security_enabled = security_enabled - self._thread_local = threading.local() # To store user info + self._thread_local = threading.local() # To store user info self.from_failover = False if self._security_enabled: @@ -82,7 +79,7 @@ def __init__(self, rm_url, security_enabled=False, ssl_cert_ca_verify=False): def _get_params(self): params = {} - if self.username != DEFAULT_USER.get(): # We impersonate if needed + if self.username != DEFAULT_USER.get(): # We impersonate if needed params['doAs'] = self.username if not self.security_enabled: params['user.name'] = DEFAULT_USER.get() @@ -99,7 +96,7 @@ def setuser(self, user): @property def user(self): - return self.username # Backward compatibility + return self.username # Backward compatibility @property def username(self): @@ -127,11 +124,15 @@ def apps(self, **kwargs): def app(self, app_id): params = self._get_params() - return self._execute(self._root.get, 'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}) + return self._execute( + self._root.get, 
'cluster/apps/%(app_id)s' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE} + ) def appattempts(self, app_id): params = self._get_params() - return self._execute(self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE}) + return self._execute( + self._root.get, 'cluster/apps/%(app_id)s/appattempts' % {'app_id': app_id}, params=params, headers={'Accept': _JSON_CONTENT_TYPE} + ) def appattempts_attempt(self, app_id, attempt_id): attempts = self.appattempts(app_id) @@ -154,7 +155,13 @@ def kill(self, app_id): try: params = self._get_params() - return self._execute(self._root.put, 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, params=params, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) + return self._execute( + self._root.put, + 'cluster/apps/%(app_id)s/state' % {'app_id': app_id}, + params=params, + data=json.dumps(data), + contenttype=_JSON_CONTENT_TYPE, + ) finally: if token: self.cancel_token(token) diff --git a/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py b/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py index 6b7fcb23be3..fdb4d6ab669 100644 --- a/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py +++ b/desktop/libs/hadoop/src/hadoop/yarn/spark_history_server_api.py @@ -15,15 +15,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import object -import json import logging import posixpath -import sys import threading import urllib.parse +from urllib.parse import urlsplit as lib_urlsplit + +from django.utils.translation import gettext as _ +from lxml import html from desktop.lib.exceptions_renderable import PopupException from desktop.lib.rest.http_client import HttpClient @@ -31,15 +30,6 @@ from hadoop import cluster from hadoop.yarn.clients import get_log_client -from lxml import html - -if sys.version_info[0] > 2: - from urllib.parse import urlsplit as lib_urlsplit - from django.utils.translation import gettext as _ -else: - from urlparse import urlsplit as lib_urlsplit - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() _API_VERSION = 'v1' @@ -60,7 +50,11 @@ def get_history_server_api(): yarn_cluster = cluster.get_cluster_conf_for_job_submission() if yarn_cluster is None: raise PopupException(_('No Spark History Server is available.')) - API_CACHE = SparkHistoryServerApi(yarn_cluster.SPARK_HISTORY_SERVER_URL.get(), yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get(), yarn_cluster.SSL_CERT_CA_VERIFY.get()) + API_CACHE = SparkHistoryServerApi( + yarn_cluster.SPARK_HISTORY_SERVER_URL.get(), + yarn_cluster.SPARK_HISTORY_SERVER_SECURITY_ENABLED.get(), + yarn_cluster.SSL_CERT_CA_VERIFY.get(), + ) finally: API_CACHE_LOCK.release() @@ -120,13 +114,25 @@ def stage_attempts(self, app_id, stage_id): return self._root.get('applications/%(app_id)s/stages/%(stage_id)s' % {'app_id': app_id, 'stage_id': stage_id}, headers=self.headers) def stage_attempt(self, app_id, stage_id, stage_attempt_id): - return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers) + return self._root.get( + 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s' + % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, + 
headers=self.headers, + ) def task_summary(self, app_id, stage_id, stage_attempt_id): - return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers) + return self._root.get( + 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskSummary' + % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, + headers=self.headers, + ) def task_list(self, app_id, stage_id, stage_attempt_id): - return self._root.get('applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, headers=self.headers) + return self._root.get( + 'applications/%(app_id)s/stages/%(stage_id)s/%(stage_attempt_id)s/taskList' + % {'app_id': app_id, 'stage_id': stage_id, 'stage_attempt_id': stage_attempt_id}, + headers=self.headers, + ) def storages(self, app_id): return self._root.get('applications/%(app_id)s/storage/rdd' % {'app_id': app_id}, headers=self.headers) @@ -138,7 +144,9 @@ def download_logs(self, app_id): return self._root.get('applications/%(app_id)s/logs' % {'app_id': app_id}, headers=self.headers) def download_attempt_logs(self, app_id, attempt_id): - return self._root.get('applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers) + return self._root.get( + 'applications/%(app_id)s/%(attempt_id)s/logs' % {'app_id': app_id, 'attempt_id': attempt_id}, headers=self.headers + ) def download_executors_logs(self, request, job, name, offset): log_links = self.get_executors_loglinks(job) diff --git a/desktop/libs/indexer/src/indexer/api.py b/desktop/libs/indexer/src/indexer/api.py index f997d711c92..b060c705215 100644 --- a/desktop/libs/indexer/src/indexer/api.py +++ b/desktop/libs/indexer/src/indexer/api.py @@ -15,26 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import next
-from builtins import zip
-import itertools
-import json
-import logging
 import re
 import sys
+import json
+import logging
+import itertools
+from builtins import next, zip
+
+from django.utils.translation import gettext as _
 from desktop.lib.django_util import JsonResponse
 from desktop.lib.exceptions_renderable import PopupException
-
 from indexer.controller import CollectionManagerController
 from indexer.solr_client import SolrClient
-from indexer.utils import fields_from_log, field_values_from_separated_file, get_type_from_morphline_type, get_field_types
-
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
-
+from indexer.utils import field_values_from_separated_file, fields_from_log, get_field_types, get_type_from_morphline_type
 LOG = logging.getLogger()
@@ -70,7 +64,7 @@ def parse_fields(request):
       result['data'] = []
       for field_result in field_results:
-        result['data'].append( (field_result[1], get_type_from_morphline_type(field_result[0])) )
+        result['data'].append((field_result[1], get_type_from_morphline_type(field_result[0])))
       result['status'] = 0
     else:
@@ -93,6 +87,7 @@ def parse_fields(request):
   return JsonResponse(result)
+
 def autocomplete(request):
   searcher = CollectionManagerController(request.user)
   autocomplete = searcher.get_autocomplete()
@@ -167,7 +162,7 @@ def collections_create(request):
         table = request.POST.get('table')
         columns = [field['name'] for field in collection.get('fields', [])]
-        searcher.update_data_from_hive(db, collection.get('name'), database, table, columns) # Not up to date
+        searcher.update_data_from_hive(db, collection.get('name'), database, table, columns)  # Not up to date
       response['status'] = 0
       response['message'] = _('Collection created!')
@@ -193,7 +188,9 @@ def collections_import(request):
     unique_key, fields = searcher.get_fields(collection.get('name'))
     # Create collection and metadata.
-    hue_collection, created = Collection.objects.get_or_create(name=collection.get('name'), solr_properties='{}', is_enabled=True, user=request.user)
+    hue_collection, created = Collection.objects.get_or_create(
+      name=collection.get('name'), solr_properties='{}', is_enabled=True, user=request.user
+    )
     properties_dict = hue_collection.properties_dict
     properties_dict['data_type'] = 'separated'
     properties_dict['field_order'] = [field_name for field_name in fields]
@@ -207,6 +204,7 @@ def collections_import(request):
   return JsonResponse(response)
+
 # Deprecated
 def collections_remove(request):
   if request.method != 'POST':
@@ -244,7 +242,9 @@ def collections_fields(request, collection):
   unique_key, fields = searcher.get_fields(collection)
   response['status'] = 0
-  response['fields'] = [(field, fields[field]['type'], fields[field].get('indexed', None), fields[field].get('stored', None)) for field in fields]
+  response['fields'] = [
+    (field, fields[field]['type'], fields[field].get('indexed', None), fields[field].get('stored', None)) for field in fields
+  ]
   response['unique_key'] = unique_key
   return JsonResponse(response)
diff --git a/desktop/libs/indexer/src/indexer/api3.py b/desktop/libs/indexer/src/indexer/api3.py
index 8337336d1cb..05c35928993 100644
--- a/desktop/libs/indexer/src/indexer/api3.py
+++ b/desktop/libs/indexer/src/indexer/api3.py
@@ -15,68 +15,55 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from future import standard_library -standard_library.install_aliases() - -from builtins import zip -from past.builtins import basestring +import re import csv import json +import uuid import logging -import urllib.error -import openpyxl -import re -import sys import tempfile -import uuid +import urllib.error +from builtins import zip +from io import StringIO as string_io +from urllib.parse import unquote as urllib_unquote, urlparse +import pandas as pd +import openpyxl from django.urls import reverse +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST - -LOG = logging.getLogger() - -try: - from simple_salesforce.api import Salesforce - from simple_salesforce.exceptions import SalesforceRefusedRequest -except ImportError: - LOG.warning('simple_salesforce module not found') +from past.builtins import basestring from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.lib.python_util import check_encoding from desktop.models import Document2 from filebrowser.forms import UploadLocalFileForm -from kafka.kafka_api import get_topics, get_topic_data -from notebook.connectors.base import get_api, Notebook -from notebook.decorators import api_error_handler -from notebook.models import MockedDjangoRequest, escape_rows - from indexer.controller import CollectionManagerController -from indexer.file_format import HiveFormat from indexer.fields import Field, guess_field_type_from_samples -from indexer.indexers.envelope import _envelope_job +from indexer.file_format import HiveFormat from indexer.indexers.base import get_api +from indexer.indexers.envelope import _envelope_job from indexer.indexers.flink_sql import FlinkIndexer +from indexer.indexers.flume import FlumeIndexer from indexer.indexers.morphline import MorphlineIndexer, _create_solr_collection from indexer.indexers.phoenix_sql import PhoenixIndexer -from indexer.indexers.rdbms import run_sqoop, _get_api +from indexer.indexers.rdbms import _get_api, run_sqoop from indexer.indexers.sql import _create_database, _create_table, _create_table_from_local from indexer.models import _save_pipeline -from indexer.solr_client import SolrClient, MAX_UPLOAD_SIZE -from indexer.indexers.flume import FlumeIndexer +from indexer.solr_client import MAX_UPLOAD_SIZE, SolrClient +from kafka.kafka_api import get_topic_data, get_topics +from notebook.connectors.base import Notebook, get_api +from notebook.decorators import api_error_handler +from notebook.models import MockedDjangoRequest, escape_rows +LOG = logging.getLogger() -if sys.version_info[0] > 2: - from io import StringIO as string_io - from urllib.parse import urlparse, unquote as urllib_unquote - from django.utils.translation import gettext as _ - import pandas as pd -else: - from StringIO import StringIO as string_io - from urllib import unquote as urllib_unquote - from urlparse import urlparse - from django.utils.translation import ugettext as _ +try: + from simple_salesforce.api import Salesforce + from simple_salesforce.exceptions import SalesforceRefusedRequest +except ImportError: + LOG.warning('simple_salesforce module not found') try: from beeswax.server import dbms @@ -106,10 +93,7 @@ def _escape_white_space_characters(s, inverse=False): from_ = 0 if inverse else 1 for pair in MAPPINGS.items(): - if sys.version_info[0] > 2: - s = s.replace(pair[to], pair[from_]) - else: - s = s.replace(pair[to], 
pair[from_]).encode('utf-8') + s = s.replace(pair[to], pair[from_]) return s @@ -125,9 +109,6 @@ def guess_format(request): file_format = json.loads(request.POST.get('fileFormat', '{}')) file_type = file_format['file_type'] path = file_format["path"] - - if sys.version_info[0] < 3 and (file_type == 'excel' or path[-3:] == 'xls' or path[-4:] == 'xlsx'): - return JsonResponse({'status': -1, 'message': 'Python2 based Hue does not support Excel file importer'}) if file_format['inputFormat'] == 'localfile': if file_type == 'excel': @@ -169,7 +150,7 @@ def guess_format(request): }) _convert_format(format_) - if file_format["path"][-3:] == 'xls' or file_format["path"][-4:] == 'xlsx': + if file_format["path"][-3:] == 'xls' or file_format["path"][-4:] == 'xlsx': format_ = { "quoteChar": "\"", "recordSeparator": '\\n', @@ -201,7 +182,7 @@ def guess_format(request): "fieldSeparator": storage.get('field.delim', ',') } elif table_metadata.details['properties']['format'] == 'parquet': - format_ = {"type": "parquet", "hasHeader": False,} + format_ = {"type": "parquet", "hasHeader": False, } else: raise PopupException('Hive table format %s is not supported.' % table_metadata.details['properties']['format']) elif file_format['inputFormat'] == 'query': @@ -255,9 +236,11 @@ def guess_format(request): format_['status'] = 0 return JsonResponse(format_) + def decode_utf8(input_iterator): - for l in input_iterator: - yield l.decode('utf-8') + for line in input_iterator: + yield line.decode('utf-8') + def guess_field_types(request): file_format = json.loads(request.POST.get('fileFormat', '{}')) @@ -275,7 +258,7 @@ def guess_field_types(request): column_row = [re.sub('[^0-9a-zA-Z]+', '_', col) for col in csv_data[0]] else: sample = csv_data[:4] - column_row = ['field_' + str(count+1) for count, col in enumerate(sample[0])] + column_row = ['field_' + str(count + 1) for count, col in enumerate(sample[0])] field_type_guesses = [] for count, col in enumerate(column_row): @@ -317,7 +300,7 @@ def guess_field_types(request): if 'sample' in format_ and format_['sample']: format_['sample'] = escape_rows(format_['sample'], nulls_only=True, encoding=encoding) for col in format_['columns']: - col['name'] = smart_unicode(col['name'], errors='replace', encoding=encoding) + col['name'] = smart_str(col['name'], errors='replace', encoding=encoding) elif file_format['inputFormat'] == 'table': sample = get_api( @@ -659,7 +642,7 @@ def _small_indexing(user, fs, client, source, destination, index_name): ) # TODO if rows == MAX_ROWS truncation warning elif source['inputFormat'] == 'manual': - pass # No need to do anything + pass # No need to do anything else: response = client.index(name=index_name, data=data, **kwargs) errors = [error.get('message', '') for error in response['responseHeader'].get('errors', [])] @@ -691,7 +674,7 @@ def _large_indexing(request, file_format, collection_name, query=None, start_tim client = SolrClient(user=request.user) - if not client.exists(collection_name) and not request.POST.get('show_command'): # if destination['isTargetExisting']: + if not client.exists(collection_name) and not request.POST.get('show_command'): # if destination['isTargetExisting']: client.create_index( name=collection_name, fields=request.POST.get('fields', schema_fields), @@ -786,12 +769,12 @@ def upload_local_file(request): read_file = pd.read_excel(upload_file) else: read_file = pd.read_excel(upload_file, engine='xlrd') - + temp_file = tempfile.NamedTemporaryFile(mode='w', prefix=filename, suffix='.csv', delete=False) 
read_file.to_csv(temp_file, index=False) file_type = 'excel' - else: + else: temp_file = tempfile.NamedTemporaryFile(prefix=filename, suffix='.csv', delete=False) temp_file.write(upload_file.read()) diff --git a/desktop/libs/indexer/src/indexer/api3_tests.py b/desktop/libs/indexer/src/indexer/api3_tests.py index 541dd4e2d76..c4978338fe8 100644 --- a/desktop/libs/indexer/src/indexer/api3_tests.py +++ b/desktop/libs/indexer/src/indexer/api3_tests.py @@ -15,20 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import sys -from django.utils.datastructures import MultiValueDict +import json +from unittest.mock import Mock, patch +from urllib.parse import unquote as urllib_unquote + from django.core.files.uploadhandler import InMemoryUploadedFile +from django.utils.datastructures import MultiValueDict from desktop.settings import BASE_DIR -from indexer.api3 import upload_local_file, guess_field_types, guess_format - -if sys.version_info[0] > 2: - from urllib.parse import unquote as urllib_unquote - from unittest.mock import patch, Mock, MagicMock -else: - from urllib import unquote as urllib_unquote - from mock import patch, Mock, MagicMock +from indexer.api3 import guess_field_types, guess_format, upload_local_file def test_xlsx_local_file_upload(): diff --git a/desktop/libs/indexer/src/indexer/argument.py b/desktop/libs/indexer/src/indexer/argument.py index 62a450e4559..3b563e5ddd1 100644 --- a/desktop/libs/indexer/src/indexer/argument.py +++ b/desktop/libs/indexer/src/indexer/argument.py @@ -15,13 +15,9 @@ # limitations under the License.import logging import sys - from builtins import object -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ class Argument(object): diff --git a/desktop/libs/indexer/src/indexer/conf.py b/desktop/libs/indexer/src/indexer/conf.py index 8803343070c..1215e33ce31 100644 --- a/desktop/libs/indexer/src/indexer/conf.py +++ b/desktop/libs/indexer/src/indexer/conf.py @@ -15,23 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from future import standard_library
-standard_library.install_aliases()
-import logging
 import os
-import sys
+import logging
+from urllib.parse import urlparse
+
+from django.utils.translation import gettext_lazy as _t
 from desktop.lib.conf import Config, coerce_bool
 from libsolr import conf as libsolr_conf
 from libzookeeper import conf as libzookeeper_conf
-if sys.version_info[0] > 2:
-  from urllib.parse import urlparse
-  from django.utils.translation import gettext_lazy as _t
-else:
-  from urlparse import urlparse
-  from django.utils.translation import ugettext_lazy as _t
-
 LOG = logging.getLogger()
@@ -62,14 +55,14 @@ def zkensemble():
     clusters = CLUSTERS.get()
     if clusters['default'].HOST_PORTS.get() != 'localhost:2181':
       return '%s/solr' % clusters['default'].HOST_PORTS.get()
-  except:
+  except Exception:
    LOG.warning('Failed to get Zookeeper ensemble')
   try:
     from search.conf import SOLR_URL
     parsed = urlparse(SOLR_URL.get())
     return "%s:2181/solr" % (parsed.hostname or 'localhost')
-  except:
+  except Exception:
     LOG.warning('Failed to get Solr url')
diff --git a/desktop/libs/indexer/src/indexer/controller.py b/desktop/libs/indexer/src/indexer/controller.py
index cb8ef170a6d..5e55523c163 100644
--- a/desktop/libs/indexer/src/indexer/controller.py
+++ b/desktop/libs/indexer/src/indexer/controller.py
@@ -16,34 +16,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
See logs for more info.') % collection_or_core_name) @@ -200,7 +194,6 @@ def _create_non_solr_cloud_collection(self, name, fields, unique_key_field, df): shutil.rmtree(instancedir) raise PopupException(_('Could not create collection. Check error logs for more info.')) - def delete_collection(self, name, core): """ Delete solr collection/core and instance dir @@ -263,7 +256,13 @@ def update_data_from_hdfs(self, fs, collection_or_core_name, fields, path, data_ data = json.dumps([value for value in field_values_from_log(fh, fields)]) content_type = 'json' elif data_type == 'separated': - data = json.dumps([value for value in field_values_from_separated_file(fh, kwargs.get('separator', ','), kwargs.get('quote_character', '"'), fields)], indent=2) + data = json.dumps( + [ + value + for value in field_values_from_separated_file(fh, kwargs.get('separator', ','), kwargs.get('quote_character', '"'), fields) + ], + indent=2, + ) content_type = 'json' else: raise PopupException(_('Could not update index. Unknown type %s') % data_type) diff --git a/desktop/libs/indexer/src/indexer/file_format.py b/desktop/libs/indexer/src/indexer/file_format.py index f985eb9ffd8..16ba26fa92b 100644 --- a/desktop/libs/indexer/src/indexer/file_format.py +++ b/desktop/libs/indexer/src/indexer/file_format.py @@ -13,33 +13,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.import logging -from future import standard_library -standard_library.install_aliases() -from builtins import range -from past.builtins import basestring -from builtins import object + import csv import gzip +import logging import operator import itertools -import logging -import sys +from builtins import object, range +from io import StringIO as string_io -from desktop.lib import i18n +from django.utils.translation import gettext as _ +from past.builtins import basestring, long +from desktop.lib import i18n from indexer.argument import CheckboxArgument, TextDelimiterArgument from indexer.conf import ENABLE_SCALABLE_INDEXER from indexer.fields import Field, guess_field_type_from_samples from indexer.indexers.morphline_operations import get_operator -if sys.version_info[0] > 2: - from io import StringIO as string_io - from past.builtins import long - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from StringIO import StringIO as string_io - LOG = logging.getLogger() @@ -59,8 +50,8 @@ def get_format_types(): ApacheCombinedFormat, SyslogFormat, HueLogFormat, - #RubyLogFormat, - #ParquetFormat + # RubyLogFormat, + # ParquetFormat ]) return formats @@ -69,13 +60,16 @@ def get_format_types(): def get_file_indexable_format_types(): return [format_ for format_ in get_format_types() if format_.is_file_indexable] + def _get_format_mapping(): return dict([(format_.get_name(), format_) for format_ in get_format_types()]) + def get_file_format_class(type_): mapping = _get_format_mapping() return mapping[type_] if type_ in mapping else None + def get_file_format_instance(file, format_=None): file_stream = file['stream'] file_extension = file['name'].split('.')[-1] if '.' 
in file['name'] else '' @@ -368,7 +362,8 @@ def _hasHeader(self, sniffer, sample, dialect): columns = len(header) columnTypes = {} - for i in range(columns): columnTypes[i] = None + for i in range(columns): + columnTypes[i] = None checked = 0 for row in rdr: @@ -408,7 +403,7 @@ def _hasHeader(self, sniffer, sample, dialect): # on whether it's a header hasHeader = 0 for col, colType in list(columnTypes.items()): - if type(colType) == type(0): # it's a length + if type(colType) is type(0): # it's a length if len(header[col]) != colType: hasHeader += 1 else: @@ -678,7 +673,7 @@ class HiveFormat(CSVFormat): "string": "string", "timestamp": "date", "binary": "string", - "decimal": "double", # Won't match decimal(16,6) + "decimal": "double", # Won't match decimal(16,6) "date": "date", } diff --git a/desktop/libs/indexer/src/indexer/indexers/base.py b/desktop/libs/indexer/src/indexer/indexers/base.py index 0e7288648a6..c80223e98b2 100644 --- a/desktop/libs/indexer/src/indexer/indexers/base.py +++ b/desktop/libs/indexer/src/indexer/indexers/base.py @@ -17,15 +17,12 @@ import sys +from django.utils.translation import gettext as _ + from desktop.conf import has_connectors from desktop.lib.connectors.models import _get_installed_connectors from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_unicode - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from desktop.lib.i18n import smart_str def get_api(user, connector_id): @@ -52,7 +49,6 @@ def __init__(self, user, connector_id): def index(self, source, destination, options=None): pass - class IndexerApiException(Exception): def __init__(self, message=None): self.message = message or _('No error message, please check the logs.') @@ -61,4 +57,4 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) diff --git a/desktop/libs/indexer/src/indexer/indexers/envelope.py b/desktop/libs/indexer/src/indexer/indexers/envelope.py index 485407c2029..240141b48f6 100644 --- a/desktop/libs/indexer/src/indexer/indexers/envelope.py +++ b/desktop/libs/indexer/src/indexer/indexers/envelope.py @@ -14,27 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License.import logging -from builtins import object -import logging import os import sys +import logging +from builtins import object from django.urls import reverse +from django.utils.translation import gettext as _ +from desktop.lib.exceptions_renderable import PopupException from hadoop.fs.hadoopfs import Hdfs from indexer.conf import CONFIG_JARS_LIBS_PATH, config_morphline_path from libzookeeper.conf import zkensemble from notebook.models import make_notebook from useradmin.models import User -from desktop.lib.exceptions_renderable import PopupException - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -45,7 +39,6 @@ def __init__(self, username, fs=None, jt=None, solr_client=None): self.jt = jt self.username = username - def _upload_workspace(self, configs): from oozie.models2 import Job @@ -60,7 +53,6 @@ def _upload_workspace(self, configs): return hdfs_workspace_path - def run(self, request, collection_name, configs, input_path, start_time=None, lib_path=None): workspace_path = self._upload_workspace(configs) @@ -70,8 +62,8 
@@ def run(self, request, collection_name, configs, input_path, start_time=None, li task = make_notebook( name=_('Indexing into %s') % collection_name, editor_type='notebook', - #on_success_url=reverse('search:browse', kwargs={'name': collection_name}), - #pub_sub_url='assist.collections.refresh', + # on_success_url=reverse('search:browse', kwargs={'name': collection_name}), + # pub_sub_url='assist.collections.refresh', is_task=True, is_notebook=True, last_executed=start_time @@ -98,7 +90,6 @@ def run(self, request, collection_name, configs, input_path, start_time=None, li return task.execute(request, batch=True) - def generate_config(self, properties): configs = { } @@ -168,7 +159,6 @@ def generate_config(self, properties): else: raise PopupException(_('Input format not recognized: %(inputFormat)s') % properties) - extra_step = '' properties['output_deriver'] = """ deriver { @@ -176,7 +166,7 @@ def generate_config(self, properties): query.literal = \"\"\"SELECT * from inputdata\"\"\" }""" - if properties['inputFormat'] == 'stream' and properties['topics'] == 'NavigatorAuditEvents': # Kudu does not support upper case names + if properties['inputFormat'] == 'stream' and properties['topics'] == 'NavigatorAuditEvents': # Kudu does not support upper case names properties['output_deriver'] = """ deriver { type = sql @@ -205,7 +195,6 @@ def generate_config(self, properties): \"\"\" }""" - if properties['ouputFormat'] == 'file': output = """ %(output_deriver)s @@ -245,7 +234,7 @@ def generate_config(self, properties): table.name = "%(output_table)s" }""" % properties elif properties['ouputFormat'] == 'index': - if True: # Workaround until envelope Solr output is official + if True: # Workaround until envelope Solr output is official morphline_config = open(os.path.join(config_morphline_path(), 'navigator_topic.morphline.conf')).read() configs['navigator_topic.morphline.conf'] = morphline_config.replace( '${SOLR_COLLECTION}', properties['collectionName'] @@ -355,7 +344,7 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N collection_name = destination['name'] indexer = EnvelopeIndexer(request.user, request.fs) - lib_path = None # Todo optional input field + lib_path = None # Todo optional input field input_path = None if file_format['inputFormat'] == 'table': @@ -394,7 +383,7 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N if True: properties['window'] = '' - else: # For "KafkaSQL" + else: # For "KafkaSQL" properties['window'] = ''' window { enabled = true @@ -420,12 +409,12 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N } if destination['outputFormat'] == 'table': - if destination['isTargetExisting']: # Todo: check if format matches + if destination['isTargetExisting']: # Todo: check if format matches pass else: - destination['importData'] = False # Avoid LOAD DATA + destination['importData'] = False # Avoid LOAD DATA if destination['tableFormat'] == 'kudu': - properties['kafkaFieldNames'] = properties['kafkaFieldNames'].lower() # Kudu names should be all lowercase + properties['kafkaFieldNames'] = properties['kafkaFieldNames'].lower() # Kudu names should be all lowercase # Create table if not request.POST.get('show_command'): SQLIndexer( @@ -452,12 +441,11 @@ def _envelope_job(request, file_format, destination, start_time=None, lib_path=N if file_format['inputFormat'] == 'stream': properties['format'] = 'csv' else: - properties['format'] = file_format['tableFormat'] # or csv + 
properties['format'] = file_format['tableFormat'] # or csv elif destination['outputFormat'] == 'index': properties['collectionName'] = collection_name properties['connection'] = SOLR_URL.get() - properties["app_name"] = 'Data Ingest' properties["inputFormat"] = file_format['inputFormat'] properties["ouputFormat"] = destination['ouputFormat'] diff --git a/desktop/libs/indexer/src/indexer/indexers/flink_sql.py b/desktop/libs/indexer/src/indexer/indexers/flink_sql.py index 1d40c932651..faf14ca5b74 100644 --- a/desktop/libs/indexer/src/indexer/indexers/flink_sql.py +++ b/desktop/libs/indexer/src/indexer/indexers/flink_sql.py @@ -14,19 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License.import logging -import logging import sys +import logging from django.urls import reverse +from django.utils.translation import gettext as _ from notebook.models import make_notebook -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -46,7 +41,7 @@ def create_table_from_kafka(self, source, destination, start_time=-1, dry_run=Fa source_type = source['sourceType'] - from desktop.api_public import _get_interpreter_from_dialect # due to a circular import + from desktop.api_public import _get_interpreter_from_dialect # due to a circular import interpreter = _get_interpreter_from_dialect('flink', self.user) editor_type = interpreter['type'] # destination['sourceType'] diff --git a/desktop/libs/indexer/src/indexer/indexers/flume.py b/desktop/libs/indexer/src/indexer/indexers/flume.py index c1f40125761..d33c515d4ab 100644 --- a/desktop/libs/indexer/src/indexer/indexers/flume.py +++ b/desktop/libs/indexer/src/indexer/indexers/flume.py @@ -14,26 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License.import logging -from builtins import object -import logging import os import sys +import logging +from builtins import object from django.urls import reverse +from django.utils.translation import gettext as _ -from libzookeeper.conf import zkensemble +from desktop.lib.exceptions_renderable import PopupException from indexer.conf import config_morphline_path +from libzookeeper.conf import zkensemble from metadata.manager_client import ManagerApi from useradmin.models import User -from desktop.lib.exceptions_renderable import PopupException - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -42,7 +36,6 @@ class FlumeIndexer(object): def __init__(self, user): self.user = user - def start(self, destination_name, file_format, destination): responses = {'status': 0} @@ -59,7 +52,6 @@ def start(self, destination_name, file_format, destination): return responses - def generate_config(self, source, destination): configs = [] @@ -160,7 +152,6 @@ def generate_config(self, source, destination): return configs - def generate_morphline_config(self, destination): # TODO manage generic config, cf. 
MorphlineIndexer morphline_config = open(os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')).read() diff --git a/desktop/libs/indexer/src/indexer/indexers/morphline.py b/desktop/libs/indexer/src/indexer/indexers/morphline.py index ec1c5b7551f..4e67aff1b5f 100644 --- a/desktop/libs/indexer/src/indexer/indexers/morphline.py +++ b/desktop/libs/indexer/src/indexer/indexers/morphline.py @@ -14,32 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License.import logging -from builtins import object -import logging import os import sys - +import logging +from builtins import object from collections import deque from django.urls import reverse +from django.utils.translation import gettext as _ from mako.lookup import TemplateLookup from desktop.models import Document2 -from notebook.connectors.base import get_api -from notebook.models import Notebook, make_notebook -from useradmin.models import User - -from indexer.conf import CONFIG_INDEXING_TEMPLATES_PATH, CONFIG_INDEXER_LIBS_PATH +from indexer.conf import CONFIG_INDEXER_LIBS_PATH, CONFIG_INDEXING_TEMPLATES_PATH from indexer.fields import get_field_type -from indexer.file_format import get_file_format_instance, get_file_format_class +from indexer.file_format import get_file_format_class, get_file_format_instance from indexer.indexers.morphline_operations import get_checked_args from indexer.solr_client import SolrClient - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.connectors.base import get_api +from notebook.models import Notebook, make_notebook +from useradmin.models import User LOG = logging.getLogger() @@ -201,7 +194,7 @@ def generate_morphline_config(self, collection_name, data, uuid_name=None, lib_p "get_kept_args": get_checked_args, "grok_dictionaries_location": grok_dicts_loc if self.fs and self.fs.exists(grok_dicts_loc) else None, "geolite_db_location": geolite_loc if self.fs and self.fs.exists(geolite_loc) else None, - "zk_host": self.solr_client.get_zookeeper_host() ## offline test? + "zk_host": self.solr_client.get_zookeeper_host() # offline test? 
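# [Hedged aside, not part of the diff] The properties dict assembled above is the
# template context that MorphlineIndexer appears to feed into a Mako template (note
# the TemplateLookup import and CONFIG_INDEXING_TEMPLATES_PATH just below). A minimal
# sketch of that rendering step, using only the public Mako API; the directory,
# template file name and context values here are made up for illustration:
from mako.lookup import TemplateLookup

lookup = TemplateLookup(directories=['/tmp/indexing_templates'])        # hypothetical path
template = lookup.get_template('morphline_template.conf.mako')          # hypothetical file
morphline_conf = template.render(collection_name='web_logs',
                                 zk_host='zk-1.example.com:2181/solr')  # sample context values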
} oozie_workspace = CONFIG_INDEXING_TEMPLATES_PATH.get() @@ -224,7 +217,7 @@ def _create_solr_collection(user, fs, client, destination, index_name, kwargs): for field in fields: for operation in field['operations']: if operation['type'] == 'split': - field['multiValued'] = True # Solr requires multiValued to be set when splitting + field['multiValued'] = True # Solr requires multiValued to be set when splitting kwargs['f.%(name)s.split' % field] = 'true' kwargs['f.%(name)s.separator' % field] = operation['settings']['splitChar'] or ',' diff --git a/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py b/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py index 6b238582942..9d4b4492449 100644 --- a/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py +++ b/desktop/libs/indexer/src/indexer/indexers/morphline_operations.py @@ -15,15 +15,11 @@ # limitations under the License.import logging import sys - from builtins import object -from indexer.argument import TextArgument, CheckboxArgument, MappingArgument +from django.utils.translation import gettext as _ -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from indexer.argument import CheckboxArgument, MappingArgument, TextArgument class Operator(object): @@ -58,6 +54,7 @@ def get_default_operation(self): "fields": self._get_default_output_fields() } + OPERATORS = [ Operator( name="split", @@ -127,9 +124,11 @@ def get_default_operation(self): ), ] + def get_operator(operation_name): return [operation for operation in OPERATORS if operation.name == operation_name][0] + def get_checked_args(operation): operation_args = get_operator(operation["type"]).args diff --git a/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py b/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py index 8680e52fdd7..7be69435737 100644 --- a/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py +++ b/desktop/libs/indexer/src/indexer/indexers/morphline_tests.py @@ -14,34 +14,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
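# [Hedged aside, not part of the diff] The import hunks below repeat the commit's core
# pattern: the Python 2 branch of each version-conditional import is dropped and the
# Python 3 form is imported unconditionally. Shape of the change on a generic module,
# with names taken from the surrounding files:
#
#   # before: dual Python 2/3 support
#   if sys.version_info[0] > 2:
#     from io import StringIO as string_io
#     from django.utils.translation import gettext as _
#   else:
#     from StringIO import StringIO as string_io
#     from django.utils.translation import ugettext as _
#
# after: Python 3 only
from io import StringIO as string_io
from django.utils.translation import gettext as _, gettext_lazy as _t

# gettext replaces the ugettext alias that Django 4.0 removed; the lazy variant
# (imported as _t in the conf modules earlier in this diff) is kept where translatable
# strings are built at module import time.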
-from future import standard_library -from builtins import zip -from past.builtins import basestring -from builtins import object +import sys +import logging +from builtins import object, zip from copy import deepcopy +from io import StringIO as string_io -import logging import pytest -import sys +from future import standard_library +from past.builtins import basestring from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.test_utils import grant_access, add_to_group +from desktop.lib.test_utils import add_to_group, grant_access from hadoop.pseudo_hdfs4 import is_live_cluster, shared_cluster -from useradmin.models import User - from indexer.conf import ENABLE_SCALABLE_INDEXER from indexer.controller import CollectionManagerController -from indexer.file_format import ApacheCombinedFormat, RubyLogFormat, HueLogFormat from indexer.fields import Field -from indexer.indexers.morphline_operations import get_operator +from indexer.file_format import ApacheCombinedFormat, HueLogFormat, RubyLogFormat from indexer.indexers.morphline import MorphlineIndexer +from indexer.indexers.morphline_operations import get_operator from indexer.solr_client import SolrClient from indexer.solr_client_tests import MockSolrCdhCloudHdfsApi - -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from StringIO import StringIO as string_io +from useradmin.models import User standard_library.install_aliases() diff --git a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py index eef290a91ee..cb2c31a1865 100644 --- a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py +++ b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql.py @@ -16,24 +16,15 @@ import csv import logging -import sys -import uuid +from io import StringIO as string_io +from urllib.parse import unquote as urllib_unquote, urlparse + from django.urls import reverse +from django.utils.translation import gettext as _ from notebook.conf import get_ordered_interpreters from notebook.models import make_notebook -if sys.version_info[0] > 2: - from io import StringIO as string_io - from urllib.parse import urlparse, unquote as urllib_unquote - from django.utils.translation import gettext as _ -else: - from cStringIO import StringIO as string_io - from django.utils.translation import ugettext as _ - from urllib import unquote as urllib_unquote - from urlparse import urlparse - - LOG = logging.getLogger() @@ -87,7 +78,7 @@ def create_table_from_file(self, request, source, destination, start_time=-1, dr if (source['format']['hasHeader'] and count == 0) or not csv_row: continue else: - _sql = ', '.join([ "'{0}'".format(col_val) if columns[count]['type'] in ('varchar', 'timestamp') \ + _sql = ', '.join(["'{0}'".format(col_val) if columns[count]['type'] in ('varchar', 'timestamp') else '{0}'.format(col_val) for count, col_val in enumerate(csv_row)]) sql += '''\nUPSERT INTO %(table_name)s VALUES (%(csv_row)s);\n''' % { @@ -95,7 +86,7 @@ def create_table_from_file(self, request, source, destination, start_time=-1, dr 'table_name': table_name, 'csv_row': _sql } - + if dry_run: return sql else: diff --git a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py index 820ffaa08e2..94bcf0dbf81 100644 --- a/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py +++ b/desktop/libs/indexer/src/indexer/indexers/phoenix_sql_tests.py @@ -17,15 +17,11 @@ # limitations under the 
License.from indexer.indexers.phoenix_sql import PhoenixIndexer import sys +from unittest.mock import MagicMock, Mock, patch from desktop.settings import BASE_DIR from indexer.indexers.phoenix_sql import PhoenixIndexer -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - def test_create_table_phoenix(): with patch('indexer.indexers.phoenix_sql.get_ordered_interpreters') as get_ordered_interpreters: diff --git a/desktop/libs/indexer/src/indexer/indexers/rdbms.py b/desktop/libs/indexer/src/indexer/indexers/rdbms.py index 8033ada2b5b..07ca029992e 100644 --- a/desktop/libs/indexer/src/indexer/indexers/rdbms.py +++ b/desktop/libs/indexer/src/indexer/indexers/rdbms.py @@ -15,14 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json -import logging import sys +import json import uuid +import logging from django.urls import reverse +from django.utils.translation import gettext as _ -from librdbms.conf import DATABASES, get_database_password, get_server_choices, get_connector_name +from desktop.lib.django_util import JsonResponse +from desktop.lib.i18n import smart_str +from librdbms.conf import DATABASES, get_connector_name, get_database_password, get_server_choices from librdbms.jdbc import Jdbc from librdbms.server import dbms as rdbms from notebook.conf import get_ordered_interpreters @@ -30,15 +33,6 @@ from notebook.models import make_notebook from useradmin.models import User -from desktop.lib.django_util import JsonResponse -from desktop.lib.i18n import smart_str - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -65,6 +59,7 @@ def get_db_component(request): return JsonResponse(format_) + def _get_api(request): file_format = json.loads(request.POST.get('source', request.POST.get('fileFormat', '{}'))) options = None @@ -72,7 +67,7 @@ def _get_api(request): if file_format['rdbmsMode'] == 'customRdbms': type = 'custom' if file_format['rdbmsType'] == 'jdbc': - name = file_format['rdbmsHostname'] # We make sure it's unique as name is the cache key + name = file_format['rdbmsHostname'] # We make sure it's unique as name is the cache key interface = file_format['rdbmsType'] options = {'driver': file_format['rdbmsJdbcDriver'], 'url': file_format['rdbmsHostname'], @@ -90,7 +85,7 @@ def _get_api(request): 'options': {}, 'alias': file_format['rdbmsType'] } - name = 'rdbms:%(server_name)s://%(server_host)s:%(server_port)s' % query_server # We make sure it's unique as name is the cache key + name = 'rdbms:%(server_name)s://%(server_host)s:%(server_port)s' % query_server # We make sure it's unique as name is the cache key else: if file_format['rdbmsType'] == 'jdbc': type = file_format['rdbmsJdbcDriverName'] and file_format['rdbmsJdbcDriverName'].lower() @@ -102,6 +97,7 @@ def _get_api(request): return get_api(request, {'type': type, 'interface': interface, 'options': options, 'query_server': query_server, 'name': name}) + def jdbc_db_list(request): format_ = {'data': [], 'status': 1} interpreters = get_ordered_interpreters(request.user) @@ -110,6 +106,7 @@ def jdbc_db_list(request): return JsonResponse(format_) + def get_drivers(request): format_ = {'data': [], 'status': 1} servers_dict = dict(get_server_choices()) @@ -120,6 +117,7 @@ def get_drivers(request): return JsonResponse(format_) + def run_sqoop(request, source, destination, 
start_time): rdbms_mode = source['rdbmsMode'] rdbms_name = source['rdbmsJdbcDriverName'] if source['rdbmsType'] == 'jdbc' else source['rdbmsType'] @@ -184,7 +182,7 @@ def run_sqoop(request, source, destination, start_time): 'url': url, 'rdbmsPort': rdbms_port } - + password_file_path = request.fs.join(request.fs.get_home_dir() + '/sqoop/', uuid.uuid4().hex + '.password') request.fs.do_as_user( request.user, diff --git a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py index 9776fddebe1..bfa1b684bf9 100644 --- a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py +++ b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py @@ -36,23 +36,21 @@ def mock_uuid(): @pytest.mark.django_db class TestSQLIndexer(object): - def setup_method(self): self.client = make_logged_in_client(username="test", groupname="empty", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") def test_create_table_from_a_file_to_csv(self): - fs = Mock( - stats=Mock(return_value={'mode': 0o0777}) - ) + fs = Mock(stats=Mock(return_value={'mode': 0o0777})) def source_dict(key): return { 'path': 'hdfs:///path/data.csv', 'format': {'quoteChar': '"', 'fieldSeparator': ','}, - 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}], - 'sourceType': 'hive' + 'sampleCols': [{'operations': [], 'comment': '', 'name': 'customers.id'}], + 'sourceType': 'hive', }.get(key, Mock()) + source = MagicMock() source.__getitem__.side_effect = source_dict @@ -66,16 +64,18 @@ def destination_dict(key): 'columns': [{'name': 'id', 'type': 'int'}], 'partitionColumns': [{'name': 'day', 'type': 'date', 'partitionValue': '20200101'}], 'description': 'No comment!', - 'sourceType': 'hive-1' + 'sourceType': 'hive-1', }.get(key, Mock()) + destination = MagicMock() destination.__getitem__.side_effect = destination_dict with patch('notebook.models.get_interpreter') as get_interpreter: notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination) - assert ( - [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; + assert [ + statement.strip() + for statement in '''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table` ( @@ -98,8 +98,8 @@ def destination_dict(key): AS SELECT * FROM `default`.`hue__tmp_export_table`; -DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')] == - [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]) +DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';') + ] == [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')] @patch('uuid.uuid4', mock_uuid) def test_create_table_from_a_file_to_csv_for_kms_encryption(self): @@ -119,9 +119,10 @@ def enc_source_dict(key): return { 'path': '/enc_zn/upload_dir/data.csv', 'format': {'quoteChar': '"', 'fieldSeparator': ','}, - 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}], - 'sourceType': 'hive' + 'sampleCols': [{'operations': [], 'comment': '', 'name': 'customers.id'}], + 'sourceType': 'hive', }.get(key, Mock()) + source = MagicMock() source.__getitem__.side_effect = enc_source_dict @@ -135,24 +136,24 @@ def destination_dict(key): 'columns': [{'name': 'id', 'type': 'int'}], 'partitionColumns': [{'name': 'day', 'type': 'date', 'partitionValue': '20200101'}], 'description': 'No comment!', - 
'sourceType': 'hive-1' + 'sourceType': 'hive-1', }.get(key, Mock()) + destination = MagicMock() destination.__getitem__.side_effect = destination_dict fs = Mock( - stats=Mock( - return_value=MockStat() - ), - parent_path=mock_parent_path, - get_home_dir=Mock(return_value='/user/test'), + stats=Mock(return_value=MockStat()), + parent_path=mock_parent_path, + get_home_dir=Mock(return_value='/user/test'), ) notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination) # source dir is in encryption zone, so the scratch dir is in the same dir - assert ( - [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; + assert [ + statement.strip() + for statement in '''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table` ( `id` int ) COMMENT "No comment!" @@ -172,32 +173,32 @@ def destination_dict(key): TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only') AS SELECT * FROM `default`.`hue__tmp_export_table`; -DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')] == - [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]) +DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';') # noqa: E501 + ] == [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')] fs = Mock( - stats=Mock( - return_value=MockStat(encBit=False) - ), - parent_path=mock_parent_path, - get_home_dir=Mock(return_value='/user/test'), + stats=Mock(return_value=MockStat(encBit=False)), + parent_path=mock_parent_path, + get_home_dir=Mock(return_value='/user/test'), ) def source_dict(key): return { 'path': '/user/test/data.csv', 'format': {'quoteChar': '"', 'fieldSeparator': ','}, - 'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id'}], - 'sourceType': 'hive' + 'sampleCols': [{'operations': [], 'comment': '', 'name': 'customers.id'}], + 'sourceType': 'hive', }.get(key, Mock()) + source = MagicMock() source.__getitem__.side_effect = source_dict notebook = SQLIndexer(user=self.user, fs=fs).create_table_from_a_file(source, destination) # source dir is not in encryption zone, so the scratch dir is in user's home dir - assert ( - [statement.strip() for statement in u'''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; + assert [ + statement.strip() + for statement in '''DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`; CREATE TABLE IF NOT EXISTS `default`.`hue__tmp_export_table` ( `id` int ) COMMENT "No comment!" 
@@ -217,8 +218,8 @@ def source_dict(key): TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only') AS SELECT * FROM `default`.`hue__tmp_export_table`; -DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';')] == - [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')]) +DROP TABLE IF EXISTS `default`.`hue__tmp_export_table`;'''.split(';') # noqa: E501 + ] == [statement.strip() for statement in notebook.get_data()['snippets'][0]['statement_raw'].split(';')] class MockRequest(object): @@ -254,66 +255,292 @@ def stats(self, path): @pytest.mark.django_db def test_generate_create_text_table_with_data_partition(): source = { - u'sourceType': 'hive', u'sampleCols': [{u'operations': [], u'comment': u'', u'name': u'customers.id', u'level': 0, - u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', - u'multiValued': False, u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True}, - {u'operations': [], u'comment': u'', u'name': u'customers.name', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, - u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'customers.email_preferences', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type': - u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'customers.addresses', - u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, - u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, - {u'operations': [], u'comment': u'', u'name': u'customers.orders', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, - u'type': u'string', u'showProperties': False, u'keep': True}], u'name': u'', u'inputFormat': u'file', - u'format': {u'status': 0, u'fieldSeparator': u',', u'hasHeader': True, u'quoteChar': u'"', - u'recordSeparator': u'\\n', u'type': u'csv'}, u'defaultName': u'default.customer_stats', u'show': True, - u'tableName': u'', u'sample': [], u'apiHelperType': u'hive', u'inputFormatsAll': [{u'name': u'File', u'value': u'file'}, - {u'name': u'Manually', u'value': u'manual'}, {u'name': u'SQL Query', u'value': u'query'}, - {u'name': u'Table', u'value': u'table'}], u'query': u'', u'databaseName': u'default', u'table': u'', - u'inputFormats': [{u'name': u'File', u'value': u'file'}, {u'name': u'Manually', u'value': u'manual'}, - {u'name': u'SQL Query', u'value': u'query'}, {u'name': u'Table', u'value': u'table'}], - u'path': u'/user/romain/customer_stats.csv', u'draggedQuery': u'', - u'inputFormatsManual': [{u'name': u'Manually', u'value': u'manual'}], u'isObjectStore': False + 'sourceType': 'hive', + 'sampleCols': [ + { + 'operations': [], + 'comment': '', + 'name': 'customers.id', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'bigint', + 'showProperties': 
False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.name', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.email_preferences', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.addresses', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.orders', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + ], + 'name': '', + 'inputFormat': 'file', + 'format': {'status': 0, 'fieldSeparator': ',', 'hasHeader': True, 'quoteChar': '"', 'recordSeparator': '\\n', 'type': 'csv'}, + 'defaultName': 'default.customer_stats', + 'show': True, + 'tableName': '', + 'sample': [], + 'apiHelperType': 'hive', + 'inputFormatsAll': [ + {'name': 'File', 'value': 'file'}, + {'name': 'Manually', 'value': 'manual'}, + {'name': 'SQL Query', 'value': 'query'}, + {'name': 'Table', 'value': 'table'}, + ], + 'query': '', + 'databaseName': 'default', + 'table': '', + 'inputFormats': [ + {'name': 'File', 'value': 'file'}, + {'name': 'Manually', 'value': 'manual'}, + {'name': 'SQL Query', 'value': 'query'}, + {'name': 'Table', 'value': 'table'}, + ], + 'path': '/user/romain/customer_stats.csv', + 'draggedQuery': '', + 'inputFormatsManual': [{'name': 'Manually', 'value': 'manual'}], + 'isObjectStore': False, } destination = { - u'isTransactional': False, u'isInsertOnly': False, u'sourceType': 'hive', - u'KUDU_DEFAULT_PARTITION_COLUMN': {u'int_val': 16, u'name': u'HASH', u'columns': [], - u'range_partitions': [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', - u'lower_val': 0, u'values': [{u'value': u''}]}]}, u'isTargetChecking': False, u'tableName': u'customer_stats', - u'outputFormatsList': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr index', u'value': u'index'}, - {u'name': u'File', u'value': u'file'}, {u'name': u'Database', u'value': u'database'}], u'customRegexp': u'', - u'isTargetExisting': False, u'partitionColumns': [{u'operations': [], u'comment': u'', u'name': u'new_field_1', - u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': True, u'length': 100, - u'partitionValue': u'AAA', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], - u'useCustomDelimiters': False, u'apiHelperType': u'hive', u'kuduPartitionColumns': [], - u'outputFormats': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr index', u'value': u'index'}], - u'customMapDelimiter': u'\\003', u'showProperties': False, u'useDefaultLocation': True, u'description': u'', - u'primaryKeyObjects': [], 
u'customFieldDelimiter': u',', u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, - u'useCopy': False, u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, - u'name': u'VALUES', u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [], - u'outputFormat': u'table', u'nonDefaultLocation': u'/user/romain/customer_stats.csv', u'name': u'default.customer_stats', - u'tableFormat': u'text', 'ouputFormat': u'table', - u'bulkColumnNames': u'customers.id,customers.name,customers.email_preferences,customers.addresses,customers.orders', - u'columns': [{u'operations': [], u'comment': u'', u'name': u'customers.id', u'level': 0, u'keyType': u'string', - u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'customers.name', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, - u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, - u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'customers.email_preferences', u'level': 0, u'keyType': u'string', - u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'customers.addresses', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, - u'length': 100, u'partitionValue': u'', u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, - u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'customers.orders', u'level': 0, u'keyType': u'string', - u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], u'hasHeader': True, - u'tableFormats': [{u'name': u'Text', u'value': u'text'}, {u'name': u'Parquet', u'value': u'parquet'}, - {u'name': u'Kudu', u'value': u'kudu'}, {u'name': u'Csv', u'value': u'csv'}, {u'name': u'Avro', u'value': u'avro'}, - {u'name': u'Json', u'value': u'json'}, {u'name': u'Regexp', u'value': u'regexp'}, {u'name': u'ORC', u'value': u'orc'}], - u'customCollectionDelimiter': u'\\002' + 'isTransactional': False, + 'isInsertOnly': False, + 'sourceType': 'hive', + 'KUDU_DEFAULT_PARTITION_COLUMN': { + 'int_val': 16, + 'name': 'HASH', + 'columns': [], + 'range_partitions': [ + {'include_upper_val': '<=', 'upper_val': 1, 'name': 'VALUES', 'include_lower_val': '<=', 'lower_val': 0, 'values': [{'value': ''}]} + ], + }, + 'isTargetChecking': False, + 'tableName': 'customer_stats', + 'outputFormatsList': [ + {'name': 'Table', 'value': 'table'}, + {'name': 'Solr index', 'value': 'index'}, + {'name': 'File', 'value': 'file'}, + {'name': 'Database', 'value': 'database'}, + ], + 'customRegexp': '', + 'isTargetExisting': False, + 'partitionColumns': [ + { + 'operations': [], + 'comment': '', + 'name': 'new_field_1', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': True, + 'length': 100, + 'partitionValue': 'AAA', + 'multiValued': False, + 'unique': False, + 'type': 
'string', + 'showProperties': False, + 'keep': True, + } + ], + 'useCustomDelimiters': False, + 'apiHelperType': 'hive', + 'kuduPartitionColumns': [], + 'outputFormats': [{'name': 'Table', 'value': 'table'}, {'name': 'Solr index', 'value': 'index'}], + 'customMapDelimiter': '\\003', + 'showProperties': False, + 'useDefaultLocation': True, + 'description': '', + 'primaryKeyObjects': [], + 'customFieldDelimiter': ',', + 'existingTargetUrl': '', + 'importData': True, + 'isIceberg': False, + 'useCopy': False, + 'databaseName': 'default', + 'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': { + 'include_upper_val': '<=', + 'upper_val': 1, + 'name': 'VALUES', + 'include_lower_val': '<=', + 'lower_val': 0, + 'values': [{'value': ''}], + }, + 'primaryKeys': [], + 'outputFormat': 'table', + 'nonDefaultLocation': '/user/romain/customer_stats.csv', + 'name': 'default.customer_stats', + 'tableFormat': 'text', + 'ouputFormat': 'table', + 'bulkColumnNames': 'customers.id,customers.name,customers.email_preferences,customers.addresses,customers.orders', + 'columns': [ + { + 'operations': [], + 'comment': '', + 'name': 'customers.id', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'bigint', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.name', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.email_preferences', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.addresses', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'customers.orders', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + ], + 'hasHeader': True, + 'tableFormats': [ + {'name': 'Text', 'value': 'text'}, + {'name': 'Parquet', 'value': 'parquet'}, + {'name': 'Kudu', 'value': 'kudu'}, + {'name': 'Csv', 'value': 'csv'}, + {'name': 'Avro', 'value': 'avro'}, + {'name': 'Json', 'value': 'json'}, + {'name': 'Regexp', 'value': 'regexp'}, + {'name': 'ORC', 'value': 'orc'}, + ], + 'customCollectionDelimiter': '\\002', } request = MockRequest(fs=MockFs()) @@ -337,86 +564,422 @@ def test_generate_create_text_table_with_data_partition(): ;''' assert statement in sql, sql - assert ('''LOAD DATA INPATH '/user/romain/customer_stats.csv' ''' - '''INTO TABLE `default`.`customer_stats` PARTITION (new_field_1='AAA');''' in sql), sql + assert ( + '''LOAD DATA INPATH '/user/romain/customer_stats.csv' ''' + '''INTO TABLE `default`.`customer_stats` PARTITION (new_field_1='AAA');''' in sql + ), sql 
@pytest.mark.django_db def test_generate_create_kudu_table_with_data(): source = { - u'sourceType': 'impala', u'apiHelperType': 'hive', u'sampleCols': [], u'name': u'', u'inputFormat': u'file', - u'format': {u'quoteChar': u'"', u'recordSeparator': u'\\n', u'type': u'csv', u'hasHeader': True, u'fieldSeparator': u','}, - u'show': True, u'tableName': u'', u'sample': [], u'defaultName': u'index_data', u'query': u'', u'databaseName': u'default', - u'table': u'', u'inputFormats': [{u'name': u'File', u'value': u'file'}, {u'name': u'Manually', u'value': u'manual'}], - u'path': u'/user/admin/index_data.csv', u'draggedQuery': u'', u'isObjectStore': False + 'sourceType': 'impala', + 'apiHelperType': 'hive', + 'sampleCols': [], + 'name': '', + 'inputFormat': 'file', + 'format': {'quoteChar': '"', 'recordSeparator': '\\n', 'type': 'csv', 'hasHeader': True, 'fieldSeparator': ','}, + 'show': True, + 'tableName': '', + 'sample': [], + 'defaultName': 'index_data', + 'query': '', + 'databaseName': 'default', + 'table': '', + 'inputFormats': [{'name': 'File', 'value': 'file'}, {'name': 'Manually', 'value': 'manual'}], + 'path': '/user/admin/index_data.csv', + 'draggedQuery': '', + 'isObjectStore': False, } destination = { - u'isTransactional': False, u'isInsertOnly': False, u'sourceType': 'impala', - u'KUDU_DEFAULT_PARTITION_COLUMN': {u'int_val': 16, u'name': u'HASH', u'columns': [], - u'range_partitions': [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', - u'lower_val': 0, u'values': [{u'value': u''}]}]}, u'tableName': u'index_data', - u'outputFormatsList': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr+index', u'value': u'index'}, - {u'name': u'File', u'value': u'file'}, {u'name': u'Database', u'value': u'database'}], u'customRegexp': u'', - u'isTargetExisting': False, u'partitionColumns': [], u'useCustomDelimiters': True, - u'kuduPartitionColumns': [{u'int_val': 16, u'name': u'HASH', u'columns': [u'id'], - u'range_partitions': [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', - u'lower_val': 0, u'values': [{u'value': u''}]}]}], u'outputFormats': [{u'name': u'Table', u'value': u'table'}, - {u'name': u'Solr+index', u'value': u'index'}], u'customMapDelimiter': None, u'showProperties': False, u'useDefaultLocation': True, - u'description': u'Big Data', u'primaryKeyObjects': [{u'operations': [], u'comment': u'', u'name': u'id', u'level': 0, - u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, - u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], u'customFieldDelimiter': u',', - u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'useCopy': False, u'databaseName': u'default', - u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', - u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [u'id'], - u'outputFormat': u'table', u'nonDefaultLocation': u'/user/admin/index_data.csv', u'name': u'index_data', - u'tableFormat': u'kudu', - u'bulkColumnNames': u'business_id,cool,date,funny,id,stars,text,type,useful,user_id,name,full_address,latitude,' - 'longitude,neighborhoods,open,review_count,state', u'columns': [{u'operations': [], u'comment': u'', u'name': u'business_id', - u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, - u'multiValued': False, 
u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, - {u'operations': [], u'comment': u'', u'name': u'cool', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'bigint', - u'showProperties': False, u'keep': False}, {u'operations': [], u'comment': u'', u'name': u'date', u'level': 0, - u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, - u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'funny', u'level': 0, u'scale': 4, u'precision': 10, u'keyType': u'string', u'required': False, u'nested': [], - u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'decimal', u'showProperties': False, - u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'id', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', - u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'stars', u'level': 0, - u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, - u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'text', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, - u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, - {u'operations': [], u'comment': u'', u'name': u'type', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], - u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, - u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'useful', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'bigint', - u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'user_id', u'level': 0, - u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, - u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'name', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, - u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, - {u'operations': [], u'comment': u'', u'name': u'full_address', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', - u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'latitude', u'level': 0, - u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, - u'unique': False, u'type': u'double', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'longitude', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': 
False, - u'length': 100, u'multiValued': False, u'unique': False, u'type': u'double', u'showProperties': False, u'keep': True}, - {u'operations': [], u'comment': u'', u'name': u'neighborhoods', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', - u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', u'name': u'open', u'level': 0, - u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, - u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}, {u'operations': [], u'comment': u'', - u'name': u'review_count', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, - u'length': 100, u'multiValued': False, u'unique': False, u'type': u'bigint', u'showProperties': False, u'keep': True}, - {u'operations': [], u'comment': u'', u'name': u'state', u'level': 0, u'keyType': u'string', u'required': False, - u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', - u'showProperties': False, u'keep': True}], u'hasHeader': True, u'tableFormats': [{u'name': u'Text', u'value': u'text'}, - {u'name': u'Parquet', u'value': u'parquet'}, {u'name': u'Json', u'value': u'json'}, {u'name': u'Kudu', u'value': u'kudu'}, - {u'name': u'Avro', u'value': u'avro'}, {u'name': u'Regexp', u'value': u'regexp'}, {u'name': u'RCFile', u'value': u'rcfile'}, - {u'name': u'ORC', u'value': u'orc'}, {u'name': u'SequenceFile', u'value': u'sequencefile'}], u'customCollectionDelimiter': None + 'isTransactional': False, + 'isInsertOnly': False, + 'sourceType': 'impala', + 'KUDU_DEFAULT_PARTITION_COLUMN': { + 'int_val': 16, + 'name': 'HASH', + 'columns': [], + 'range_partitions': [ + {'include_upper_val': '<=', 'upper_val': 1, 'name': 'VALUES', 'include_lower_val': '<=', 'lower_val': 0, 'values': [{'value': ''}]} + ], + }, + 'tableName': 'index_data', + 'outputFormatsList': [ + {'name': 'Table', 'value': 'table'}, + {'name': 'Solr+index', 'value': 'index'}, + {'name': 'File', 'value': 'file'}, + {'name': 'Database', 'value': 'database'}, + ], + 'customRegexp': '', + 'isTargetExisting': False, + 'partitionColumns': [], + 'useCustomDelimiters': True, + 'kuduPartitionColumns': [ + { + 'int_val': 16, + 'name': 'HASH', + 'columns': ['id'], + 'range_partitions': [ + { + 'include_upper_val': '<=', + 'upper_val': 1, + 'name': 'VALUES', + 'include_lower_val': '<=', + 'lower_val': 0, + 'values': [{'value': ''}], + } + ], + } + ], + 'outputFormats': [{'name': 'Table', 'value': 'table'}, {'name': 'Solr+index', 'value': 'index'}], + 'customMapDelimiter': None, + 'showProperties': False, + 'useDefaultLocation': True, + 'description': 'Big Data', + 'primaryKeyObjects': [ + { + 'operations': [], + 'comment': '', + 'name': 'id', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + } + ], + 'customFieldDelimiter': ',', + 'existingTargetUrl': '', + 'importData': True, + 'isIceberg': False, + 'useCopy': False, + 'databaseName': 'default', + 'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': { + 'include_upper_val': '<=', + 'upper_val': 1, + 'name': 'VALUES', + 'include_lower_val': '<=', + 'lower_val': 0, + 'values': [{'value': ''}], + }, + 'primaryKeys': ['id'], + 
'outputFormat': 'table', + 'nonDefaultLocation': '/user/admin/index_data.csv', + 'name': 'index_data', + 'tableFormat': 'kudu', + 'bulkColumnNames': 'business_id,cool,date,funny,id,stars,text,type,useful,user_id,name,full_address,latitude,' + 'longitude,neighborhoods,open,review_count,state', + 'columns': [ + { + 'operations': [], + 'comment': '', + 'name': 'business_id', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'cool', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'bigint', + 'showProperties': False, + 'keep': False, + }, + { + 'operations': [], + 'comment': '', + 'name': 'date', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'funny', + 'level': 0, + 'scale': 4, + 'precision': 10, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'decimal', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'id', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'stars', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'bigint', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'text', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'type', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'useful', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'bigint', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'user_id', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'name', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 
'full_address', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'latitude', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'double', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'longitude', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'double', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'neighborhoods', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'open', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'review_count', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'bigint', + 'showProperties': False, + 'keep': True, + }, + { + 'operations': [], + 'comment': '', + 'name': 'state', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'multiValued': False, + 'unique': False, + 'type': 'string', + 'showProperties': False, + 'keep': True, + }, + ], + 'hasHeader': True, + 'tableFormats': [ + {'name': 'Text', 'value': 'text'}, + {'name': 'Parquet', 'value': 'parquet'}, + {'name': 'Json', 'value': 'json'}, + {'name': 'Kudu', 'value': 'kudu'}, + {'name': 'Avro', 'value': 'avro'}, + {'name': 'Regexp', 'value': 'regexp'}, + {'name': 'RCFile', 'value': 'rcfile'}, + {'name': 'ORC', 'value': 'orc'}, + {'name': 'SequenceFile', 'value': 'sequencefile'}, + ], + 'customCollectionDelimiter': None, } request = MockRequest(fs=MockFs()) @@ -452,7 +1015,8 @@ def test_generate_create_kudu_table_with_data(): TBLPROPERTIES('skip.header.line.count'='1', 'transactional'='false')''' assert statement in sql, sql - assert ('''CREATE TABLE `default`.`index_data` COMMENT "Big Data" + assert ( + '''CREATE TABLE `default`.`index_data` COMMENT "Big Data" PRIMARY KEY (id) PARTITION BY HASH PARTITIONS 16 STORED AS kudu @@ -460,13 +1024,15 @@ def test_generate_create_kudu_table_with_data(): 'kudu.num_tablet_replicas'='1' ) AS SELECT `id`, `business_id`, `date`, `funny`, `stars`, `text`, `type`, `useful`, `user_id`, `name`, ''' - '''`full_address`, `latitude`, `longitude`, `neighborhoods`, `open`, `review_count`, `state` - FROM `default`.`hue__tmp_index_data`''' in sql), sql + '''`full_address`, `latitude`, `longitude`, `neighborhoods`, `open`, `review_count`, `state` + FROM `default`.`hue__tmp_index_data`''' in sql + ), sql @pytest.mark.django_db def test_generate_create_parquet_table(): - source = json.loads('''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",''' + 
source = json.loads( + '''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",''' '''"-121.92150116"],["Citi Bank","2800000.0","US","Richmond","37.5242004395","-77.4932022095"],["Deutsche Bank","2600000.0","US",''' '''"Corpus Christi","40.7807998657","-73.9772033691"],["Thomson Reuters","2400000.0","US","Albany","35.7976989746",''' '''"-78.6252975464"],''' @@ -494,7 +1060,8 @@ def test_generate_create_parquet_table(): '''"fieldSeparator":",","recordSeparator":"\\n","quoteChar":"\\"","hasHeader":true,"status":0},"show":true,"defaultName":''' '''"default.query-hive-360"}''' ) - destination = json.loads('''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"''' + destination = json.loads( + '''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"''' ''',"apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":[{"name":"Table","value":"table"},''' '''{"name":"Solr index","value":"index"},{"name":"File","value":"file"},{"name":"Database","value":"database"}],''' '''"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],"columns":[{"operations":[],''' @@ -548,11 +1115,14 @@ def test_generate_create_parquet_table(): ;''' assert statement in sql, sql - assert '''CREATE TABLE `default`.`parquet_table` + assert ( + '''CREATE TABLE `default`.`parquet_table` STORED AS parquet AS SELECT * FROM `default`.`hue__tmp_parquet_table`; -''' in sql, sql +''' + in sql + ), sql assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;''' in sql, sql @@ -710,7 +1280,8 @@ def test_generate_create_avro_table(): @pytest.mark.django_db def test_generate_create_iceberg_table(): - source = json.loads('''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",''' + source = json.loads( + '''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",''' '''"-121.92150116"],["Citi Bank","2800000.0","US","Richmond","37.5242004395","-77.4932022095"],["Deutsche Bank","2600000.0","US",''' '''"Corpus Christi","40.7807998657","-73.9772033691"],["Thomson Reuters","2400000.0","US","Albany","35.7976989746",''' '''"-78.6252975464"],''' @@ -738,7 +1309,8 @@ def test_generate_create_iceberg_table(): '''"fieldSeparator":",","recordSeparator":"\\n","quoteChar":"\\"","hasHeader":true,"status":0},"show":true,"defaultName":''' '''"default.query-hive-360"}''' ) - destination = json.loads('''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"''' + destination = json.loads( + '''{"isTransactional": false, "isInsertOnly": false, "sourceType": "hive", "name":"default.parquet_table"''' ''',"apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":[{"name":"Table","value":"table"},''' '''{"name":"Solr index","value":"index"},{"name":"File","value":"file"},{"name":"Database","value":"database"}],''' '''"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],"columns":[{"operations":[],''' @@ -793,19 +1365,23 @@ def test_generate_create_iceberg_table(): ;''' assert statement in sql, sql - assert '''CREATE TABLE `default`.`parquet_table` + assert ( + '''CREATE TABLE `default`.`parquet_table` STORED BY ICEBERG STORED AS parquet AS SELECT * FROM `default`.`hue__tmp_parquet_table`; -''' in sql, sql +''' + in sql + ), sql assert '''DROP 
TABLE IF EXISTS `default`.`hue__tmp_parquet_table`;''' in sql, sql @pytest.mark.django_db def test_generate_create_orc_table_transactional(): - source = json.loads('''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",''' + source = json.loads( + '''{"sourceType": "hive", "name":"","sample":[["Bank Of America","3000000.0","US","Miami","37.6801986694",''' '''"-121.92150116"],["Citi Bank","2800000.0","US","Richmond","37.5242004395","-77.4932022095"],["Deutsche Bank","2600000.0","US",''' '''"Corpus Christi","40.7807998657","-73.9772033691"],["Thomson Reuters","2400000.0","US","Albany","35.7976989746",''' '''"-78.6252975464"],''' @@ -832,36 +1408,37 @@ def test_generate_create_orc_table_transactional(): '''"apiHelperType":"hive","query":"","draggedQuery":"","format":{"type":"csv","fieldSeparator":",","recordSeparator":"\\n",''' '''"quoteChar":"\\"","hasHeader":true,"status":0},"show":true,"defaultName":"default.query-hive-360"}''' ) - destination = json.loads('''{"isTransactional": true, "isInsertOnly": true, "sourceType": "hive", "name":''' - '''"default.parquet_table","apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":''' - '''[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"},{"name":"File","value":"file"},''' - '''{"name":"Database","value":"database"}],"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],''' - '''"columns":[{"operations":[],"comment":"","nested":[],"name":"acct_client","level":0,"keyType":"string","required":false,''' - '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' - '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"tran_amount",''' - '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,''' - '''"partitionValue":"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0},''' - '''{"operations":[],"comment":"","nested":[],"name":"tran_country_cd","level":0,"keyType":"string","required":false,''' - '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' - '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city",''' - '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,''' - '''"partitionValue":"","multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},''' - '''{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lat","level":0,"keyType":"string","required":false,''' - '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' - '''"type":"double","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lon",''' - '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":''' - '''"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0}],"bulkColumnNames":"acct_client,''' - '''tran_amount,tran_country_cd,vrfcn_city,vrfcn_city_lat,vrfcn_city_lon","showProperties":false,"isTargetExisting":false,''' - 
'''"isTargetChecking":false,"existingTargetUrl":"","tableName":"parquet_table","databaseName":"default","tableFormat":"orc",''' - '''"KUDU_DEFAULT_RANGE_PARTITION_COLUMN":{"values":[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=",''' - '''"upper_val":1,"include_upper_val":"<="},"KUDU_DEFAULT_PARTITION_COLUMN":{"columns":[],"range_partitions":[{"values":''' - '''[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=","upper_val":1,"include_upper_val":"<="}],"name":"HASH",''' - '''"int_val":16},"tableFormats":[{"value":"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},''' - '''{"value":"csv","name":"Csv"},{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},''' - '''{"value":"orc","name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],''' - '''"importData":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,''' - '''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",''' - '''"customRegexp":"","isIceberg":false,"useCopy":false}''' + destination = json.loads( + '''{"isTransactional": true, "isInsertOnly": true, "sourceType": "hive", "name":''' + '''"default.parquet_table","apiHelperType":"hive","description":"","outputFormat":"table","outputFormatsList":''' + '''[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"},{"name":"File","value":"file"},''' + '''{"name":"Database","value":"database"}],"outputFormats":[{"name":"Table","value":"table"},{"name":"Solr index","value":"index"}],''' + '''"columns":[{"operations":[],"comment":"","nested":[],"name":"acct_client","level":0,"keyType":"string","required":false,''' + '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' + '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"tran_amount",''' + '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,''' + '''"partitionValue":"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0},''' + '''{"operations":[],"comment":"","nested":[],"name":"tran_country_cd","level":0,"keyType":"string","required":false,''' + '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' + '''"type":"string","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city",''' + '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,''' + '''"partitionValue":"","multiValued":false,"unique":false,"type":"string","showProperties":false,"scale":0},''' + '''{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lat","level":0,"keyType":"string","required":false,''' + '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' + '''"type":"double","showProperties":false,"scale":0},{"operations":[],"comment":"","nested":[],"name":"vrfcn_city_lon",''' + '''"level":0,"keyType":"string","required":false,"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":''' + '''"","multiValued":false,"unique":false,"type":"double","showProperties":false,"scale":0}],"bulkColumnNames":"acct_client,''' + 
'''tran_amount,tran_country_cd,vrfcn_city,vrfcn_city_lat,vrfcn_city_lon","showProperties":false,"isTargetExisting":false,''' + '''"isTargetChecking":false,"existingTargetUrl":"","tableName":"parquet_table","databaseName":"default","tableFormat":"orc",''' + '''"KUDU_DEFAULT_RANGE_PARTITION_COLUMN":{"values":[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=",''' + '''"upper_val":1,"include_upper_val":"<="},"KUDU_DEFAULT_PARTITION_COLUMN":{"columns":[],"range_partitions":[{"values":''' + '''[{"value":""}],"name":"VALUES","lower_val":0,"include_lower_val":"<=","upper_val":1,"include_upper_val":"<="}],"name":"HASH",''' + '''"int_val":16},"tableFormats":[{"value":"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},''' + '''{"value":"csv","name":"Csv"},{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},''' + '''{"value":"orc","name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],''' + '''"importData":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,''' + '''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",''' + '''"customRegexp":"","isIceberg":false,"useCopy":false}''' ) path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']} @@ -887,21 +1464,28 @@ def test_generate_create_orc_table_transactional(): ;''' assert statement in sql, sql - assert '''CREATE TABLE `default`.`parquet_table` + assert ( + '''CREATE TABLE `default`.`parquet_table` STORED AS orc TBLPROPERTIES('transactional'='true', 'transactional_properties'='insert_only') AS SELECT * FROM `default`.`hue__tmp_parquet_table`; -''' in sql, sql +''' + in sql + ), sql - assert '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`; -''' in sql, sql + assert ( + '''DROP TABLE IF EXISTS `default`.`hue__tmp_parquet_table`; +''' + in sql + ), sql @pytest.mark.django_db def test_generate_create_empty_kudu_table(): source = json.loads('''{"sourceType": "impala", "apiHelperType": "impala", "path": "", "inputFormat": "manual"}''') - destination = json.loads('''{"isTransactional": false, "isInsertOnly": false, "sourceType": "impala", ''' + destination = json.loads( + '''{"isTransactional": false, "isInsertOnly": false, "sourceType": "impala", ''' '''"name":"default.manual_empty_kudu","apiHelperType":"impala","description":"","outputFormat":"table",''' '''"columns":[{"operations":[],"comment":"","nested":[],"name":"acct_client","level":0,"keyType":"string","required":false,''' '''"precision":10,"keep":true,"isPartition":false,"length":100,"partitionValue":"","multiValued":false,"unique":false,''' @@ -936,7 +1520,8 @@ def test_generate_create_empty_kudu_table(): sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(source, destination).get_str() - assert '''CREATE TABLE `default`.`manual_empty_kudu` + assert ( + '''CREATE TABLE `default`.`manual_empty_kudu` ( `acct_client` string , `tran_amount` double , @@ -945,155 +1530,421 @@ def test_generate_create_empty_kudu_table(): `vrfcn_city_lat` double , `vrfcn_city_lon` double , PRIMARY KEY (acct_client) ) STORED AS kudu TBLPROPERTIES('transactional'='false') -;''' in sql, sql +;''' + in sql + ), sql @pytest.mark.django_db def test_create_ddl_with_nonascii(): - source = {u'kafkaFieldType': u'delimited', u'rdbmsUsername': u'', u'kafkaFieldTypes': u'', - 
u'selectedTableIndex': 0, u'rdbmsJdbcDriverNames': [], u'tableName': u'', - u'sample': [[u'Weihaiwei', u'\u5a01\u6d77\u536b\u5e02', u'Weihai', u'\u5a01\u6d77\u5e02', u'1949-11-01'], - [u'Xingshan', u'\u5174\u5c71\u5e02', u'Hegang', u'\u9e64\u5c97\u5e02', u'1950-03-23'], - [u"Xi'an", u'\u897f\u5b89\u5e02', u'Liaoyuan', u'\u8fbd\u6e90\u5e02', u'1952-04-03'], - [u'Nanzheng', u'\u5357\u90d1\u5e02', u'Hanzhong', u'\u6c49\u4e2d\u5e02', u'1953-10-24'], - [u'Dihua', u'\u8fea\u5316\u5e02', u'?r\xfcmqi', u'\u4e4c\u9c81\u6728\u9f50\u5e02', u'1953-11-20']], - u'rdbmsTypes': [], u'isFetchingDatabaseNames': False, u'rdbmsDbIsValid': False, u'query': u'', - u'channelSourceSelectedHosts': [], u'table': u'', u'rdbmsAllTablesSelected': False, - u'inputFormatsManual': [{u'name': u'Manually', u'value': u'manual'}], u'rdbmsPassword': u'', - u'isObjectStore': False, u'tables': [{u'name': u''}], u'streamUsername': u'', - u'kafkaSchemaManual': u'detect', u'connectorSelection': u'sfdc', u'namespace': - {u'status': u'CREATED', u'computes': - [{u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'}], - u'id': u'default', u'name': u'default'}, u'rdbmsIsAllTables': False, u'rdbmsDatabaseNames': [], - u'hasStreamSelected': False, u'channelSourcePath': u'/var/log/hue-httpd/access_log', - u'channelSourceHosts': [], u'show': True, u'streamObjects': [], u'streamPassword': u'', - u'tablesNames': [], u'sampleCols': [{u'operations': [], u'comment': u'', u'unique': False, - u'name': u'Before', u'level': 0, u'keyType': u'string', - u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', - u'multiValued': False, u'keep': True, u'type': u'string', - u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, - u'name': u'old_Chinese_name', u'level': 0, u'keyType': - u'string', u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', - u'multiValued': False, u'keep': True, u'type': u'string', - u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, - u'name': u'After', u'level': 0, u'keyType': u'string', - u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', - u'multiValued': False, u'keep': True, u'type': u'string', - u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, - u'name': u'new_Chinese_name', u'level': 0, u'keyType': - u'string', u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', - u'multiValued': False, u'keep': True, u'type': u'string', - u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, - u'name': u'Renamed_date', u'level': 0, u'keyType': u'string', - u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', - u'multiValued': False, u'keep': True, u'type': u'string', - u'showProperties': False, u'scale': 0}], u'rdbmsDatabaseName': u'', - u'sourceType': u'hive', u'inputFormat': u'file', u'format': {u'status': 0, u'fieldSeparator': u',', - u'hasHeader': True, u'quoteChar': u'"', - u'recordSeparator': u'\\n', u'type': u'csv'}, - u'connectorList': [{u'name': u'Salesforce', u'value': u'sfdc'}], u'kafkaFieldDelimiter': u',', - u'rdbmsPort': u'', u'rdbmsTablesExclude': [], u'isFetchingDriverNames': False, u'publicStreams': - 
[{u'name': u'Kafka Topics', u'value': u'kafka'}, {u'name': u'Flume Agent', u'value': u'flume'}], - u'channelSourceTypes': [{u'name': u'Directory or File', u'value': u'directory'}, - {u'name': u'Program', u'value': u'exec'}, - {u'name': u'Syslogs', u'value': u'syslogs'}, - {u'name': u'HTTP', u'value': u'http'}], - u'databaseName': u'default', u'inputFormats': [{u'name': u'File', u'value': u'file'}, - {u'name': u'External Database', u'value': u'rdbms'}, - {u'name': u'Manually', u'value': u'manual'}], - u'path': u'/user/admin/renamed_chinese_cities_gb2312.csv', u'streamToken': u'', u'kafkaFieldNames': u'', - u'streamSelection': u'kafka', u'compute': {u'credentials': {}, u'type': u'direct', - u'id': u'default', u'name': u'default'}, - u'name': u'', u'kafkaFieldSchemaPath': u'', u'kafkaTopics': [], u'rdbmsJdbcDriver': u'', - u'rdbmsHostname': u'', u'isFetchingTableNames': False, u'rdbmsType': None, u'inputFormatsAll': - [{u'name': u'File', u'value': u'file'}, {u'name': u'External Database', u'value': u'rdbms'}, - {u'name': u'Manually', u'value': u'manual'}], u'rdbmsTableNames': [], - u'streamEndpointUrl': u'https://login.salesforce.com/services/Soap/u/42.0', u'kafkaSelectedTopics': u''} - destination = {u'isTransactionalVisible': True, u'KUDU_DEFAULT_PARTITION_COLUMN': - {u'int_val': 16, u'name': u'HASH', u'columns': [], u'range_partitions': - [{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', - u'lower_val': 0, u'values': [{u'value': u''}]}]}, u'namespaces': - [{u'status': u'CREATED', u'computes': [{u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'}], - u'id': u'default', u'name': u'default'}], u'isTargetChecking': False, 'ouputFormat': u'table', - u'tableName': u'renamed_chinese_cities_gb2312', u'outputFormatsList': - [{u'name': u'Table', u'value': u'table'}, {u'name': u'Search index', u'value': u'index'}, - {u'name': u'Database', u'value': u'database'}, {u'name': u'Folder', u'value': u'file'}, - {u'name': u'HBase Table', u'value': u'hbase'}], - u'fieldEditorPlaceHolder': u'Example: SELECT * FROM [object Promise]', u'indexerDefaultField': [], - u'fieldEditorValue': - u'SELECT Before,\n old_Chinese_name,\n After,\n new_Chinese_name,\n Renamed_date\n FROM [object Promise];', - u'customRegexp': u'', u'customLineDelimiter': u'\\n', u'isTargetExisting': False, - u'customEnclosedByDelimiter': u"'", u'indexerConfigSets': [], u'sourceType': u'hive', - u'useCustomDelimiters': False, u'apiHelperType': u'hive', u'numMappers': 1, - u'fieldEditorDatabase': u'default', u'namespace': {u'status': u'CREATED', u'computes': - [{u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'}], u'id': u'default', u'name': u'default'}, - u'indexerPrimaryKeyObject': [], u'kuduPartitionColumns': [], u'rdbmsFileOutputFormats': - [{u'name': u'text', u'value': u'text'}, {u'name': u'sequence', u'value': u'sequence'}, - {u'name': u'avro', u'value': u'avro'}], u'outputFormats': [{u'name': u'Table', u'value': u'table'}, - {u'name': u'Search index', u'value': u'index'}], - u'fieldEditorEnabled': False, u'indexerDefaultFieldObject': [], - u'customMapDelimiter': u'', u'partitionColumns': [], u'rdbmsFileOutputFormat': u'text', - u'showProperties': False, u'isTransactional': True, u'useDefaultLocation': True, u'description': u'', - u'customFieldsDelimiter': u',', u'primaryKeyObjects': [], u'customFieldDelimiter': u',', - u'rdbmsSplitByColumn': [], u'existingTargetUrl': u'', u'channelSinkTypes': - [{u'name': u'This topic', u'value': 
u'kafka'}, {u'name': u'Solr', u'value': u'solr'}, - {u'name': u'HDFS', u'value': u'hdfs'}], u'defaultName': u'default.renamed_chinese_cities_gb2312', - u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'useCopy': False, u'databaseName': - u'default', u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': - {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', - u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [], u'indexerConfigSet': u'', - u'sqoopJobLibPaths': [{u'path': u''}], u'outputFormat': u'table', - u'nonDefaultLocation': u'/user/admin/renamed_chinese_cities_gb2312.csv', - u'compute': {u'credentials': {}, u'type': u'direct', u'id': u'default', u'name': u'default'}, - u'name': u'default.renamed_chinese_cities_gb2312', u'tableFormat': u'text', u'isInsertOnly': True, - u'targetNamespaceId': u'default', u'bulkColumnNames': u'Before,old_Chinese_name,After,new_Chinese_name,Renamed_date', - u'columns': [{u'operations': [], u'comment': u'', u'unique': False, u'name': u'Before', u'level': 0, - u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, u'name': u'old_Chinese_name', - u'level': 0, u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, u'name': u'After', u'level': 0, - u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, u'name': u'new_Chinese_name', - u'level': 0, u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0}, - {u'operations': [], u'comment': u'', u'unique': False, u'name': u'Renamed_date', - u'level': 0, u'keyType': u'string', u'required': False, u'precision': 10, u'nested': [], - u'isPartition': False, u'length': 100, u'partitionValue': u'', u'multiValued': False, - u'keep': True, u'type': u'string', u'showProperties': False, u'scale': 0}], - u'hasHeader': True, u'indexerPrimaryKey': [], u'tableFormats': - [{u'name': u'Text', u'value': u'text'}, {u'name': u'Parquet', u'value': u'parquet'}, - {u'name': u'Csv', u'value': u'csv'}, {u'name': u'Avro', u'value': u'avro'}, - {u'name': u'Json', u'value': u'json'}, {u'name': u'Regexp', u'value': u'regexp'}, - {u'name': u'ORC', u'value': u'orc'}], u'customCollectionDelimiter': u'', u'indexerNumShards': 1, - u'useFieldEditor': False, u'indexerJobLibPath': u'/tmp/smart_indexer_lib'} - - file_encoding = u'gb2312' + source = { + 'kafkaFieldType': 'delimited', + 'rdbmsUsername': '', + 'kafkaFieldTypes': '', + 'selectedTableIndex': 0, + 'rdbmsJdbcDriverNames': [], + 'tableName': '', + 'sample': [ + ['Weihaiwei', '\u5a01\u6d77\u536b\u5e02', 'Weihai', '\u5a01\u6d77\u5e02', '1949-11-01'], + ['Xingshan', 
'\u5174\u5c71\u5e02', 'Hegang', '\u9e64\u5c97\u5e02', '1950-03-23'], + ["Xi'an", '\u897f\u5b89\u5e02', 'Liaoyuan', '\u8fbd\u6e90\u5e02', '1952-04-03'], + ['Nanzheng', '\u5357\u90d1\u5e02', 'Hanzhong', '\u6c49\u4e2d\u5e02', '1953-10-24'], + ['Dihua', '\u8fea\u5316\u5e02', '?r\xfcmqi', '\u4e4c\u9c81\u6728\u9f50\u5e02', '1953-11-20'], + ], + 'rdbmsTypes': [], + 'isFetchingDatabaseNames': False, + 'rdbmsDbIsValid': False, + 'query': '', + 'channelSourceSelectedHosts': [], + 'table': '', + 'rdbmsAllTablesSelected': False, + 'inputFormatsManual': [{'name': 'Manually', 'value': 'manual'}], + 'rdbmsPassword': '', + 'isObjectStore': False, + 'tables': [{'name': ''}], + 'streamUsername': '', + 'kafkaSchemaManual': 'detect', + 'connectorSelection': 'sfdc', + 'namespace': { + 'status': 'CREATED', + 'computes': [{'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}], + 'id': 'default', + 'name': 'default', + }, + 'rdbmsIsAllTables': False, + 'rdbmsDatabaseNames': [], + 'hasStreamSelected': False, + 'channelSourcePath': '/var/log/hue-httpd/access_log', + 'channelSourceHosts': [], + 'show': True, + 'streamObjects': [], + 'streamPassword': '', + 'tablesNames': [], + 'sampleCols': [ + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'Before', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'old_Chinese_name', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'After', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'new_Chinese_name', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'Renamed_date', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + ], + 'rdbmsDatabaseName': '', + 'sourceType': 'hive', + 'inputFormat': 'file', + 'format': {'status': 0, 'fieldSeparator': ',', 'hasHeader': True, 'quoteChar': '"', 'recordSeparator': '\\n', 'type': 'csv'}, + 'connectorList': [{'name': 'Salesforce', 'value': 'sfdc'}], + 'kafkaFieldDelimiter': ',', + 'rdbmsPort': '', + 'rdbmsTablesExclude': [], + 'isFetchingDriverNames': False, + 'publicStreams': [{'name': 'Kafka Topics', 'value': 'kafka'}, {'name': 'Flume Agent', 'value': 'flume'}], + 'channelSourceTypes': [ + {'name': 'Directory or File', 'value': 'directory'}, + {'name': 'Program', 'value': 
'exec'}, + {'name': 'Syslogs', 'value': 'syslogs'}, + {'name': 'HTTP', 'value': 'http'}, + ], + 'databaseName': 'default', + 'inputFormats': [ + {'name': 'File', 'value': 'file'}, + {'name': 'External Database', 'value': 'rdbms'}, + {'name': 'Manually', 'value': 'manual'}, + ], + 'path': '/user/admin/renamed_chinese_cities_gb2312.csv', + 'streamToken': '', + 'kafkaFieldNames': '', + 'streamSelection': 'kafka', + 'compute': {'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}, + 'name': '', + 'kafkaFieldSchemaPath': '', + 'kafkaTopics': [], + 'rdbmsJdbcDriver': '', + 'rdbmsHostname': '', + 'isFetchingTableNames': False, + 'rdbmsType': None, + 'inputFormatsAll': [ + {'name': 'File', 'value': 'file'}, + {'name': 'External Database', 'value': 'rdbms'}, + {'name': 'Manually', 'value': 'manual'}, + ], + 'rdbmsTableNames': [], + 'streamEndpointUrl': 'https://login.salesforce.com/services/Soap/u/42.0', + 'kafkaSelectedTopics': '', + } + destination = { + 'isTransactionalVisible': True, + 'KUDU_DEFAULT_PARTITION_COLUMN': { + 'int_val': 16, + 'name': 'HASH', + 'columns': [], + 'range_partitions': [ + {'include_upper_val': '<=', 'upper_val': 1, 'name': 'VALUES', 'include_lower_val': '<=', 'lower_val': 0, 'values': [{'value': ''}]} + ], + }, + 'namespaces': [ + { + 'status': 'CREATED', + 'computes': [{'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}], + 'id': 'default', + 'name': 'default', + } + ], + 'isTargetChecking': False, + 'ouputFormat': 'table', + 'tableName': 'renamed_chinese_cities_gb2312', + 'outputFormatsList': [ + {'name': 'Table', 'value': 'table'}, + {'name': 'Search index', 'value': 'index'}, + {'name': 'Database', 'value': 'database'}, + {'name': 'Folder', 'value': 'file'}, + {'name': 'HBase Table', 'value': 'hbase'}, + ], + 'fieldEditorPlaceHolder': 'Example: SELECT * FROM [object Promise]', + 'indexerDefaultField': [], + 'fieldEditorValue': 'SELECT Before,\n old_Chinese_name,\n After,\n new_Chinese_name,\n Renamed_date\n FROM [object Promise];', # noqa: E501 + 'customRegexp': '', + 'customLineDelimiter': '\\n', + 'isTargetExisting': False, + 'customEnclosedByDelimiter': "'", + 'indexerConfigSets': [], + 'sourceType': 'hive', + 'useCustomDelimiters': False, + 'apiHelperType': 'hive', + 'numMappers': 1, + 'fieldEditorDatabase': 'default', + 'namespace': { + 'status': 'CREATED', + 'computes': [{'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}], + 'id': 'default', + 'name': 'default', + }, + 'indexerPrimaryKeyObject': [], + 'kuduPartitionColumns': [], + 'rdbmsFileOutputFormats': [ + {'name': 'text', 'value': 'text'}, + {'name': 'sequence', 'value': 'sequence'}, + {'name': 'avro', 'value': 'avro'}, + ], + 'outputFormats': [{'name': 'Table', 'value': 'table'}, {'name': 'Search index', 'value': 'index'}], + 'fieldEditorEnabled': False, + 'indexerDefaultFieldObject': [], + 'customMapDelimiter': '', + 'partitionColumns': [], + 'rdbmsFileOutputFormat': 'text', + 'showProperties': False, + 'isTransactional': True, + 'useDefaultLocation': True, + 'description': '', + 'customFieldsDelimiter': ',', + 'primaryKeyObjects': [], + 'customFieldDelimiter': ',', + 'rdbmsSplitByColumn': [], + 'existingTargetUrl': '', + 'channelSinkTypes': [{'name': 'This topic', 'value': 'kafka'}, {'name': 'Solr', 'value': 'solr'}, {'name': 'HDFS', 'value': 'hdfs'}], + 'defaultName': 'default.renamed_chinese_cities_gb2312', + 'isTransactionalUpdateEnabled': False, + 'importData': True, + 'isIceberg': False, + 'useCopy': False, + 'databaseName': 
'default', + 'indexerRunJob': False, + 'indexerReplicationFactor': 1, + 'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': { + 'include_upper_val': '<=', + 'upper_val': 1, + 'name': 'VALUES', + 'include_lower_val': '<=', + 'lower_val': 0, + 'values': [{'value': ''}], + }, + 'primaryKeys': [], + 'indexerConfigSet': '', + 'sqoopJobLibPaths': [{'path': ''}], + 'outputFormat': 'table', + 'nonDefaultLocation': '/user/admin/renamed_chinese_cities_gb2312.csv', + 'compute': {'credentials': {}, 'type': 'direct', 'id': 'default', 'name': 'default'}, + 'name': 'default.renamed_chinese_cities_gb2312', + 'tableFormat': 'text', + 'isInsertOnly': True, + 'targetNamespaceId': 'default', + 'bulkColumnNames': 'Before,old_Chinese_name,After,new_Chinese_name,Renamed_date', + 'columns': [ + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'Before', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'old_Chinese_name', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'After', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'new_Chinese_name', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + { + 'operations': [], + 'comment': '', + 'unique': False, + 'name': 'Renamed_date', + 'level': 0, + 'keyType': 'string', + 'required': False, + 'precision': 10, + 'nested': [], + 'isPartition': False, + 'length': 100, + 'partitionValue': '', + 'multiValued': False, + 'keep': True, + 'type': 'string', + 'showProperties': False, + 'scale': 0, + }, + ], + 'hasHeader': True, + 'indexerPrimaryKey': [], + 'tableFormats': [ + {'name': 'Text', 'value': 'text'}, + {'name': 'Parquet', 'value': 'parquet'}, + {'name': 'Csv', 'value': 'csv'}, + {'name': 'Avro', 'value': 'avro'}, + {'name': 'Json', 'value': 'json'}, + {'name': 'Regexp', 'value': 'regexp'}, + {'name': 'ORC', 'value': 'orc'}, + ], + 'customCollectionDelimiter': '', + 'indexerNumShards': 1, + 'useFieldEditor': False, + 'indexerJobLibPath': '/tmp/smart_indexer_lib', + } + + file_encoding = 'gb2312' path = { 'isDir': False, 'split': ('/user/admin', 'renamed_chinese_cities_gb2312.csv'), 'listdir': ['/user/admin/data'], - 'parent_path': '/user/admin/.scratchdir/03d184ad-dd11-4ae1-aace-378daaa094e5/renamed_chinese_cities_gb2312.csv/..' 
+ 'parent_path': '/user/admin/.scratchdir/03d184ad-dd11-4ae1-aace-378daaa094e5/renamed_chinese_cities_gb2312.csv/..', } request = MockRequest(fs=MockFs(path=path)) - sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(source, destination, start_time=-1, - file_encoding=file_encoding).get_str() + sql = ( + SQLIndexer(user=request.user, fs=request.fs) + .create_table_from_a_file(source, destination, start_time=-1, file_encoding=file_encoding) + .get_str() + ) assert '''USE default;''' in sql, sql @@ -1111,8 +1962,9 @@ def test_create_ddl_with_nonascii(): ;''' assert statement in sql, sql - statement = "LOAD DATA INPATH '/user/admin/renamed_chinese_cities_gb2312.csv' " + \ - "INTO TABLE `default`.`hue__tmp_renamed_chinese_cities_gb2312`;" + statement = ( + "LOAD DATA INPATH '/user/admin/renamed_chinese_cities_gb2312.csv' " + "INTO TABLE `default`.`hue__tmp_renamed_chinese_cities_gb2312`;" + ) assert statement in sql, sql statement = '''CREATE TABLE `default`.`renamed_chinese_cities_gb2312` @@ -1125,8 +1977,7 @@ def test_create_ddl_with_nonascii(): statement = '''DROP TABLE IF EXISTS `default`.`hue__tmp_renamed_chinese_cities_gb2312`;''' assert statement in sql, sql - statement = '''ALTER TABLE `default`.`renamed_chinese_cities_gb2312` ''' + \ - '''SET serdeproperties ("serialization.encoding"="gb2312");''' + statement = '''ALTER TABLE `default`.`renamed_chinese_cities_gb2312` ''' + '''SET serdeproperties ("serialization.encoding"="gb2312");''' assert statement in sql, sql @@ -1136,12 +1987,12 @@ def test_create_ddl_with_abfs(): { 'default': { 'fs_defaultfs': 'abfs://my-data@yingstorage.dfs.core.windows.net', - 'webhdfs_url': 'https://yingstorage.dfs.core.windows.net' + 'webhdfs_url': 'https://yingstorage.dfs.core.windows.net', } } ) - form_data = {'path': u'abfs://my-data/test_data/cars.csv', 'partition_columns': [], 'overwrite': False} + form_data = {'path': 'abfs://my-data/test_data/cars.csv', 'partition_columns': [], 'overwrite': False} sql = '' request = MockRequest(fs=MockFs()) query_server_config = dbms.get_query_server_config(name='impala') @@ -1150,17 +2001,14 @@ def test_create_ddl_with_abfs(): sql = "\n\n%s;" % db.load_data('default', 'cars', form_data, None, generate_ddl_only=True) finally: finish() - assert u"\'abfs://my-data@yingstorage.dfs.core.windows.net/test_data/cars.csv\'" in sql + assert "'abfs://my-data@yingstorage.dfs.core.windows.net/test_data/cars.csv'" in sql @pytest.mark.django_db def test_create_table_from_local(): with patch('indexer.indexers.sql.get_interpreter') as get_interpreter: get_interpreter.return_value = {'Name': 'Hive', 'dialect': 'hive'} - source = { - 'path': '', - 'sourceType': 'hive' - } + source = {'path': '', 'sourceType': 'hive'} destination = { 'name': 'default.test1', 'columns': [ @@ -1180,7 +2028,7 @@ def test_create_table_from_local(): {'name': 'dist', 'type': 'bigint', 'keep': True}, ], 'indexerPrimaryKey': [], - 'sourceType': 'hive' + 'sourceType': 'hive', } sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str() @@ -1208,11 +2056,7 @@ def test_create_table_from_local(): def test_create_table_from_local_mysql(): with patch('indexer.indexers.sql.get_interpreter') as get_interpreter: get_interpreter.return_value = {'Name': 'MySQL', 'dialect': 'mysql'} - source = { - 'path': BASE_DIR + '/apps/beeswax/data/tables/us_population.csv', - 'sourceType': 'mysql', - 'format': {'hasHeader': False} - } + source = {'path': BASE_DIR + '/apps/beeswax/data/tables/us_population.csv', 
'sourceType': 'mysql', 'format': {'hasHeader': False}} destination = { 'name': 'default.test1', 'columns': [ @@ -1220,7 +2064,7 @@ def test_create_table_from_local_mysql(): {'name': 'field_2', 'type': 'string', 'keep': True}, {'name': 'field_3', 'type': 'bigint', 'keep': True}, ], - 'sourceType': 'mysql' + 'sourceType': 'mysql', } sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str() @@ -1243,11 +2087,7 @@ def test_create_table_from_local_mysql(): def test_create_table_from_local_impala(): with patch('indexer.indexers.sql.get_interpreter') as get_interpreter: get_interpreter.return_value = {'Name': 'Impala', 'dialect': 'impala'} - source = { - 'path': BASE_DIR + '/apps/beeswax/data/tables/flights.csv', - 'sourceType': 'impala', - 'format': {'hasHeader': True} - } + source = {'path': BASE_DIR + '/apps/beeswax/data/tables/flights.csv', 'sourceType': 'impala', 'format': {'hasHeader': True}} destination = { 'name': 'default.test1', 'columns': [ @@ -1266,7 +2106,7 @@ def test_create_table_from_local_impala(): {'name': 'time', 'type': 'bigint', 'keep': True}, {'name': 'dist', 'type': 'bigint', 'keep': True}, ], - 'sourceType': 'impala' + 'sourceType': 'impala', } sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str() @@ -1325,11 +2165,7 @@ def test_create_table_from_local_impala(): def test_create_table_only_header_file_local_impala(): with patch('indexer.indexers.sql.get_interpreter') as get_interpreter: get_interpreter.return_value = {'Name': 'Impala', 'dialect': 'impala'} - source = { - 'path': BASE_DIR + '/apps/beeswax/data/tables/onlyheader.csv', - 'sourceType': 'impala', - 'format': {'hasHeader': True} - } + source = {'path': BASE_DIR + '/apps/beeswax/data/tables/onlyheader.csv', 'sourceType': 'impala', 'format': {'hasHeader': True}} destination = { 'name': 'default.test1', 'columns': [ @@ -1348,7 +2184,7 @@ def test_create_table_only_header_file_local_impala(): {'name': 'time', 'type': 'bigint', 'keep': True}, {'name': 'dist', 'type': 'bigint', 'keep': True}, ], - 'sourceType': 'impala' + 'sourceType': 'impala', } sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str() @@ -1397,10 +2233,7 @@ def test_create_table_only_header_file_local_impala(): def test_create_table_with_drop_column_from_local(): with patch('indexer.indexers.sql.get_interpreter') as get_interpreter: get_interpreter.return_value = {'Name': 'Hive', 'dialect': 'hive'} - source = { - 'path': '', - 'sourceType': 'hive' - } + source = {'path': '', 'sourceType': 'hive'} destination = { 'name': 'default.test1', 'columns': [ @@ -1411,7 +2244,7 @@ def test_create_table_with_drop_column_from_local(): {'name': 'arr', 'type': 'bigint', 'keep': False}, ], 'indexerPrimaryKey': [], - 'sourceType': 'hive' + 'sourceType': 'hive', } sql = SQLIndexer(user=Mock(), fs=Mock()).create_table_from_local_file(source, destination).get_str() diff --git a/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py b/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py index f2f47b97ad5..3447e39e4de 100644 --- a/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py +++ b/desktop/libs/indexer/src/indexer/management/commands/indexer_setup.py @@ -15,25 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import next -from builtins import zip -import itertools -import logging import os import sys +import logging +import itertools +from builtins import next, zip from django.core.management.base import BaseCommand - -from useradmin.models import install_sample_user +from django.utils.translation import gettext as _ from indexer import utils from indexer.solr_client import SolrClient - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from useradmin.models import install_sample_user LOG = logging.getLogger() @@ -42,6 +35,7 @@ class Command(BaseCommand): """ Install examples but do not overwrite them. """ + def handle(self, *args, **options): self.user = install_sample_user() self.client = SolrClient(self.user) @@ -50,70 +44,84 @@ def handle(self, *args, **options): if collection == 'twitter_demo': LOG.info("Installing twitter collection") - path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_twitter_demo/index_data.csv')) - self._setup_collection_from_csv({ + path = os.path.abspath( + os.path.join( + os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_twitter_demo/index_data.csv' + ) + ) + self._setup_collection_from_csv( + { 'name': 'twitter_demo', - 'fields': self._parse_fields(path, fieldtypes={ - 'source': 'string', - 'username': 'string', - }), + 'fields': self._parse_fields( + path, + fieldtypes={ + 'source': 'string', + 'username': 'string', + }, + ), 'uniqueKeyField': 'id', - 'df': 'text' + 'df': 'text', }, - path + path, ) LOG.info("Twitter collection successfully installed") if collection == 'yelp_demo': LOG.info("Installing yelp collection") - path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_yelp_demo/index_data.csv')) - self._setup_collection_from_csv({ + path = os.path.abspath( + os.path.join( + os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_yelp_demo/index_data.csv' + ) + ) + self._setup_collection_from_csv( + { 'name': 'yelp_demo', - 'fields': self._parse_fields(path, fieldtypes={ - 'name': 'string', - }), + 'fields': self._parse_fields( + path, + fieldtypes={ + 'name': 'string', + }, + ), 'uniqueKeyField': 'id', - 'df': 'text' + 'df': 'text', }, - path + path, ) LOG.info("Yelp collection successfully installed") if collection == 'log_analytics_demo': LOG.info("Installing logs collection") - path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_log_analytics_demo/index_data.csv')) - self._setup_collection_from_csv({ + path = os.path.abspath( + os.path.join( + os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_log_analytics_demo/index_data.csv' + ) + ) + self._setup_collection_from_csv( + { 'name': 'log_analytics_demo', - 'fields': self._parse_fields(path, fieldtypes={ - 'region_code': 'string', - 'referer': 'string', - 'user_agent': 'string' - }), + 'fields': self._parse_fields(path, fieldtypes={'region_code': 'string', 'referer': 'string', 'user_agent': 'string'}), 'uniqueKeyField': 'id', - 'df': 'record' + 'df': 'record', }, - path + path, ) LOG.info("Logs collection successfully installed") - def _setup_collection_from_csv(self, collection, path): if not self.client.exists(collection['name']): 
self.client.create_index( - name=collection['name'], - fields=collection['fields'], - unique_key_field=collection['uniqueKeyField'], - df=collection['df'] + name=collection['name'], fields=collection['fields'], unique_key_field=collection['uniqueKeyField'], df=collection['df'] ) with open(path) as fh: self.client.index(collection['name'], fh.read()) - def _parse_fields(self, path, separator=',', quote_character='"', fieldtypes={}): with open(path) as fh: field_generator = utils.field_values_from_separated_file(fh, separator, quote_character) row = next(field_generator) field_names = list(row.keys()) field_types = utils.get_field_types((list(row.values()) for row in itertools.chain([row], field_generator)), iterations=51) - return [{'name': field[0], 'type': field[0] in fieldtypes and fieldtypes[field[0]] or field[1]} for field in zip(field_names, field_types)] + return [ + {'name': field[0], 'type': field[0] in fieldtypes and fieldtypes[field[0]] or field[1]} for field in zip(field_names, field_types) + ] diff --git a/desktop/libs/indexer/src/indexer/solr_api.py b/desktop/libs/indexer/src/indexer/solr_api.py index c7109ccf009..4069524d4b7 100644 --- a/desktop/libs/indexer/src/indexer/solr_api.py +++ b/desktop/libs/indexer/src/indexer/solr_api.py @@ -15,23 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import json import logging -import sys +from django.utils.translation import gettext as _ from django.views.decorators.http import require_GET, require_POST from desktop.lib.django_util import JsonResponse -from desktop.lib.i18n import smart_unicode -from libsolr.api import SolrApi - +from desktop.lib.i18n import smart_str from indexer.solr_client import SolrClient - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from libsolr.api import SolrApi LOG = logging.getLogger() @@ -45,7 +39,7 @@ def decorator(*args, **kwargs): except Exception as e: LOG.exception('Error running %s' % func.__name__) response['status'] = -1 - response['message'] = smart_unicode(e) + response['message'] = smart_str(e) finally: if response: return JsonResponse(response) @@ -147,6 +141,7 @@ def delete_indexes(request): return JsonResponse(response) + @require_POST @api_error_handler def index(request): @@ -161,6 +156,7 @@ def index(request): return JsonResponse(response) + @require_POST @api_error_handler def create_alias(request): diff --git a/desktop/libs/indexer/src/indexer/solr_client.py b/desktop/libs/indexer/src/indexer/solr_client.py index 4a1324ec244..1cb982460eb 100644 --- a/desktop/libs/indexer/src/indexer/solr_client.py +++ b/desktop/libs/indexer/src/indexer/solr_client.py @@ -16,31 +16,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import logging -import json import os -import shutil import sys +import json +import shutil +import logging +from builtins import object + +from django.utils.translation import gettext as _ from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import smart_str -from libsolr.api import SolrApi -from libzookeeper.models import ZookeeperClient - from indexer.conf import CORE_INSTANCE_DIR, get_solr_ensemble from indexer.utils import copy_configs - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from libsolr.api import SolrApi +from libzookeeper.models import ZookeeperClient LOG = logging.getLogger() -MAX_UPLOAD_SIZE = 100 * 1024 * 1024 # 100 MB +MAX_UPLOAD_SIZE = 100 * 1024 * 1024 # 100 MB ALLOWED_FIELD_ATTRIBUTES = set(['name', 'type', 'indexed', 'stored']) FLAGS = [('I', 'indexed'), ('T', 'tokenized'), ('S', 'stored'), ('M', 'multivalued')] ZK_SOLR_CONFIG_NAMESPACE = 'configs' @@ -62,7 +57,6 @@ def __init__(self, user, api=None): self.user = user self.api = api if api is not None else SolrApi(user=self.user) - def get_indexes(self, include_cores=False): indexes = [] @@ -96,13 +90,14 @@ def get_indexes(self, include_cores=False): return sorted(indexes, key=lambda index: index['name']) - def create_index(self, name, fields, config_name=None, unique_key_field=None, df=None, shards=1, replication=1): if self.is_solr_cloud_mode(): if self.is_solr_six_or_more(): config_sets = self.list_configs() if not config_sets: - raise PopupException(_('Solr does not have any predefined (secure: %s) configSets: %s') % (self.is_sentry_protected(), self.list_configs())) + raise PopupException( + _('Solr does not have any predefined (secure: %s) configSets: %s') % (self.is_sentry_protected(), self.list_configs()) + ) if not config_name or config_name not in config_sets: config_name_target = 'managedTemplate' @@ -139,7 +134,7 @@ def create_index(self, name, fields, config_name=None, unique_key_field=None, df if self.is_solr_six_or_more(): self.api.update_config(name, { 'add-updateprocessor': { - "name" : "tolerant", + "name": "tolerant", "class": "solr.TolerantUpdateProcessorFactory", "maxErrors": "100" } @@ -150,19 +145,16 @@ def create_index(self, name, fields, config_name=None, unique_key_field=None, df else: self._create_non_solr_cloud_index(name, fields, unique_key_field, df) - def create_alias(self, name, collections): return self.api.create_alias(name, collections) - def index(self, name, data, content_type='csv', version=None, **kwargs): - """ + r""" e.g. 
Parameters: separator = ',', fieldnames = 'a,b,c', header=true, skip 'a,b', encapsulator=" escape=\, map, split, overwrite=true, rowid=id """ return self.api.update(name, data, content_type=content_type, version=version, **kwargs) - def exists(self, name): try: self.api.get_schema(name) @@ -171,7 +163,6 @@ def exists(self, name): LOG.info('Check if index %s existed failed: %s' % (name, e)) return False - def delete_index(self, name, keep_config=True): if not self.is_solr_cloud_mode(): raise PopupException(_('Cannot remove non-Solr cloud cores.')) @@ -193,34 +184,27 @@ def delete_index(self, name, keep_config=True): self.api.add_collection(name) raise PopupException(_('Error in deleting Solr configurations.'), detail=e) else: - if not 'Cannot unload non-existent core' in json.dumps(result): + if 'Cannot unload non-existent core' not in json.dumps(result): raise PopupException(_('Could not remove collection: %(message)s') % result) - def sample_index(self, collection, rows=100): return self.api.select(collection, rows=min(rows, 1000)) - def get_config(self, collection): return self.api.config(collection) - def list_configs(self): return self.api.configs() - def list_schema(self, index_name): return self.api.get_schema(index_name) - def delete_alias(self, name): return self.api.delete_alias(name) - def update_config(self, name, properties): return self.api.update_config(name, properties) - def is_solr_cloud_mode(self): global _IS_SOLR_CLOUD @@ -229,7 +213,6 @@ def is_solr_cloud_mode(self): return _IS_SOLR_CLOUD - def is_solr_six_or_more(self): global _IS_SOLR_6_OR_MORE @@ -238,7 +221,6 @@ def is_solr_six_or_more(self): return _IS_SOLR_6_OR_MORE - def is_solr_with_hdfs(self): global _IS_SOLR_WITH_HDFS @@ -247,7 +229,6 @@ def is_solr_with_hdfs(self): return _IS_SOLR_WITH_HDFS - def is_sentry_protected(self): global _IS_SENTRY_PROTECTED @@ -256,7 +237,6 @@ def is_sentry_protected(self): return _IS_SENTRY_PROTECTED - def get_zookeeper_host(self): global _ZOOKEEPER_HOST @@ -265,7 +245,6 @@ def get_zookeeper_host(self): return _ZOOKEEPER_HOST - # Deprecated def _create_cloud_config(self, name, fields, unique_key_field, df): with ZookeeperClient(hosts=self.get_zookeeper_host(), read_only=False) as zc: @@ -293,7 +272,6 @@ def _create_cloud_config(self, name, fields, unique_key_field, df): finally: shutil.rmtree(tmp_path) - # Deprecated def _create_non_solr_cloud_index(self, name, fields, unique_key_field, df): # Create instance directory locally. 
@@ -316,7 +294,6 @@ def _create_non_solr_cloud_index(self, name, fields, unique_key_field, df): finally: shutil.rmtree(instancedir) - def _fillup_properties(self): global _IS_SOLR_CLOUD global _IS_SOLR_6_OR_MORE @@ -340,14 +317,12 @@ def _fillup_properties(self): if '-Dsolr.authorization.sentry.site' in command_line_arg: _IS_SENTRY_PROTECTED = True - @staticmethod def _port_field_types(field): - if not field['type'].startswith('p'): # Check for automatically converting to new default Solr types + if not field['type'].startswith('p'): # Check for automatically converting to new default Solr types field['type'] = field['type'].replace('long', 'plong').replace('double', 'pdouble').replace('date', 'pdate') return field - @staticmethod def _reset_properties(): global _IS_SOLR_CLOUD @@ -358,7 +333,6 @@ def _reset_properties(): _IS_SOLR_CLOUD = _IS_SOLR_6_OR_MORE = _IS_SOLR_6_OR_MORE = _IS_SOLR_WITH_HDFS = _ZOOKEEPER_HOST = _IS_SENTRY_PROTECTED = None - # Used by morphline indexer def get_index_schema(self, index_name): try: diff --git a/desktop/libs/indexer/src/indexer/test_utils.py b/desktop/libs/indexer/src/indexer/test_utils.py index 32d7c711870..5176f0c1dab 100644 --- a/desktop/libs/indexer/src/indexer/test_utils.py +++ b/desktop/libs/indexer/src/indexer/test_utils.py @@ -16,21 +16,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -import sys +from io import StringIO as string_io from desktop.lib.i18n import force_unicode - from indexer.utils import field_values_from_separated_file -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from StringIO import StringIO as string_io - - def test_get_ensemble(): # Non ascii data = string_io('fieldA\nrel=""nofollow"">Twitter for Péché') @@ -43,9 +34,7 @@ def test_get_ensemble(): # Bad binary test_str = b'fieldA\naaa\x80\x02\x03' - if sys.version_info[0] > 2: - data = string_io(force_unicode(test_str, errors='ignore')) - else: - data = string_io(test_str) + data = string_io(force_unicode(test_str, errors='ignore')) + result = list(field_values_from_separated_file(data, delimiter='\t', quote_character='"')) assert u'aaa\x02\x03' == result[0]['fieldA'] diff --git a/desktop/libs/indexer/src/indexer/tests.py b/desktop/libs/indexer/src/indexer/tests.py index 579a1b158c2..c0831793c7a 100644 --- a/desktop/libs/indexer/src/indexer/tests.py +++ b/desktop/libs/indexer/src/indexer/tests.py @@ -15,28 +15,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import json -import pytest import sys +import json +from builtins import object +from unittest.mock import Mock, patch +import pytest from django.urls import reverse -from hadoop.pseudo_hdfs4 import is_live_cluster, get_db_prefix -from libsolr import conf as libsolr_conf -from libzookeeper import conf as libzookeeper_conf +from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.test_utils import add_to_group, grant_access +from hadoop.pseudo_hdfs4 import get_db_prefix, is_live_cluster from indexer.conf import get_solr_ensemble from indexer.controller import CollectionManagerController +from libsolr import conf as libsolr_conf +from libzookeeper import conf as libzookeeper_conf from useradmin.models import User -from desktop.lib.django_test_util import make_logged_in_client -from desktop.lib.test_utils import add_to_group, grant_access - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - def test_get_ensemble(): clears = [] @@ -75,7 +70,7 @@ def test_input_formats_no_fs(self): get_filesystem.return_value = None resp = self.client.get(reverse('indexer:importer')) - assert not b"{'value': 'file', 'name': 'Remote File'}" in resp.content + assert b"{'value': 'file', 'name': 'Remote File'}" not in resp.content class TestIndexerWithSolr(object): diff --git a/desktop/libs/indexer/src/indexer/urls.py b/desktop/libs/indexer/src/indexer/urls.py index ac3ce529357..76a4c6bf46c 100644 --- a/desktop/libs/indexer/src/indexer/urls.py +++ b/desktop/libs/indexer/src/indexer/urls.py @@ -17,18 +17,11 @@ import sys -from indexer import views as indexer_views -from indexer import solr_api as indexer_solr_api -from indexer import api3 as indexer_api3 -from indexer.indexers import rdbms as indexer_indexers_rdbms -from indexer import api as indexer_api +from django.urls import re_path +from indexer import api as indexer_api, api3 as indexer_api3, solr_api as indexer_solr_api, views as indexer_views from indexer.conf import ENABLE_NEW_INDEXER - -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from indexer.indexers import rdbms as indexer_indexers_rdbms urlpatterns = [ re_path(r'^install_examples$', indexer_views.install_examples, name='install_examples'), @@ -46,7 +39,7 @@ re_path(r'^$', indexer_views.indexes, name='indexes'), re_path(r'^indexes/?$', indexer_views.indexes, name='indexes'), re_path(r'^indexes/(?P<index>[^/]+)/?$', indexer_views.indexes, name='indexes'), - re_path(r'^collections$', indexer_views.collections, name='collections'), # Old page + re_path(r'^collections$', indexer_views.collections, name='collections'), # Old page ] else: urlpatterns += [ @@ -78,12 +71,12 @@ re_path(r'^api/indexer/guess_format/?$', indexer_api3.guess_format, name='guess_format'), re_path(r'^api/indexer/guess_field_types/?$', indexer_api3.guess_field_types, name='guess_field_types'), re_path(r'^api/indexer/index/?$', indexer_api3.index, name='index'), - re_path(r'^api/importer/submit', indexer_api3.importer_submit, name='importer_submit'), re_path(r'^api/importer/save/?$', indexer_api3.save_pipeline, name='save_pipeline'), - re_path(r'^api/indexer/upload_local_file/?$', indexer_api3.upload_local_file, name='upload_local_file'), - re_path(r'^api/indexer/upload_local_file_drag_and_drop/?$', indexer_api3.upload_local_file_drag_and_drop, name='upload_local_file_drag_and_drop'), + re_path( + r'^api/indexer/upload_local_file_drag_and_drop/?$',
indexer_api3.upload_local_file_drag_and_drop, name='upload_local_file_drag_and_drop' + ), ] urlpatterns += [ diff --git a/desktop/libs/indexer/src/indexer/utils.py b/desktop/libs/indexer/src/indexer/utils.py index 537d1ae6517..9aeab202eea 100644 --- a/desktop/libs/indexer/src/indexer/utils.py +++ b/desktop/libs/indexer/src/indexer/utils.py @@ -16,40 +16,27 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() -from builtins import next -from builtins import range -from builtins import object -import csv -import logging import os -import pytz import re +import csv +import uuid import shutil -import sys +import logging import tempfile -import uuid +from io import StringIO as string_io +import pytz from dateutil.parser import parse - from django.conf import settings +from django.utils.translation import gettext as _ from desktop.lib.i18n import force_unicode, smart_str - from indexer import conf -from indexer.models import DATE_FIELD_TYPES, TEXT_FIELD_TYPES, INTEGER_FIELD_TYPES, DECIMAL_FIELD_TYPES, BOOLEAN_FIELD_TYPES - -if sys.version_info[0] > 2: - from io import StringIO as string_io - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from StringIO import StringIO as string_io +from indexer.models import BOOLEAN_FIELD_TYPES, DATE_FIELD_TYPES, DECIMAL_FIELD_TYPES, INTEGER_FIELD_TYPES, TEXT_FIELD_TYPES LOG = logging.getLogger() -TIMESTAMP_PATTERN = '\[([\w\d\s\-\/\:\+]*?)\]' -FIELD_XML_TEMPLATE = '' +TIMESTAMP_PATTERN = r'\[([\w\d\s\-\/\:\+]*?)\]' +FIELD_XML_TEMPLATE = '' # noqa: E501 DEFAULT_FIELD = { 'name': None, 'type': 'text', @@ -94,10 +81,14 @@ def __init__(self, xml): self.xml = xml def defaultField(self, df=None): - self.xml = force_unicode(force_unicode(self.xml).replace(u'text', u'%s' % force_unicode(df) if df is not None else '')) + self.xml = force_unicode( + force_unicode(self.xml).replace('text', '%s' % force_unicode(df) if df is not None else '') + ) -def copy_configs(fields, unique_key_field, df, solr_cloud_mode=True, is_solr_six_or_more=False, is_solr_hdfs_mode=True, is_sentry_protected=False): +def copy_configs( + fields, unique_key_field, df, solr_cloud_mode=True, is_solr_six_or_more=False, is_solr_hdfs_mode=True, is_sentry_protected=False +): # Create temporary copy of solr configs tmp_path = tempfile.mkdtemp() @@ -286,7 +277,8 @@ def field_values_from_separated_file(fh, delimiter, quote_character, fields=None remove_keys = None for row in reader: - row = dict([(force_unicode(k), force_unicode(v, errors='ignore')) for k, v in row.items()]) # Get rid of invalid binary chars and convert to unicode from DictReader + # Get rid of invalid binary chars and convert to unicode from DictReader + row = dict([(force_unicode(k), force_unicode(v, errors='ignore')) for k, v in row.items()]) # Remove keys that aren't in collection if remove_keys is None: @@ -333,7 +325,7 @@ def field_values_from_separated_file(fh, delimiter, quote_character, fields=None yield row -def field_values_from_log(fh, fields=[ {'name': 'message', 'type': 'text_general'}, {'name': 'tdate', 'type': 'timestamp'} ]): +def field_values_from_log(fh, fields=[{'name': 'message', 'type': 'text_general'}, {'name': 'tdate', 'type': 'timestamp'}]): """ Only timestamp and message """ @@ -345,12 +337,12 @@ def field_values_from_log(fh, fields=[ {'name': 'message', 'type': 'text_general else: try: timestamp_key = next(iter([field for field 
in fields if field['type'] in DATE_FIELD_TYPES]))['name'] - except: + except Exception: LOG.exception('failed to get timestamp key') timestamp_key = None try: message_key = next(iter([field for field in fields if field['type'] in TEXT_FIELD_TYPES]))['name'] - except: + except Exception: LOG.exception('failed to get message key') message_key = None @@ -370,7 +362,7 @@ def value_generator(buf): last_newline = content.rfind('\n') if last_newline > -1: buf = content[:last_newline] - content = content[last_newline+1:] + content = content[last_newline + 1:] for row in value_generator(buf): yield row prev = fh.read() diff --git a/desktop/libs/indexer/src/indexer/views.py b/desktop/libs/indexer/src/indexer/views.py index 32ffd42dbd6..ec63dfb2630 100644 --- a/desktop/libs/indexer/src/indexer/views.py +++ b/desktop/libs/indexer/src/indexer/views.py @@ -15,25 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import json import sys +import json +import logging + +from django.utils.translation import gettext as _ from desktop.lib.django_util import JsonResponse, render from desktop.lib.exceptions_renderable import PopupException from desktop.models import get_cluster_config - -from indexer.solr_client import SolrClient from indexer.fields import FIELD_TYPES, Field from indexer.file_format import get_file_indexable_format_types -from indexer.management.commands import indexer_setup from indexer.indexers.morphline_operations import OPERATORS - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from indexer.management.commands import indexer_setup +from indexer.solr_client import SolrClient LOG = logging.getLogger() diff --git a/desktop/libs/kafka/src/kafka/conf.py b/desktop/libs/kafka/src/kafka/conf.py index 339dce07872..9d5683c8efe 100644 --- a/desktop/libs/kafka/src/kafka/conf.py +++ b/desktop/libs/kafka/src/kafka/conf.py @@ -15,16 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys +import logging -from desktop.lib.conf import Config, ConfigSection, coerce_bool - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t +from django.utils.translation import gettext_lazy as _t +from desktop.lib.conf import Config, ConfigSection, coerce_bool LOG = logging.getLogger() @@ -32,6 +28,7 @@ def has_kafka(): return KAFKA.IS_ENABLED.get() + def has_kafka_api(): return bool(KAFKA.API_URL.get()) diff --git a/desktop/libs/kafka/src/kafka/kafka_api.py b/desktop/libs/kafka/src/kafka/kafka_api.py index 80b3122e411..4b63577f86f 100644 --- a/desktop/libs/kafka/src/kafka/kafka_api.py +++ b/desktop/libs/kafka/src/kafka/kafka_api.py @@ -16,23 +16,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys
 import json
 import logging
-import sys
+
+from django.utils.translation import gettext as _
 
 from desktop.lib.django_util import JsonResponse
 from desktop.lib.i18n import force_unicode
-from metadata.manager_client import ManagerApi
-from notebook.models import _get_notebook_api
-
 from kafka.conf import has_kafka_api
 from kafka.kafka_client import KafkaApi, KafkaApiException, SchemaRegistryApi
-
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
-
+from metadata.manager_client import ManagerApi
+from notebook.models import _get_notebook_api
 
 LOG = logging.getLogger()
diff --git a/desktop/libs/kafka/src/kafka/kafka_client.py b/desktop/libs/kafka/src/kafka/kafka_client.py
index e06f3d2ea4c..80c12011c73 100644
--- a/desktop/libs/kafka/src/kafka/kafka_client.py
+++ b/desktop/libs/kafka/src/kafka/kafka_client.py
@@ -16,26 +16,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from builtins import object
-import logging
-import json
 import sys
-
+import json
+import logging
+from builtins import object
 from subprocess import call
 
-from desktop.lib.rest.http_client import RestException, HttpClient
-from desktop.lib.rest.resource import Resource
-from desktop.lib.i18n import smart_unicode
+from django.utils.translation import gettext as _
+from desktop.lib.i18n import smart_str
+from desktop.lib.rest.http_client import HttpClient, RestException
+from desktop.lib.rest.resource import Resource
 
 from kafka.conf import KAFKA
 from libzookeeper.conf import zkensemble
 
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
-
-
 LOG = logging.getLogger()
@@ -47,7 +41,7 @@ def __str__(self):
     return str(self.message)
 
   def __unicode__(self):
-    return smart_unicode(self.message)
+    return smart_str(self.message)
 
 
 class KafkaApi(object):
@@ -62,7 +56,6 @@ def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
     self._client = HttpClient(self._api_url, logger=LOG)
     self._root = Resource(self._client)
 
-
   def topics(self):
     try:
       response = self._root.get('topics')
@@ -70,7 +63,6 @@ def topics(self):
     except RestException as e:
       raise KafkaApiException(e)
 
-
   def create_topic(self, name, partitions=1, replication_factor=1):
     # Create/delete topics are not available in the REST API.
     # Here only works with hack if command is available on the Hue host.
@@ -99,7 +91,6 @@ def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False):
     self._client = HttpClient(self._api_url, logger=LOG)
     self._root = Resource(self._client)
 
-
   def subjects(self):
     try:
       response = self._root.get('subjects')
diff --git a/desktop/libs/kafka/src/kafka/ksql_client.py b/desktop/libs/kafka/src/kafka/ksql_client.py
index c29ae25eb3c..b8571ff3a38 100644
--- a/desktop/libs/kafka/src/kafka/ksql_client.py
+++ b/desktop/libs/kafka/src/kafka/ksql_client.py
@@ -16,21 +16,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from builtins import object
-import logging
-import json
 import sys
+import json
+import logging
+from builtins import object
 
 from django.core.cache import cache
+from django.utils.translation import gettext as _
 
-from desktop.lib.i18n import smart_unicode
-from desktop.lib.rest.http_client import RestException
 from desktop.conf import has_channels
-
-if sys.version_info[0] > 2:
-  from django.utils.translation import gettext as _
-else:
-  from django.utils.translation import ugettext as _
+from desktop.lib.i18n import smart_str
+from desktop.lib.rest.http_client import RestException
 
 if has_channels():
   from notebook.consumer import _send_to_channel
@@ -47,7 +43,7 @@ def __str__(self):
     return str(self.message)
 
   def __unicode__(self):
-    return smart_unicode(self.message)
+    return smart_str(self.message)
 
 
 class KSqlApi(object):
@@ -72,7 +68,6 @@ def __init__(self, user=None, url=None, security_enabled=False, ssl_cert_ca_veri
 
     self.client = client = KSQLAPI(self._api_url)
 
-
  def show_tables(self):
    try:
      response = self.client.ksql('SHOW TABLES')
@@ -80,7 +75,6 @@ def show_tables(self):
    except Exception as e:
      raise KSqlApiException(e)
 
-
  def show_topics(self):
    try:
      response = self.client.ksql('SHOW TOPICS')
@@ -88,7 +82,6 @@ def show_topics(self):
    except Exception as e:
      raise KSqlApiException(e)
 
-
  def show_streams(self):
    try:
      response = self.client.ksql('SHOW STREAMS')
@@ -96,7 +89,6 @@ def show_streams(self):
    except Exception as e:
      raise KSqlApiException(e)
 
-
  def get_columns(self, table):
    try:
      response = self.client.ksql('DESCRIBE %s' % table)
@@ -104,13 +96,11 @@ def get_columns(self, table):
    except Exception as e:
      raise KSqlApiException(e)
 
-
  def ksql(self, statement):
    response = self.client.ksql(statement)
    LOG.debug('ksqlDB response: %s' % response)
    return response[0] if response else {'@type': 'queries', 'queries': []}  # INSERTs return empty currently
 
-
  def query(self, statement, channel_name=None):
    data = []
    metadata = []
@@ -188,11 +178,9 @@ def query(self, statement, channel_name=None):
 
    return data, metadata
 
-
  def cancel(self, notebook, snippet):
    return {'status': -1}
 
-
  def _decode_result(self, result):
    columns = []
    data = []
diff --git a/desktop/libs/kafka/src/kafka/urls.py b/desktop/libs/kafka/src/kafka/urls.py
index 1a50146e52f..4551a86ed67 100644
--- a/desktop/libs/kafka/src/kafka/urls.py
+++ b/desktop/libs/kafka/src/kafka/urls.py
@@ -17,12 +17,9 @@
 
 import sys
 
-from kafka import kafka_api as kafka_kafka_api
+from django.urls import re_path
 
-if sys.version_info[0] > 2:
-  from django.urls import re_path
-else:
-  from django.conf.urls import url as re_path
+from kafka import kafka_api as kafka_kafka_api
 
 urlpatterns = [
   re_path(r'^api/topics/list/$', kafka_kafka_api.list_topics, name='list_topics'),
diff --git a/desktop/libs/libanalyze/src/libanalyze/analyze_test.py b/desktop/libs/libanalyze/src/libanalyze/analyze_test.py
index 27e974becd5..4eff4338172 100644
--- a/desktop/libs/libanalyze/src/libanalyze/analyze_test.py
+++ b/desktop/libs/libanalyze/src/libanalyze/analyze_test.py
@@ -14,20 +14,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from future import standard_library -standard_library.install_aliases() +import os +import time +import pstats +import logging +import cProfile from builtins import object -import cProfile, logging, os, pstats, sys, time -from libanalyze import analyze as a -from libanalyze import rules +from io import StringIO as string_io -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from cStringIO import StringIO as string_io +from libanalyze import analyze as a, rules LOG = logging.getLogger() + def ordered(obj): if isinstance(obj, dict): return sorted((k, ordered(v)) for k, v in list(obj.items())) @@ -36,6 +35,7 @@ def ordered(obj): else: return obj + class AnalyzeTest(object): def setup_method(self): self.profile = a.analyze( @@ -51,16 +51,16 @@ def test_get_top_reasons_json(self): self.analyze.pre_process(self.profile) result = self.analyze.run(self.profile) assert len(result[0]['result']) == 67 - test = [{"result": [{"reason": [{"impact": 16798499570, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1841684634.666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "30", "wall_clock_time": 20683095270, "contribution_factor_str": "SQLOperator 30:AGGREGATION_NODE"}, {"reason": [{"impact": 16137425107, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1249201121.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "7", "wall_clock_time": 20022020807, "contribution_factor_str": "SQLOperator 07:AGGREGATION_NODE"}, {"reason": [{"impact": 15991669185, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1062368963.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "20", "wall_clock_time": 19681122971, "contribution_factor_str": "SQLOperator 20:AGGREGATION_NODE"}, {"reason": [{"impact": 538561025.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "17", "wall_clock_time": 6966953012, "contribution_factor_str": "SQLOperator 17:HASH_JOIN_NODE"}, {"reason": [{"impact": 874553885.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "29", "wall_clock_time": 6705756207, "contribution_factor_str": "SQLOperator 29:HASH_JOIN_NODE"}, {"reason": [{"impact": 496170372, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "27", "wall_clock_time": 6663793736, "contribution_factor_str": "SQLOperator 27:HASH_JOIN_NODE"}, {"reason": [{"impact": 467446848.55555534, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", 
"unit": 5}], "result_id": "4", "wall_clock_time": 6641201075, "contribution_factor_str": "SQLOperator 04:HASH_JOIN_NODE"}, {"reason": [{"impact": 503890745.8888893, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "6", "wall_clock_time": 6611505627, "contribution_factor_str": "SQLOperator 06:HASH_JOIN_NODE"}, {"reason": [{"impact": 634909229.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "19", "wall_clock_time": 6401734479, "contribution_factor_str": "SQLOperator 19:HASH_JOIN_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 2612825457, "contribution_factor_str": "RemoteFragmentsStarted -1:N/A"}, {"reason": [{"impact": 3672332795.524691, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1271091421, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 929179291.4444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "14", "wall_clock_time": 2320876241, "contribution_factor_str": "SQLOperator 14:HDFS_SCAN_NODE"}, {"reason": [{"impact": 165377262.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "5", "wall_clock_time": 2258327578, "contribution_factor_str": "SQLOperator 05:HASH_JOIN_NODE"}, {"reason": [{"impact": 174711179.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "28", "wall_clock_time": 2231494483, "contribution_factor_str": "SQLOperator 28:HASH_JOIN_NODE"}, {"reason": [{"impact": 4598206116.796875, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1261948355, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 836163684.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 49606693.93939389, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "1", "wall_clock_time": 2201407589, "contribution_factor_str": "SQLOperator 01:HDFS_SCAN_NODE"}, {"reason": 
[{"impact": 4407935855.252918, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1767671213, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 722860231, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "24", "wall_clock_time": 2193866884, "contribution_factor_str": "SQLOperator 24:HDFS_SCAN_NODE"}, {"reason": [{"impact": 96606459.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "18", "wall_clock_time": 2180207014, "contribution_factor_str": "SQLOperator 18:HASH_JOIN_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 1250729128, "contribution_factor_str": "SQLOperator F04 0:CodeGen"}, {"reason": [{"impact": 193415667.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 1", "wall_clock_time": 1201795461, "contribution_factor_str": "SQLOperator F04 1:CodeGen"}, {"reason": [{"impact": 92531774.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F00 0", "wall_clock_time": 1062080747, "contribution_factor_str": "SQLOperator F00 0:CodeGen"}, {"reason": [{"impact": 118700210.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F05 0", "wall_clock_time": 1009980856, "contribution_factor_str": "SQLOperator F05 0:CodeGen"}, {"reason": [{"impact": 132909682.88888884, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F09 0", "wall_clock_time": 950194410, "contribution_factor_str": "SQLOperator F09 0:CodeGen"}, {"reason": [{"impact": 95305427.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F10 0", "wall_clock_time": 878960263, "contribution_factor_str": "SQLOperator F10 0:CodeGen"}, {"reason": [{"impact": 46199805, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F14 0", "wall_clock_time": 769058113, "contribution_factor_str": "SQLOperator F14 0:CodeGen"}, {"reason": [], "result_id": -1, "wall_clock_time": 613452579, "contribution_factor_str": "PlanningTime -1:N/A"}, {"reason": [{"impact": 306772810, "name": "Slow Aggregate", "fix": {"fixable": False}, 
"message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 42519756.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "45", "wall_clock_time": 319264610, "contribution_factor_str": "SQLOperator 45:AGGREGATION_NODE"}, {"reason": [{"impact": 297637309, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 29017600.555555582, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "51", "wall_clock_time": 309567409, "contribution_factor_str": "SQLOperator 51:AGGREGATION_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 107247619, "contribution_factor_str": "ClientFetchWaitTimer -1:N/A"}, {"reason": [{"impact": 97484030, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 36347752, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "31", "wall_clock_time": 98861130, "contribution_factor_str": "SQLOperator 31:SORT_NODE"}, {"reason": [{"impact": 67982884, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 7664156.555555552, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "40", "wall_clock_time": 80474684, "contribution_factor_str": "SQLOperator 40:AGGREGATION_NODE"}, {"reason": [{"impact": 32130961.111111112, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "12", "wall_clock_time": 71088072, "contribution_factor_str": "SQLOperator 12:SELECT_NODE"}, {"reason": [{"impact": 58733676, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 5766554.333333336, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "8", "wall_clock_time": 60080276, "contribution_factor_str": "SQLOperator 08:SORT_NODE"}, {"reason": [{"impact": 57966057, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 4243951.444444448, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "21", "wall_clock_time": 59294857, "contribution_factor_str": "SQLOperator 21:SORT_NODE"}, {"reason": [{"impact": 47950535, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 37688100, "name": "Slow HDFS Scan", "fix": 
{"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "15", "wall_clock_time": 47950535, "contribution_factor_str": "SQLOperator 15:HDFS_SCAN_NODE"}, {"reason": [{"impact": 17818123.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "52", "wall_clock_time": 44603227, "contribution_factor_str": "SQLOperator 52:EXCHANGE_NODE"}, {"reason": [{"impact": 9621600, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 121390; LHS 105174", "unit": 5}, {"impact": 4113826, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 2924865.666666664, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "34", "wall_clock_time": 43779812, "contribution_factor_str": "SQLOperator 34:HASH_JOIN_NODE"}, {"reason": [{"impact": 14784147, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "32", "wall_clock_time": 42111797, "contribution_factor_str": "SQLOperator 32:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 39518015, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 29689100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "2", "wall_clock_time": 39518015, "contribution_factor_str": "SQLOperator 02:HDFS_SCAN_NODE"}, {"reason": [{"impact": 20851584.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "46", "wall_clock_time": 38647270, "contribution_factor_str": "SQLOperator 46:EXCHANGE_NODE"}, {"reason": [{"impact": 8035800, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 105576; LHS 121383", "unit": 5}, {"impact": 3816722, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 1904130.4444444478, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "33", "wall_clock_time": 37364443, "contribution_factor_str": "SQLOperator 33:HASH_JOIN_NODE"}, {"reason": [{"impact": 31174821, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 1894590, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "10", "wall_clock_time": 32551921, "contribution_factor_str": "SQLOperator 10:SORT_NODE"}, {"reason": [{"impact": 26659473.75, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 20690100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": 
"Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "25", "wall_clock_time": 30467970, "contribution_factor_str": "SQLOperator 25:HDFS_SCAN_NODE"}, {"reason": [{"impact": 7084883.444444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "11", "wall_clock_time": 28336314, "contribution_factor_str": "SQLOperator 11:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2135688.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "9", "wall_clock_time": 22614443, "contribution_factor_str": "SQLOperator 09:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 1150084.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "22", "wall_clock_time": 22144125, "contribution_factor_str": "SQLOperator 22:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2047632, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "39", "wall_clock_time": 11957699, "contribution_factor_str": "SQLOperator 39:EXCHANGE_NODE"}, {"reason": [{"impact": 1332451, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "44", "wall_clock_time": 11506235, "contribution_factor_str": "SQLOperator 44:EXCHANGE_NODE"}, {"reason": [{"impact": 728588, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "50", "wall_clock_time": 10172630, "contribution_factor_str": "SQLOperator 50:EXCHANGE_NODE"}, {"reason": [{"impact": 3334413, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "0", "wall_clock_time": 3334413, "contribution_factor_str": "SQLOperator 00:HDFS_SCAN_NODE"}, {"reason": [], "result_id": "53", "wall_clock_time": 3082111, "contribution_factor_str": "SQLOperator 53:EXCHANGE_NODE"}, {"reason": [{"impact": 2594847, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "23", "wall_clock_time": 2594847, "contribution_factor_str": "SQLOperator 23:HDFS_SCAN_NODE"}, {"reason": [{"impact": 2452312, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 2198000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "13", "wall_clock_time": 2452312, "contribution_factor_str": "SQLOperator 13:HDFS_SCAN_NODE"}, {"reason": 
[{"impact": 1706125, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 287883, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "16", "wall_clock_time": 1706125, "contribution_factor_str": "SQLOperator 16:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1619889, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 601555, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "3", "wall_clock_time": 1619889, "contribution_factor_str": "SQLOperator 03:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1385497, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 181359, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "26", "wall_clock_time": 1385497, "contribution_factor_str": "SQLOperator 26:HDFS_SCAN_NODE"}, {"reason": [{"impact": 559177.1111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "43", "wall_clock_time": 1378341, "contribution_factor_str": "SQLOperator 43:EXCHANGE_NODE"}, {"reason": [{"impact": 362490.3333333334, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "38", "wall_clock_time": 1291643, "contribution_factor_str": "SQLOperator 38:EXCHANGE_NODE"}, {"reason": [{"impact": 265681, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "49", "wall_clock_time": 1177394, "contribution_factor_str": "SQLOperator 49:EXCHANGE_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 775849, "contribution_factor_str": "RowMaterializationTimer -1:N/A"}, {"reason": [{"impact": 235417.66666666666, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "48", "wall_clock_time": 279531, "contribution_factor_str": "SQLOperator 48:EXCHANGE_NODE"}, {"reason": [{"impact": 10539.11111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "36", "wall_clock_time": 31603, "contribution_factor_str": "SQLOperator 36:EXCHANGE_NODE"}, {"reason": [{"impact": 8916.666666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "47", "wall_clock_time": 29729, "contribution_factor_str": "SQLOperator 47:EXCHANGE_NODE"}, {"reason": [{"impact": 8002.1111111111095, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "41", "wall_clock_time": 29716, "contribution_factor_str": "SQLOperator 41:EXCHANGE_NODE"}, {"reason": [{"impact": 
1725.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "42", "wall_clock_time": 16432, "contribution_factor_str": "SQLOperator 42:EXCHANGE_NODE"}, {"reason": [{"impact": 791.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "37", "wall_clock_time": 14808, "contribution_factor_str": "SQLOperator 37:EXCHANGE_NODE"}, {"reason": [], "result_id": "35", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator 35:SORT_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F04 0:BlockMgr"}, {"reason": [], "result_id": "F15 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F15 0:BlockMgr"}], "rule": {"message": "Top contributing factors and its reasons", "prio": 1, "label": "Top Down Analysis"}, "template": "alan-tpl"}] + test = [{"result": [{"reason": [{"impact": 16798499570, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1841684634.666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "30", "wall_clock_time": 20683095270, "contribution_factor_str": "SQLOperator 30:AGGREGATION_NODE"}, {"reason": [{"impact": 16137425107, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1249201121.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "7", "wall_clock_time": 20022020807, "contribution_factor_str": "SQLOperator 07:AGGREGATION_NODE"}, {"reason": [{"impact": 15991669185, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 1062368963.2222214, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "20", "wall_clock_time": 19681122971, "contribution_factor_str": "SQLOperator 20:AGGREGATION_NODE"}, {"reason": [{"impact": 538561025.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "17", "wall_clock_time": 6966953012, "contribution_factor_str": "SQLOperator 17:HASH_JOIN_NODE"}, {"reason": [{"impact": 874553885.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "29", "wall_clock_time": 6705756207, "contribution_factor_str": "SQLOperator 29:HASH_JOIN_NODE"}, {"reason": [{"impact": 496170372, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", 
"unit": 5}], "result_id": "27", "wall_clock_time": 6663793736, "contribution_factor_str": "SQLOperator 27:HASH_JOIN_NODE"}, {"reason": [{"impact": 467446848.55555534, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "4", "wall_clock_time": 6641201075, "contribution_factor_str": "SQLOperator 04:HASH_JOIN_NODE"}, {"reason": [{"impact": 503890745.8888893, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "6", "wall_clock_time": 6611505627, "contribution_factor_str": "SQLOperator 06:HASH_JOIN_NODE"}, {"reason": [{"impact": 634909229.333333, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "19", "wall_clock_time": 6401734479, "contribution_factor_str": "SQLOperator 19:HASH_JOIN_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 2612825457, "contribution_factor_str": "RemoteFragmentsStarted -1:N/A"}, {"reason": [{"impact": 3672332795.524691, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1271091421, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 929179291.4444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "14", "wall_clock_time": 2320876241, "contribution_factor_str": "SQLOperator 14:HDFS_SCAN_NODE"}, {"reason": [{"impact": 165377262.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "5", "wall_clock_time": 2258327578, "contribution_factor_str": "SQLOperator 05:HASH_JOIN_NODE"}, {"reason": [{"impact": 174711179.44444442, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "28", "wall_clock_time": 2231494483, "contribution_factor_str": "SQLOperator 28:HASH_JOIN_NODE"}, {"reason": [{"impact": 4598206116.796875, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1261948355, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 836163684.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 49606693.93939389, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows 
processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "1", "wall_clock_time": 2201407589, "contribution_factor_str": "SQLOperator 01:HDFS_SCAN_NODE"}, {"reason": [{"impact": 4407935855.252918, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}, {"impact": 1767671213, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}, {"impact": 722860231, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}, {"impact": 45400713.888888806, "name": "Rows Read Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) in rows processed", "unit": 5}, {"impact": 0.006735614444444418, "name": "Bytes Read Skew", "fix": {"fixable": False}, "message": "Addition IO time cost by the skew (assuming 5 disks)", "unit": 5}], "result_id": "24", "wall_clock_time": 2193866884, "contribution_factor_str": "SQLOperator 24:HDFS_SCAN_NODE"}, {"reason": [{"impact": 96606459.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "18", "wall_clock_time": 2180207014, "contribution_factor_str": "SQLOperator 18:HASH_JOIN_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 1250729128, "contribution_factor_str": "SQLOperator F04 0:CodeGen"}, {"reason": [{"impact": 193415667.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 1", "wall_clock_time": 1201795461, "contribution_factor_str": "SQLOperator F04 1:CodeGen"}, {"reason": [{"impact": 92531774.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F00 0", "wall_clock_time": 1062080747, "contribution_factor_str": "SQLOperator F00 0:CodeGen"}, {"reason": [{"impact": 118700210.11111116, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F05 0", "wall_clock_time": 1009980856, "contribution_factor_str": "SQLOperator F05 0:CodeGen"}, {"reason": [{"impact": 132909682.88888884, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F09 0", "wall_clock_time": 950194410, "contribution_factor_str": "SQLOperator F09 0:CodeGen"}, {"reason": [{"impact": 95305427.33333337, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F10 0", "wall_clock_time": 878960263, "contribution_factor_str": "SQLOperator F10 0:CodeGen"}, {"reason": [{"impact": 46199805, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], 
"result_id": "F14 0", "wall_clock_time": 769058113, "contribution_factor_str": "SQLOperator F14 0:CodeGen"}, {"reason": [], "result_id": -1, "wall_clock_time": 613452579, "contribution_factor_str": "PlanningTime -1:N/A"}, {"reason": [{"impact": 306772810, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 42519756.55555558, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "45", "wall_clock_time": 319264610, "contribution_factor_str": "SQLOperator 45:AGGREGATION_NODE"}, {"reason": [{"impact": 297637309, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 29017600.555555582, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "51", "wall_clock_time": 309567409, "contribution_factor_str": "SQLOperator 51:AGGREGATION_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 107247619, "contribution_factor_str": "ClientFetchWaitTimer -1:N/A"}, {"reason": [{"impact": 97484030, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 36347752, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "31", "wall_clock_time": 98861130, "contribution_factor_str": "SQLOperator 31:SORT_NODE"}, {"reason": [{"impact": 67982884, "name": "Slow Aggregate", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the aggregate; might be caused by complex group by", "unit": 5}, {"impact": 7664156.555555552, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "40", "wall_clock_time": 80474684, "contribution_factor_str": "SQLOperator 40:AGGREGATION_NODE"}, {"reason": [{"impact": 32130961.111111112, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "12", "wall_clock_time": 71088072, "contribution_factor_str": "SQLOperator 12:SELECT_NODE"}, {"reason": [{"impact": 58733676, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 5766554.333333336, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "8", "wall_clock_time": 60080276, "contribution_factor_str": "SQLOperator 08:SORT_NODE"}, {"reason": [{"impact": 57966057, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 4243951.444444448, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "21", "wall_clock_time": 59294857, 
"contribution_factor_str": "SQLOperator 21:SORT_NODE"}, {"reason": [{"impact": 47950535, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 37688100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "15", "wall_clock_time": 47950535, "contribution_factor_str": "SQLOperator 15:HDFS_SCAN_NODE"}, {"reason": [{"impact": 17818123.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "52", "wall_clock_time": 44603227, "contribution_factor_str": "SQLOperator 52:EXCHANGE_NODE"}, {"reason": [{"impact": 9621600, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 121390; LHS 105174", "unit": 5}, {"impact": 4113826, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 2924865.666666664, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "34", "wall_clock_time": 43779812, "contribution_factor_str": "SQLOperator 34:HASH_JOIN_NODE"}, {"reason": [{"impact": 14784147, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "32", "wall_clock_time": 42111797, "contribution_factor_str": "SQLOperator 32:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 39518015, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 29689100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "2", "wall_clock_time": 39518015, "contribution_factor_str": "SQLOperator 02:HDFS_SCAN_NODE"}, {"reason": [{"impact": 20851584.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "46", "wall_clock_time": 38647270, "contribution_factor_str": "SQLOperator 46:EXCHANGE_NODE"}, {"reason": [{"impact": 8035800, "name": "Wrong join strategy", "fix": {"fixable": False}, "message": "RHS 105576; LHS 121383", "unit": 5}, {"impact": 3816722, "name": "Slow Hash Join", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the hash join", "unit": 5}, {"impact": 1904130.4444444478, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "33", "wall_clock_time": 37364443, "contribution_factor_str": "SQLOperator 33:HASH_JOIN_NODE"}, {"reason": [{"impact": 31174821, "name": "Slow Sorting", "fix": {"fixable": False}, "message": "Excess time (over expected time) spent in the sort; might be caused by too many sorting column", "unit": 5}, {"impact": 1894590, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "10", "wall_clock_time": 32551921, "contribution_factor_str": "SQLOperator 
10:SORT_NODE"}, {"reason": [{"impact": 26659473.75, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 20690100, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "25", "wall_clock_time": 30467970, "contribution_factor_str": "SQLOperator 25:HDFS_SCAN_NODE"}, {"reason": [{"impact": 7084883.444444444, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "11", "wall_clock_time": 28336314, "contribution_factor_str": "SQLOperator 11:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2135688.222222224, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "9", "wall_clock_time": 22614443, "contribution_factor_str": "SQLOperator 09:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 1150084.666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "22", "wall_clock_time": 22144125, "contribution_factor_str": "SQLOperator 22:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2047632, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "39", "wall_clock_time": 11957699, "contribution_factor_str": "SQLOperator 39:EXCHANGE_NODE"}, {"reason": [{"impact": 1332451, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "44", "wall_clock_time": 11506235, "contribution_factor_str": "SQLOperator 44:EXCHANGE_NODE"}, {"reason": [{"impact": 728588, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "50", "wall_clock_time": 10172630, "contribution_factor_str": "SQLOperator 50:EXCHANGE_NODE"}, {"reason": [{"impact": 3334413, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "0", "wall_clock_time": 3334413, "contribution_factor_str": "SQLOperator 00:HDFS_SCAN_NODE"}, {"reason": [], "result_id": "53", "wall_clock_time": 3082111, "contribution_factor_str": "SQLOperator 53:EXCHANGE_NODE"}, {"reason": [{"impact": 2594847, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 1199000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "23", "wall_clock_time": 2594847, "contribution_factor_str": "SQLOperator 23:HDFS_SCAN_NODE"}, {"reason": [{"impact": 2452312, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", 
"unit": 5}, {"impact": 2198000, "name": "Slow HDFS Scan", "fix": {"fixable": False}, "message": "Predicates might be expensive (expectes speed 10m rows per sec per core)", "unit": 5}], "result_id": "13", "wall_clock_time": 2452312, "contribution_factor_str": "SQLOperator 13:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1706125, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 287883, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "16", "wall_clock_time": 1706125, "contribution_factor_str": "SQLOperator 16:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1619889, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 601555, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "3", "wall_clock_time": 1619889, "contribution_factor_str": "SQLOperator 03:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1385497, "name": "Lack of scanner thread parallelism", "fix": {"fixable": False}, "message": "Speed can be improved by that much if there's 8 scanner threads", "unit": 5}, {"impact": 181359, "name": "HDFS NN RPC", "fix": {"fixable": False}, "message": "This is the time waiting for HDFS NN RPC.", "unit": 5}], "result_id": "26", "wall_clock_time": 1385497, "contribution_factor_str": "SQLOperator 26:HDFS_SCAN_NODE"}, {"reason": [{"impact": 559177.1111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "43", "wall_clock_time": 1378341, "contribution_factor_str": "SQLOperator 43:EXCHANGE_NODE"}, {"reason": [{"impact": 362490.3333333334, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "38", "wall_clock_time": 1291643, "contribution_factor_str": "SQLOperator 38:EXCHANGE_NODE"}, {"reason": [{"impact": 265681, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "49", "wall_clock_time": 1177394, "contribution_factor_str": "SQLOperator 49:EXCHANGE_NODE"}, {"reason": [], "result_id": -1, "wall_clock_time": 775849, "contribution_factor_str": "RowMaterializationTimer -1:N/A"}, {"reason": [{"impact": 235417.66666666666, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "48", "wall_clock_time": 279531, "contribution_factor_str": "SQLOperator 48:EXCHANGE_NODE"}, {"reason": [{"impact": 10539.11111111111, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "36", "wall_clock_time": 31603, "contribution_factor_str": "SQLOperator 36:EXCHANGE_NODE"}, {"reason": [{"impact": 8916.666666666668, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "47", "wall_clock_time": 29729, "contribution_factor_str": "SQLOperator 47:EXCHANGE_NODE"}, {"reason": [{"impact": 
8002.1111111111095, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "41", "wall_clock_time": 29716, "contribution_factor_str": "SQLOperator 41:EXCHANGE_NODE"}, {"reason": [{"impact": 1725.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "42", "wall_clock_time": 16432, "contribution_factor_str": "SQLOperator 42:EXCHANGE_NODE"}, {"reason": [{"impact": 791.1111111111113, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "37", "wall_clock_time": 14808, "contribution_factor_str": "SQLOperator 37:EXCHANGE_NODE"}, {"reason": [], "result_id": "35", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator 35:SORT_NODE"}, {"reason": [{"impact": 1111759224.8888888, "name": "TotalTime Skew", "fix": {"fixable": False}, "message": "The skew (max-avg) contributed this amount of time to this SQL operator", "unit": 5}], "result_id": "F04 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F04 0:BlockMgr"}, {"reason": [], "result_id": "F15 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F15 0:BlockMgr"}], "rule": {"message": "Top contributing factors and its reasons", "prio": 1, "label": "Top Down Analysis"}, "template": "alan-tpl"}] # noqa: E501 assert ordered(result) == ordered(test) def test_performance(self): pr = cProfile.Profile() pr.enable() - ts1 = time.time()*1000.0 + ts1 = time.time() * 1000.0 self.analyze.pre_process(self.profile) result = self.analyze.run(self.profile) - ts2 = time.time()*1000.0 + ts2 = time.time() * 1000.0 dts = ts2 - ts1 pr.disable() s = string_io() @@ -68,4 +68,4 @@ def test_performance(self): ps = pstats.Stats(pr, stream=s).sort_stats(sortby) ps.print_stats() LOG.info(s.getvalue()) - assert dts <= 1000 \ No newline at end of file + assert dts <= 1000 diff --git a/desktop/libs/libanalyze/src/libanalyze/rules.py b/desktop/libs/libanalyze/src/libanalyze/rules.py index cbb8c3d47b8..189fdc77af5 100644 --- a/desktop/libs/libanalyze/src/libanalyze/rules.py +++ b/desktop/libs/libanalyze/src/libanalyze/rules.py @@ -14,42 +14,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import division -from builtins import zip -from builtins import range -from builtins import object -from functools import reduce + +import os +import re import copy import glob import json -import logging import math -import os -import re -import types -import sys import struct +import logging +from functools import reduce +from itertools import groupby from dateutil.parser import parse as dtparse -from itertools import groupby -from libanalyze import models -from libanalyze import exprs -from libanalyze import utils +from libanalyze import exprs, models, utils from libanalyze.utils import Timer -if sys.version_info[0] > 2: - string_types = str -else: - string_types = types.StringTypes - - LOG = logging.getLogger() def to_double(metric_value): return struct.unpack('d', struct.pack('q', metric_value))[0] + class ProfileContext(object): """This is the main wrapper around the runtime profile tree. 
Main accessor methods are implemented here.""" @@ -70,7 +58,7 @@ class SQLOperatorReason(object): def __init__(self, node_name, metric_names, rule, exprs=[], to_json=True, **kwargs): self.node_name = node_name - if isinstance(metric_names, string_types): + if isinstance(metric_names, str): self.metric_names = [metric_names] else: self.metric_names = metric_names @@ -117,8 +105,8 @@ def getNumInputRows(self, node): if nodeType == 'HdfsTableSink': return node.find_metric_by_name('RowsInserted')[0]['value'] - metrics = reduce(lambda x,y: x + y.find_metric_by_name('RowsReturned'), node.children, []) - return reduce(lambda x,y: x + y['value'], metrics, 0) + metrics = reduce(lambda x, y: x + y.find_metric_by_name('RowsReturned'), node.children, []) + return reduce(lambda x, y: x + y['value'], metrics, 0) def evaluate(self, profile, plan_node_id): """ @@ -198,6 +186,7 @@ def check_exprs(self, group): [g.value for g in group])) return result + class SummaryReason(SQLOperatorReason): def evaluate(self, profile, plan_node_id): @@ -257,9 +246,10 @@ def evaluate(self, profile, plan_node_id): "label": self.rule["label"] } + class JoinOrderStrategyCheck(SQLOperatorReason): def __init__(self): - self.kwargs = {'fix': { 'fixable': False }, 'unit': 5} + self.kwargs = {'fix': {'fixable': False}, 'unit': 5} def evaluate(self, profile, plan_node_id): """ @@ -308,9 +298,10 @@ def evaluate(self, profile, plan_node_id): "label": "Wrong join strategy" } + class ExplodingJoinCheck(SQLOperatorReason): def __init__(self): - self.kwargs = {'fix': { 'fixable': False }, 'unit': 5} + self.kwargs = {'fix': {'fixable': False}, 'unit': 5} def evaluate(self, profile, plan_node_id): """ @@ -338,9 +329,10 @@ def evaluate(self, profile, plan_node_id): "label": "Exploding join" } + class NNRpcCheck(SQLOperatorReason): def __init__(self): - self.kwargs = {'fix': { 'fixable': False }, 'unit': 5} + self.kwargs = {'fix': {'fixable': False}, 'unit': 5} def evaluate(self, profile, plan_node_id): """ @@ -363,6 +355,7 @@ def evaluate(self, profile, plan_node_id): "label": "HDFS NN RPC" } + class TopDownAnalysis(object): def __init__(self): @@ -381,10 +374,10 @@ def __init__(self): nodes = [node_names] if type == 'SQLOperator': for node in nodes: - self.sqlOperatorReasons.setdefault(node,[])\ + self.sqlOperatorReasons.setdefault(node, [])\ .append(SQLOperatorReason(**json_object)) else: - self.sqlOperatorReasons.setdefault(type,[])\ + self.sqlOperatorReasons.setdefault(type, [])\ .append(SummaryReason(**json_object)) # Manually append specially coded reaason @@ -428,7 +421,6 @@ def getTopReasons(self, contributor): """ return sorted(contributor.reason, key=lambda x: x.impact, reverse=True) if contributor.reason else contributor.reason - def createContributors(self, profile): """ Return the models.Contributor objects. Contributor can be planning time, admission control wait time, query fragment distribution time, SQL operator, DML @@ -438,10 +430,10 @@ def createContributors(self, profile): persisted in the database. """ execution_profile = profile.find_by_name('Execution Profile') - #summary = _profile.find_by_name("Summary") + # summary = _profile.find_by_name("Summary") counter_map = profile.find_by_name('Summary').counter_map() counter_map.update(profile.find_by_name("ImpalaServer").counter_map()) - #counter_map = summary.counter_map() + # counter_map = summary.counter_map() # list of non-SQL operator contributor # TODO: add admission control, DML Metastore update; profile does not have it yet. 
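# --- Illustrative sketch, not part of the patch ---
# The createContributors() hunks in this file aggregate per-node 'LocalTime' metrics with
# reduce() and itertools.groupby(), then rank plan nodes by wall clock time. Below is a
# minimal, self-contained approximation of that pattern; the dict-shaped "nodes" are
# hypothetical stand-ins for the real profile node objects, not the actual API.
from functools import reduce
from itertools import groupby

def top_contributors(nodes, limit=5):
    # Flatten every node's LocalTime samples into (node, value) records.
    metrics = reduce(lambda acc, n: acc + [{'node': n, 'value': v} for v in n['local_times']], nodes, [])
    # groupby() only groups adjacent items, so sort by the grouping key first.
    metrics = sorted(metrics, key=lambda m: (m['node']['id'], m['node']['name']))
    ranked = []
    for (nid, name), group in groupby(metrics, key=lambda m: (m['node']['id'], m['node']['name'])):
        samples = [g['value'] for g in group]
        # Keep the slowest sample per operator as its wall clock contribution (a simplification).
        ranked.append({'plan_node_id': nid, 'plan_node_name': name, 'wall_clock_time': max(samples)})
    return sorted(ranked, key=lambda c: c['wall_clock_time'], reverse=True)[:limit]

# Example: top_contributors([{'id': 2, 'name': 'HDFS_SCAN_NODE', 'local_times': [39518015, 1200]}])
# --- end sketch ---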
@@ -453,14 +445,14 @@ def createContributors(self, profile): contributor = models.Contributor(type=metric, wall_clock_time=counter_map[metric].value, plan_node_id=-1, plan_node_name="N/A") - #models.db.session.add(contributor) + # models.db.session.add(contributor) contributors += [contributor] if self.isDebugBuilt(profile): contributor = models.Contributor(type="Debug Built", wall_clock_time=9999999999999999, plan_node_id=-1, plan_node_name="N/A") - #models.db.session.add(contributor) + # models.db.session.add(contributor) contributors += [contributor] # Get the top N contributor from query execution @@ -468,9 +460,9 @@ def createContributors(self, profile): # Get the plan node execution time # Note: ignore DataStreamSender because its metrics is useless nodes = execution_profile.find_all_non_fragment_nodes() - nodes = [x for x in nodes if x.fragment and x.fragment.is_averaged() == False] + nodes = [x for x in nodes if x.fragment and x.fragment.is_averaged() is False] nodes = [x for x in nodes if x.name() != 'DataStreamSender'] - metrics = reduce(lambda x,y: x + y.find_metric_by_name('LocalTime'), nodes, []) + metrics = reduce(lambda x, y: x + y.find_metric_by_name('LocalTime'), nodes, []) metrics = sorted(metrics, key=lambda x: (x['node'].id(), x['node'].name())) for k, g in groupby(metrics, lambda x: (x['node'].id(), x['node'].name())): grouped = list(g) @@ -481,7 +473,6 @@ def createContributors(self, profile): plan_node_id=grouped[0]['node'].id(), plan_node_name=grouped[0]['node'].name()) contributors += [contributor] - # Sort execTime based on wall_clock_time and cut it off at limit contributors = sorted(contributors, key=lambda x: x.wall_clock_time, reverse=True) return contributors @@ -493,7 +484,7 @@ def createExecSqlNodeReason(self, contributor, profile): The result will be in the form of """ reasons = [] - self.sqlOperatorReasons.setdefault(contributor.plan_node_name,[]) + self.sqlOperatorReasons.setdefault(contributor.plan_node_name, []) for cause in self.sqlOperatorReasons[contributor.plan_node_name] + self.sqlOperatorReasons["ANY"]: evaluation = cause.evaluate(profile, contributor.plan_node_id) impact = evaluation["impact"] @@ -515,7 +506,7 @@ def createExecNodeReason(self, contributor, profile): The result will be in the form of """ reasons = [] - self.sqlOperatorReasons.setdefault(contributor.type,[]) + self.sqlOperatorReasons.setdefault(contributor.type, []) for cause in self.sqlOperatorReasons[contributor.type]: evaluation = cause.evaluate(profile, contributor.plan_node_id) impact = evaluation["impact"] @@ -606,7 +597,7 @@ def add_host(node, exec_summary_json=exec_summary_json): is_plan_node = node.is_plan_node() node_id = node.id() nid = int(node_id) if node_id and node.is_regular() else -1 - # Setup Hosts & Broadcast + # Setup Hosts & Broadcast if node_id and node.is_regular() and nid in exec_summary_json: exec_summary_node = exec_summary_json.get(nid, {}) node.val.counters.append(models.TCounter(name='Hosts', value=exec_summary_node.get('hosts', ''), unit=0)) @@ -665,7 +656,7 @@ def add_host(node, exec_summary_json=exec_summary_json): grouping_aggregator = node.find_by_name('GroupingAggregator') if grouping_aggregator and grouping_aggregator.counter_map().get('SpilledPartitions', models.TCounter(value=0)).value > 0: has_spilled = True - elif is_plan_node and node_name == 'HASH_JOIN_NODE': # For Hash Join, if the "LocalTime" metrics + elif is_plan_node and node_name == 'HASH_JOIN_NODE': # For Hash Join, if the "LocalTime" metrics hash_join_builder = 
node.find_by_name('Hash Join Builder') if hash_join_builder and hash_join_builder.counter_map().get('SpilledPartitions', models.TCounter(value=0)).value > 0: has_spilled = True @@ -688,6 +679,7 @@ def add_host(node, exec_summary_json=exec_summary_json): node.val.counters.append(models.TCounter(name='ChildTime', value=child_time, unit=5)) nodes = {} + def create_map(node, nodes=nodes): nid = node.id() if nid: @@ -708,21 +700,21 @@ def run(self, profile): if self.isDebugBuilt(profile): topContributions += [{ - "result_id" : result_id, - "contribution_factor_str" : "Using Debug Built", - "wall_clock_time" : 9999, - "reason" : [] + "result_id": result_id, + "contribution_factor_str": "Using Debug Built", + "wall_clock_time": 9999, + "reason": [] }] for contributor in topContributors: reasons = self.getTopReasons(contributor) topContributions += [{ - "result_id" : contributor.plan_node_id if contributor.plan_node_id != -1 else -1, - "contribution_factor_str" : contributor.type + " " + + "result_id": contributor.plan_node_id if contributor.plan_node_id != -1 else -1, + "contribution_factor_str": contributor.type + " " + str(contributor.plan_node_id).zfill(2) + ":" + contributor.plan_node_name, - "wall_clock_time" : contributor.wall_clock_time, - "reason" : [reason.__dict__ for reason in reasons] + "wall_clock_time": contributor.wall_clock_time, + "reason": [reason.__dict__ for reason in reasons] }] result = [] diff --git a/desktop/libs/liboauth/src/liboauth/backend.py b/desktop/libs/liboauth/src/liboauth/backend.py index fdc599ba02a..5b01644d912 100644 --- a/desktop/libs/liboauth/src/liboauth/backend.py +++ b/desktop/libs/liboauth/src/liboauth/backend.py @@ -18,39 +18,33 @@ See desktop/auth/backend.py """ -from future import standard_library -standard_library.install_aliases() -import json import cgi -import logging import sys +import json +import logging +from urllib.parse import urlencode as lib_urlencode -LOG = logging.getLogger() - -from desktop.auth.backend import force_username_case, DesktopBackendBase - -from useradmin.models import get_profile, get_default_user_group, UserProfile, User +from django.utils.translation import gettext as _ import liboauth.conf import liboauth.metrics +from desktop.auth.backend import DesktopBackendBase, force_username_case +from useradmin.models import User, UserProfile, get_default_user_group, get_profile + +LOG = logging.getLogger() try: import httplib2 except ImportError: LOG.warning('httplib2 module not found') + + try: import oauth2 as oauth except ImportError: LOG.warning('oauth2 module not found') oauth = None -if sys.version_info[0] > 2: - from urllib.parse import urlencode as lib_urlencode - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from urllib import urlencode as lib_urlencode - class OAuthBackend(DesktopBackendBase): @@ -89,7 +83,6 @@ def authenticate(self, access_token): return user - @classmethod def manages_passwords_externally(cls): return True @@ -99,21 +92,20 @@ def is_first_login_ever(cls): """ Return true if no external user has ever logged in to Desktop yet. 
""" return not UserProfile.objects.filter(creation_method=UserProfile.CreationMethod.EXTERNAL.name).exists() - @classmethod def handleAuthenticationRequest(cls, request): assert oauth is not None if 'oauth_verifier' in request.GET: social = 'twitter' - consumer_key=liboauth.conf.CONSUMER_KEY_TWITTER.get() - consumer_secret=liboauth.conf.CONSUMER_SECRET_TWITTER.get() - access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_TWITTER.get() + consumer_key = liboauth.conf.CONSUMER_KEY_TWITTER.get() + consumer_secret = liboauth.conf.CONSUMER_SECRET_TWITTER.get() + access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_TWITTER.get() consumer = oauth.Consumer(consumer_key, consumer_secret) token = oauth.Token(request.session['request_token']['oauth_token'], request.session['request_token']['oauth_token_secret']) client = oauth.Client(consumer, token) - oauth_verifier=request.GET['oauth_verifier'] + oauth_verifier = request.GET['oauth_verifier'] resp, content = client.request(access_token_uri + oauth_verifier, "GET") if resp['status'] != '200': raise Exception(_("Invalid response from OAuth provider: %s") % resp) @@ -135,49 +127,49 @@ def handleAuthenticationRequest(cls, request): social = state_split[0] if social == 'google': - consumer_key=liboauth.conf.CONSUMER_KEY_GOOGLE.get() - consumer_secret=liboauth.conf.CONSUMER_SECRET_GOOGLE.get() - access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_GOOGLE.get() - authentication_token_uri=liboauth.conf.AUTHORIZE_URL_GOOGLE.get() + consumer_key = liboauth.conf.CONSUMER_KEY_GOOGLE.get() + consumer_secret = liboauth.conf.CONSUMER_SECRET_GOOGLE.get() + access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_GOOGLE.get() + authentication_token_uri = liboauth.conf.AUTHORIZE_URL_GOOGLE.get() elif social == 'facebook': - consumer_key=liboauth.conf.CONSUMER_KEY_FACEBOOK.get() - consumer_secret=liboauth.conf.CONSUMER_SECRET_FACEBOOK.get() - access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_FACEBOOK.get() - authentication_token_uri=liboauth.conf.AUTHORIZE_URL_FACEBOOK.get() + consumer_key = liboauth.conf.CONSUMER_KEY_FACEBOOK.get() + consumer_secret = liboauth.conf.CONSUMER_SECRET_FACEBOOK.get() + access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_FACEBOOK.get() + authentication_token_uri = liboauth.conf.AUTHORIZE_URL_FACEBOOK.get() elif social == 'linkedin': - consumer_key=liboauth.conf.CONSUMER_KEY_LINKEDIN.get() - consumer_secret=liboauth.conf.CONSUMER_SECRET_LINKEDIN.get() - access_token_uri=liboauth.conf.ACCESS_TOKEN_URL_LINKEDIN.get() - authentication_token_uri=liboauth.conf.AUTHORIZE_URL_LINKEDIN.get() + consumer_key = liboauth.conf.CONSUMER_KEY_LINKEDIN.get() + consumer_secret = liboauth.conf.CONSUMER_SECRET_LINKEDIN.get() + access_token_uri = liboauth.conf.ACCESS_TOKEN_URL_LINKEDIN.get() + authentication_token_uri = liboauth.conf.AUTHORIZE_URL_LINKEDIN.get() params = lib_urlencode({ - 'code':code, - 'redirect_uri':redirect_uri, + 'code': code, + 'redirect_uri': redirect_uri, 'client_id': consumer_key, 'client_secret': consumer_secret, - 'grant_type':grant_type + 'grant_type': grant_type }) - headers={'content-type':'application/x-www-form-urlencoded'} - resp, cont = parser.request(access_token_uri, method = 'POST', body = params, headers = headers) + headers = {'content-type': 'application/x-www-form-urlencoded'} + resp, cont = parser.request(access_token_uri, method='POST', body=params, headers=headers) if resp['status'] != '200': raise Exception(_("Invalid response from OAuth provider: %s") % resp) - #google + # google if social == 'google': access_tok = 
(json.loads(cont))['access_token'] auth_token_uri = authentication_token_uri + access_tok resp, content = parser.request(auth_token_uri, "GET") if resp['status'] != '200': raise Exception(_("Invalid response from OAuth provider: %s") % resp) - username=(json.loads(content))["email"] + username = (json.loads(content))["email"] access_token = dict(screen_name=map_username(username), oauth_token_secret=access_tok) whitelisted_domains = liboauth.conf.WHITELISTED_DOMAINS_GOOGLE.get() if whitelisted_domains: if username.split('@')[1] not in whitelisted_domains: access_token = "" - #facebook + # facebook elif social == 'facebook': access_tok = (dict(cgi.parse_qsl(cont)))['access_token'] auth_token_uri = authentication_token_uri + access_tok @@ -186,7 +178,7 @@ def handleAuthenticationRequest(cls, request): raise Exception(_("Invalid response from OAuth provider: %s") % resp) username = (json.loads(content))["email"] access_token = dict(screen_name=map_username(username), oauth_token_secret=access_tok) - #linkedin + # linkedin elif social == 'linkedin': access_tok = (json.loads(cont))['access_token'] auth_token_uri = authentication_token_uri + access_tok @@ -196,10 +188,8 @@ def handleAuthenticationRequest(cls, request): username = (json.loads(content))['emailAddress'] access_token = dict(screen_name=map_username(username), oauth_token_secret=access_tok) - return access_token, nexturl - @classmethod def handleLoginRequest(cls, request): assert oauth is not None @@ -211,30 +201,30 @@ def handleLoginRequest(cls, request): state = social + "," + request.GET.get('next', '/') if social == 'google': - consumer_key=liboauth.conf.CONSUMER_KEY_GOOGLE.get() + consumer_key = liboauth.conf.CONSUMER_KEY_GOOGLE.get() token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_GOOGLE.get() scope = "https://www.googleapis.com/auth/userinfo.email" - access_type="offline" - approval_prompt="force" - - url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&redirect_uri={redirect_uri}&scope={scope}&state={state}&access_type={access_type}&approval_prompt={approval_prompt}".format( - token_request_uri = token_request_uri, - response_type = response_type, - client_id = consumer_key, - redirect_uri = redirect_uri, - scope = scope, - state = state, - access_type = access_type, - approval_prompt = approval_prompt) - - #facebook + access_type = "offline" + approval_prompt = "force" + + url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&redirect_uri={redirect_uri}&scope={scope}&state={state}&access_type={access_type}&approval_prompt={approval_prompt}".format( # noqa: E501 + token_request_uri=token_request_uri, + response_type=response_type, + client_id=consumer_key, + redirect_uri=redirect_uri, + scope=scope, + state=state, + access_type=access_type, + approval_prompt=approval_prompt) + + # facebook elif social == 'facebook': - consumer_key=liboauth.conf.CONSUMER_KEY_FACEBOOK.get() + consumer_key = liboauth.conf.CONSUMER_KEY_FACEBOOK.get() token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_FACEBOOK.get() scope = "email" grant_type = "client_credentials" - url = "{token_request_uri}?client_id={client_id}&redirect_uri={redirect_uri}&grant_type={grant_type}&scope={scope}&state={state}".format( + url = "{token_request_uri}?client_id={client_id}&redirect_uri={redirect_uri}&grant_type={grant_type}&scope={scope}&state={state}".format( # noqa: E501 token_request_uri=token_request_uri, client_id=consumer_key, redirect_uri=redirect_uri, @@ -242,23 +232,23 @@ def handleLoginRequest(cls, 
request): scope=scope, state=state) - #linkedin + # linkedin elif social == 'linkedin': - consumer_key=liboauth.conf.CONSUMER_KEY_LINKEDIN.get() + consumer_key = liboauth.conf.CONSUMER_KEY_LINKEDIN.get() token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_LINKEDIN.get() - scope= "r_emailaddress" + scope = "r_emailaddress" - url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&scope={scope}&state={state}&redirect_uri={redirect_uri}".format( + url = "{token_request_uri}?response_type={response_type}&client_id={client_id}&scope={scope}&state={state}&redirect_uri={redirect_uri}".format( # noqa: E501 token_request_uri=token_request_uri, response_type=response_type, client_id=consumer_key, scope=scope, state=state, redirect_uri=redirect_uri) - #twitter + # twitter else: - consumer_key=liboauth.conf.CONSUMER_KEY_TWITTER.get() - consumer_secret=liboauth.conf.CONSUMER_SECRET_TWITTER.get() + consumer_key = liboauth.conf.CONSUMER_KEY_TWITTER.get() + consumer_secret = liboauth.conf.CONSUMER_SECRET_TWITTER.get() token_request_uri = liboauth.conf.REQUEST_TOKEN_URL_TWITTER.get() token_authentication_uri = liboauth.conf.AUTHORIZE_URL_TWITTER.get() @@ -274,6 +264,7 @@ def handleLoginRequest(cls, request): ) return url + def map_username(username): username_map = liboauth.conf.USERNAME_MAP.get() if username_map: @@ -281,6 +272,7 @@ def map_username(username): username = username.replace(key, value) return ''.join([x for x in username if x.isalnum()]) + def find_or_create_user(username, password=None): try: user = User.objects.get(username=username) @@ -296,6 +288,7 @@ def find_or_create_user(username, password=None): user.save() return user + def get_redirect_uri(request): # Either use the proxy-specified protocol or the one from the request itself. 
# This is useful if the server is behind some kind of proxy diff --git a/desktop/libs/liboauth/src/liboauth/conf.py b/desktop/libs/liboauth/src/liboauth/conf.py index ccdf8df1a3e..8e8340799e0 100644 --- a/desktop/libs/liboauth/src/liboauth/conf.py +++ b/desktop/libs/liboauth/src/liboauth/conf.py @@ -18,13 +18,9 @@ import os import sys -from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_json_dict - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext as _, gettext_lazy as _t +from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_json_dict CONSUMER_KEY_TWITTER = Config( key="consumer_key_twitter", @@ -168,4 +164,3 @@ type=coerce_json_dict, default='{}' ) - diff --git a/desktop/libs/liboauth/src/liboauth/urls.py b/desktop/libs/liboauth/src/liboauth/urls.py index 700b8d85408..fb69885fcc1 100644 --- a/desktop/libs/liboauth/src/liboauth/urls.py +++ b/desktop/libs/liboauth/src/liboauth/urls.py @@ -17,12 +17,9 @@ import sys -from liboauth import views as liboauth_views +from django.urls import re_path -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from liboauth import views as liboauth_views urlpatterns = [ re_path(r'^accounts/login/$', liboauth_views.show_login_page, name='show_oauth_login'), diff --git a/desktop/libs/liboauth/src/liboauth/views.py b/desktop/libs/liboauth/src/liboauth/views.py index 61bd11310ca..110c7e139b0 100644 --- a/desktop/libs/liboauth/src/liboauth/views.py +++ b/desktop/libs/liboauth/src/liboauth/views.py @@ -15,43 +15,35 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from future import standard_library -standard_library.install_aliases() - import logging -import sys - -LOG = logging.getLogger() - -import urllib.request, urllib.parse, urllib.error -try: - import httplib2 -except ImportError: - LOG.warning('httplib2 module not found') +import urllib.error +import urllib.parse +import urllib.request import django.contrib.auth.views -from django.core.exceptions import SuspiciousOperation -from django.contrib.auth import login, get_backends, authenticate +from django.contrib.auth import authenticate, get_backends, login from django.contrib.sessions.models import Session +from django.core.exceptions import SuspiciousOperation from django.http import HttpResponseRedirect from django.urls import reverse -from hadoop.fs.exceptions import WebHdfsException -from useradmin.models import User -from useradmin.views import ensure_home_directory +from django.utils.translation import gettext as _ +import liboauth.conf from desktop.auth.backend import AllowFirstUserDjangoBackend -from desktop.auth.forms import UserCreationForm, AuthenticationForm -from desktop.lib.django_util import render -from desktop.lib.django_util import login_notrequired +from desktop.auth.forms import AuthenticationForm, UserCreationForm +from desktop.lib.django_util import login_notrequired, render from desktop.log.access import access_warn, last_access_map - -import liboauth.conf +from hadoop.fs.exceptions import WebHdfsException from liboauth.backend import OAuthBackend +from useradmin.models import User +from useradmin.views import ensure_home_directory -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +LOG = logging.getLogger() + +try: + import httplib2 +except ImportError: + LOG.warning('httplib2 module not found') @login_notrequired @@ -73,7 +65,6 @@ def show_login_page(request, login_errors=False): }) - @login_notrequired def oauth_login(request): if 'social' not in request.GET: diff --git a/desktop/libs/liboozie/src/liboozie/conf.py b/desktop/libs/liboozie/src/liboozie/conf.py index 55b6f0c0a6a..5ab6d9e7da8 100644 --- a/desktop/libs/liboozie/src/liboozie/conf.py +++ b/desktop/libs/liboozie/src/liboozie/conf.py @@ -15,20 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import oct, object -import logging import sys +import logging +from builtins import object, oct + +from django.utils.translation import gettext as _, gettext_lazy as _t from desktop import appmanager from desktop.conf import default_ssl_validate from desktop.lib.conf import Config, coerce_bool, validate_path -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _, gettext_lazy as _t -else: - from django.utils.translation import ugettext as _, ugettext_lazy as _t - - LOG = logging.getLogger() @@ -55,7 +51,7 @@ ) ) -SSL_CERT_CA_VERIFY=Config( +SSL_CERT_CA_VERIFY = Config( key="ssl_cert_ca_verify", help="In secure mode (HTTPS), if SSL certificates from Oozie Rest APIs have to be verified against certificate authority", dynamic_default=default_ssl_validate, @@ -79,9 +75,9 @@ def get_oozie_status(user): status = 'down' try: - if not 'test' in sys.argv: # Avoid tests hanging + if 'test' not in sys.argv: # Avoid tests hanging status = str(get_oozie(user).get_oozie_status()) - except: + except Exception: LOG.exception('failed to get oozie status') return status @@ -131,9 +127,16 @@ def config_validator(user): api = get_oozie(user, api_version="v2") configuration = api.get_configuration() - if 'org.apache.oozie.service.MetricsInstrumentationService' in [c.strip() for c in configuration.get('oozie.services.ext', '').split(',')]: + if 'org.apache.oozie.service.MetricsInstrumentationService' in [ + c.strip() for c in configuration.get('oozie.services.ext', '').split(',') + ]: metrics = api.get_metrics() - sharelib_url = 'gauges' in metrics and 'libs.sharelib.system.libpath' in metrics['gauges'] and [metrics['gauges']['libs.sharelib.system.libpath']['value']] or [] + sharelib_url = ( + 'gauges' in metrics + and 'libs.sharelib.system.libpath' in metrics['gauges'] + and [metrics['gauges']['libs.sharelib.system.libpath']['value']] + or [] + ) else: intrumentation = api.get_instrumentation() sharelib_url = [ diff --git a/desktop/libs/liboozie/src/liboozie/conf_tests.py b/desktop/libs/liboozie/src/liboozie/conf_tests.py index c45db57cd09..0429d8061cb 100644 --- a/desktop/libs/liboozie/src/liboozie/conf_tests.py +++ b/desktop/libs/liboozie/src/liboozie/conf_tests.py @@ -15,19 +15,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import sys +from unittest.mock import Mock, patch -from useradmin.models import User -from desktop.lib.django_test_util import make_logged_in_client +import pytest +from desktop.lib.django_test_util import make_logged_in_client from liboozie.conf import config_validator - - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock +from useradmin.models import User @pytest.mark.django_db diff --git a/desktop/libs/liboozie/src/liboozie/credentials.py b/desktop/libs/liboozie/src/liboozie/credentials.py index 811d61f804a..c22915c0bbf 100644 --- a/desktop/libs/liboozie/src/liboozie/credentials.py +++ b/desktop/libs/liboozie/src/liboozie/credentials.py @@ -15,15 +15,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
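# --- Illustrative sketch, not part of the patch ---
# The credentials.py hunks below reflow _parse_oozie(), which parses
# oozie.credentials.credentialclasses, a comma-separated list of name=class pairs,
# and class_to_name_credentials, which inverts that mapping. A standalone rendering
# of the same parsing; the example config value is only an illustration.
def parse_credential_classes(value):
    # 'name=class' entries, tolerant of surrounding whitespace and empty segments.
    return dict(cred.strip().split('=') for cred in value.strip().split(',') if cred)

def class_to_name(credentials):
    # Invert {name: class} into {class: name}, as class_to_name_credentials does.
    return dict((clazz, name) for name, clazz in credentials.items())

# Example:
# creds = parse_credential_classes('hcat=org.apache.oozie.action.hadoop.HCatCredentials')
# class_to_name(creds) -> {'org.apache.oozie.action.hadoop.HCatCredentials': 'hcat'}
# --- end sketch ---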
-from builtins import object -import logging import sys +import logging +from builtins import object -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from django.utils.translation import gettext as _ LOG = logging.getLogger() @@ -46,15 +42,16 @@ def fetch(self, oozie_api): self.credentials = self._parse_oozie(configuration) def _parse_oozie(self, configuration_dic): - return dict([cred.strip().split('=') for cred in configuration_dic.get('oozie.credentials.credentialclasses', '').strip().split(',') if cred]) + return dict( + [cred.strip().split('=') for cred in configuration_dic.get('oozie.credentials.credentialclasses', '').strip().split(',') if cred]) @property def class_to_name_credentials(self): - return dict((v,k) for k, v in self.credentials.items()) + return dict((v, k) for k, v in self.credentials.items()) def get_properties(self, hive_properties=None): credentials = {} - from beeswax import hive_site, conf + from beeswax import conf, hive_site if not hasattr(conf.HIVE_SERVER_HOST, 'get') or not conf.HIVE_SERVER_HOST.get(): LOG.warning('Could not get all the Oozie credentials: beeswax app is blacklisted.') diff --git a/desktop/libs/liboozie/src/liboozie/oozie_api.py b/desktop/libs/liboozie/src/liboozie/oozie_api.py index 348db58665a..7709763739d 100644 --- a/desktop/libs/liboozie/src/liboozie/oozie_api.py +++ b/desktop/libs/liboozie/src/liboozie/oozie_api.py @@ -14,25 +14,31 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object +import sys import logging import posixpath -import sys +from builtins import object -from desktop.conf import TIME_ZONE -from desktop.conf import DEFAULT_USER +from desktop.conf import DEFAULT_USER, TIME_ZONE from desktop.lib.rest.http_client import HttpClient from desktop.lib.rest.resource import Resource - -from liboozie.conf import SECURITY_ENABLED, OOZIE_URL, SSL_CERT_CA_VERIFY -from liboozie.types import WorkflowList, CoordinatorList, Coordinator, Workflow,\ - CoordinatorAction, WorkflowAction, BundleList, Bundle, BundleAction +from liboozie.conf import OOZIE_URL, SECURITY_ENABLED, SSL_CERT_CA_VERIFY +from liboozie.types import ( + Bundle, + BundleAction, + BundleList, + Coordinator, + CoordinatorAction, + CoordinatorList, + Workflow, + WorkflowAction, + WorkflowList, +) from liboozie.utils import config_gen - LOG = logging.getLogger() DEFAULT_USER = DEFAULT_USER.get() -API_VERSION = 'v1' # Overridden to v2 for SLA +API_VERSION = 'v1' # Overridden to v2 for SLA _XML_CONTENT_TYPE = 'application/xml;charset=UTF-8' @@ -178,10 +184,9 @@ def get_job_definition(self, jobid): params = self._get_params() params['show'] = 'definition' job_def = self._root.get('job/%s' % (jobid,), params) - if sys.version_info[0] > 2: - job_def = job_def.decode() - return job_def + job_def = job_def.decode() + return job_def def get_job_log(self, jobid, logfilter=None): """ @@ -199,10 +204,9 @@ def get_job_log(self, jobid, logfilter=None): filter_list.append('%s=%s' % (key, val)) params['logfilter'] = ';'.join(filter_list) log = self._root.get('job/%s' % (jobid,), params) - if sys.version_info[0] > 2: - log = log.decode() - return log + log = log.decode() + return log def get_job_graph(self, jobid, format='svg'): params = self._get_params() @@ -212,7 +216,6 @@ def get_job_graph(self, jobid, format='svg'): svg_data = self._root.get('job/%s' % (jobid,), params) return svg_data - def get_job_status(self, 
jobid): params = self._get_params() params['show'] = 'status' @@ -247,8 +250,7 @@ def job_control(self, jobid, action, properties=None, parameters=None): params.update(parameters) resp = self._root.put('job/%s' % jobid, params, data=config_gen(properties), contenttype=_XML_CONTENT_TYPE) - if sys.version_info[0] > 2: - resp = resp.decode() + resp = resp.decode() return resp diff --git a/desktop/libs/liboozie/src/liboozie/submission2.py b/desktop/libs/liboozie/src/liboozie/submission2.py index 06ce7a01eec..da3a09519c1 100644 --- a/desktop/libs/liboozie/src/liboozie/submission2.py +++ b/desktop/libs/liboozie/src/liboozie/submission2.py @@ -15,16 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import errno -import logging import os import sys import time - +import errno +import logging +from builtins import object from string import Template from django.utils.functional import wraps +from django.utils.translation import gettext as _ from beeswax.hive_site import get_hive_site_content from desktop.lib.exceptions_renderable import PopupException @@ -32,21 +32,14 @@ from desktop.lib.parameterization import find_variables from desktop.lib.paths import get_desktop_root from desktop.models import Document2 -from indexer.conf import CONFIG_JDBC_LIBS_PATH -from metadata.conf import ALTUS -from oozie.utils import convert_to_server_timezone - from hadoop import cluster from hadoop.fs.hadoopfs import Hdfs - +from indexer.conf import CONFIG_JDBC_LIBS_PATH from liboozie.conf import REMOTE_DEPLOYMENT_DIR, USE_LIBPATH_FOR_JARS from liboozie.credentials import Credentials from liboozie.oozie_api import get_oozie - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from metadata.conf import ALTUS +from oozie.utils import convert_to_server_timezone LOG = logging.getLogger() @@ -96,7 +89,7 @@ def __init__(self, user, job=None, fs=None, jt=None, properties=None, oozie_id=N local_tz = self.job.data.get('properties')['timezone'] # Modify start_date & end_date only when it's a coordinator - from oozie.models2 import Coordinator, Bundle + from oozie.models2 import Bundle, Coordinator if type(self.job) is Coordinator: if 'start_date' in self.properties: properties['start_date'] = convert_to_server_timezone(self.properties['start_date'], local_tz) @@ -292,8 +285,8 @@ def deploy(self, deployment_dir=None): self.fs.do_as_user(self.user, self.fs.copyFromLocal, os.path.join(source_path, name), destination_path) elif action.data['type'] == 'impala' or action.data['type'] == 'impala-document': - from oozie.models2 import _get_impala_url from impala.impala_flags import get_ssl_server_certificate + from oozie.models2 import _get_impala_url if action.data['type'] == 'impala-document': from notebook.models import Notebook @@ -620,7 +613,7 @@ def _create_file(self, deployment_dir, file_name, data, do_as=False): # In Py3 because of i18n, the xml data is not properly utf-8 encoded for some languages. # This can later throw UnicodeEncodeError exception for request body in HDFS or other FS API calls. To tackle this, # We are converting the data into bytes by utf-8 encoding instead of str type. 
- data = smart_str(data).encode('utf-8') if sys.version_info[0] > 2 else smart_str(data) + data = smart_str(data).encode('utf-8') if do_as: self.fs.do_as_user(self.user, self.fs.create, file_path, overwrite=True, permission=0o644, data=data) @@ -679,7 +672,7 @@ def _generate_altus_job_action_script(self, service, cluster, jobs, auth_key_id, else: hostname = ALTUS.HOSTNAME.get() - if type(cluster) == dict: + if type(cluster) is dict: command = 'createAWSCluster' arguments = cluster else: diff --git a/desktop/libs/liboozie/src/liboozie/submittion.py b/desktop/libs/liboozie/src/liboozie/submittion.py index ec299d12daf..2d00b8a0c7f 100644 --- a/desktop/libs/liboozie/src/liboozie/submittion.py +++ b/desktop/libs/liboozie/src/liboozie/submittion.py @@ -15,30 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import errno -import logging import os import re import sys import time +import errno +import logging +from builtins import object from django.utils.functional import wraps +from django.utils.translation import gettext as _ from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import smart_str from desktop.lib.parameterization import find_variables from hadoop import cluster from hadoop.fs.hadoopfs import Hdfs - -from liboozie.oozie_api import get_oozie from liboozie.conf import REMOTE_DEPLOYMENT_DIR from liboozie.credentials import Credentials - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from liboozie.oozie_api import get_oozie LOG = logging.getLogger() @@ -51,7 +46,7 @@ def decorate(self, deployment_dir=None): jt_address = cluster.get_cluster_addr_for_job_submission() if deployment_dir is None: - self._update_properties(jt_address) # Needed as we need to set some properties like Credentials before + self._update_properties(jt_address) # Needed as we need to set some properties like Credentials before deployment_dir = self.deploy() self._update_properties(jt_address, deployment_dir) @@ -73,7 +68,7 @@ def __init__(self, user, job=None, fs=None, jt=None, properties=None, oozie_id=N self.job = job self.user = user self.fs = fs - self.jt = jt # Deprecated with YARN, we now use logical names only for RM + self.jt = jt # Deprecated with YARN, we now use logical names only for RM self.oozie_id = oozie_id self.api = get_oozie(self.user) @@ -116,7 +111,7 @@ def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None): if fail_nodes: self.properties.update({'oozie.wf.rerun.failnodes': fail_nodes}) elif not skip_nodes: - self.properties.update({'oozie.wf.rerun.failnodes': 'false'}) # Case empty 'skip_nodes' list + self.properties.update({'oozie.wf.rerun.failnodes': 'false'}) # Case empty 'skip_nodes' list else: self.properties.update({'oozie.wf.rerun.skip.nodes': skip_nodes}) @@ -126,7 +121,6 @@ def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None): return self.oozie_id - def rerun_coord(self, deployment_dir, params): jt_address = cluster.get_cluster_addr_for_job_submission() @@ -138,7 +132,6 @@ def rerun_coord(self, deployment_dir, params): return self.oozie_id - def rerun_bundle(self, deployment_dir, params): jt_address = cluster.get_cluster_addr_for_job_submission() @@ -149,7 +142,6 @@ def rerun_bundle(self, deployment_dir, params): return self.oozie_id - def deploy(self): try: deployment_dir = self._create_deployment_dir() @@ -160,10 +152,10 @@ def deploy(self): if 
self.api.security_enabled: jt_address = cluster.get_cluster_addr_for_job_submission() - self._update_properties(jt_address) # Needed for coordinator deploying workflows + self._update_properties(jt_address) # Needed for coordinator deploying workflows oozie_xml = self.job.to_xml(self.properties) - self._do_as(self.user.username , self._copy_files, deployment_dir, oozie_xml) + self._do_as(self.user.username, self._copy_files, deployment_dir, oozie_xml) if hasattr(self.job, 'actions'): for action in self.job.actions: @@ -176,7 +168,6 @@ def deploy(self): return deployment_dir - def get_external_parameters(self, application_path): """From XML and job.properties HDFS files""" deployment_dir = os.path.dirname(application_path) @@ -192,11 +183,16 @@ def get_external_parameters(self, application_path): def _get_external_parameters(self, xml, properties=None): from oozie.models import DATASET_FREQUENCY - parameters = dict([(var, '') for var in find_variables(xml, include_named=False) if not self._is_coordinator() or var not in DATASET_FREQUENCY]) + parameters = dict( + [(var, '') for var in find_variables(xml, include_named=False) if not self._is_coordinator() or var not in DATASET_FREQUENCY]) if properties: - parameters.update(dict([re.split(r'(? 2: - from io import BytesIO as string_io - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from cStringIO import StringIO as string_io +from desktop.lib import i18n +from desktop.lib.exceptions_renderable import PopupException +from desktop.log.access import access_warn +from liboozie.utils import catch_unicode_time, format_time, parse_timestamp LOG = logging.getLogger() @@ -256,7 +243,7 @@ def _fixup(self): else: self.conf_dict = {} - self.title = ' %s-%s'% (self.actionNumber, format_time(self.nominalTime)) + self.title = ' %s-%s' % (self.actionNumber, format_time(self.nominalTime)) class BundleAction(Action): @@ -311,7 +298,7 @@ def get_progress(self): """How much more time before the next action.""" if self.lastAction is None: return 0 - + next = mktime(parse_timestamp(self.lastAction)) start = mktime(parse_timestamp(self.startTime)) end = mktime(parse_timestamp(self.endTime)) @@ -325,7 +312,7 @@ def get_progress(self): class Job(object): - MAX_LOG_SIZE = 3500 * 20 # 20 pages + MAX_LOG_SIZE = 3500 * 20 # 20 pages """ Accessing log and definition will trigger Oozie API calls. 
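# --- Illustrative sketch, not part of the patch ---
# The get_progress() helpers in types.py derive a percentage either from timestamps
# (coordinators/bundles: how far lastAction has advanced between startTime and endTime)
# or from the fraction of finished actions (workflows, in the next hunk). A rough,
# standalone version of the time-based variant; the timestamp format is an assumption,
# the real code parses through parse_timestamp().
from datetime import datetime
from time import mktime

def time_based_progress(last_action, start_time, end_time, fmt='%Y-%m-%dT%H:%M:%SZ'):
    # No action materialized yet: no progress.
    if last_action is None:
        return 0

    def to_secs(ts):
        return mktime(datetime.strptime(ts, fmt).timetuple())

    next_ts, start, end = to_secs(last_action), to_secs(start_time), to_secs(end_time)
    if end == start:
        return 100
    # Clamp so a lastAction outside the [start, end] window still yields a 0..100 value.
    return int(min(max((next_ts - start) / (end - start), 0.0), 1.0) * 100)

# Example: time_based_progress('2024-01-01T06:00:00Z', '2024-01-01T00:00:00Z', '2024-01-01T12:00:00Z') == 50
# --- end sketch ---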
@@ -501,10 +488,10 @@ def get_absolute_url(self, format='html'): def get_progress(self, full_node_list=None): if self.status in ('SUCCEEDED', 'KILLED', 'FAILED'): - return 100 # Case of decision nodes + return 100 # Case of decision nodes else: if full_node_list is not None: # Should remove the un-reached branches if decision node - total_actions = len(full_node_list) - 1 # -1 because of Kill node + total_actions = len(full_node_list) - 1 # -1 because of Kill node else: total_actions = len(self.actions) return int(sum([action.is_finished() for action in self.actions]) / float(max(total_actions, 1)) * 100) @@ -560,7 +547,6 @@ def _fixup(self): if self.pauseTime: self.pauseTime = parse_timestamp(self.pauseTime) - # For when listing/mixing all the jobs together self.id = self.coordJobId self.appName = self.coordJobName @@ -723,4 +709,3 @@ def __init__(self, api, json_dict, filters=None): class BundleList(JobList): def __init__(self, api, json_dict, filters=None): super(BundleList, self).__init__(Bundle, 'bundlejobs', api, json_dict, filters) - diff --git a/desktop/libs/liboozie/src/liboozie/utils.py b/desktop/libs/liboozie/src/liboozie/utils.py index fc53d641dce..e21240b0cbb 100644 --- a/desktop/libs/liboozie/src/liboozie/utils.py +++ b/desktop/libs/liboozie/src/liboozie/utils.py @@ -18,34 +18,20 @@ """ Misc helper functions """ -from __future__ import print_function -from future import standard_library -standard_library.install_aliases() -from past.builtins import basestring - -import logging import re -import sys import time - +import logging from datetime import datetime -from dateutil.parser import parse +from io import StringIO as string_io from time import strftime from xml.sax.saxutils import escape -if sys.version_info[0] > 2: - from io import StringIO as string_io - new_str = str -else: - try: - from cStringIO import StringIO as string_io - except: - from StringIO import StringIO as string_io - new_str = unicode +from dateutil.parser import parse +from past.builtins import basestring LOG = logging.getLogger() -_NAME_REGEX = re.compile('^[a-zA-Z][\-_a-zA-Z0-0]*$') +_NAME_REGEX = re.compile(r'^[a-zA-Z][\-_a-zA-Z0-0]*$') def catch_unicode_time(u_time): @@ -67,7 +53,7 @@ def parse_timestamp(timestamp, time_format=None): return time.strptime(timestamp, time_format) except ValueError: try: - return time.strptime(re.sub(' \w+$', '', timestamp), time_format.replace(' %Z', '')) + return time.strptime(re.sub(r' \w+$', '', timestamp), time_format.replace(' %Z', '')) except ValueError: LOG.error("Failed to convert Oozie timestamp: %s" % time_format) except Exception: @@ -84,7 +70,7 @@ def config_gen(dic): print("", file=sio) # if dic's key contains <,>,& then it will be escaped and if dic's value contains ']]>' then ']]>' will be stripped for k, v in sorted(dic.items()): - print("\n %s\n \n\n" \ + print("\n %s\n \n\n" % (escape(k), v.replace(']]>', '') if isinstance(v, basestring) else v), file=sio) print("", file=sio) sio.flush() @@ -95,23 +81,24 @@ def config_gen(dic): def is_valid_node_name(name): return _NAME_REGEX.match(name) is not None + def format_time(time, format='%d %b %Y %H:%M:%S'): if time is None: return '' fmt_time = None - if type(time) == new_str: + if type(time) is str: return time else: try: fmt_time = strftime(format, time) - except: + except Exception: fmt_time = None if fmt_time is None: try: - fmt_time = strftime(format+" %f", time) - except: + fmt_time = strftime(format + " %f", time) + except Exception: fmt_time = None return fmt_time diff --git 
a/desktop/libs/librdbms/src/librdbms/conf.py b/desktop/libs/librdbms/src/librdbms/conf.py index 9419d7a096e..f47ed42df89 100644 --- a/desktop/libs/librdbms/src/librdbms/conf.py +++ b/desktop/libs/librdbms/src/librdbms/conf.py @@ -17,16 +17,10 @@ import sys -from desktop.lib.conf import Config, UnspecifiedConfigSection,\ - ConfigSection, coerce_json_dict,\ - coerce_password_from_script -from desktop.conf import coerce_database - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext as _, gettext_lazy as _t +from desktop.conf import coerce_database +from desktop.lib.conf import Config, ConfigSection, UnspecifiedConfigSection, coerce_json_dict, coerce_password_from_script DATABASES = UnspecifiedConfigSection( key="databases", @@ -102,7 +96,8 @@ def config_validator(user): if engine in ('sqlite', 'sqlite3'): res.append((DATABASES[server].NAME, _("Database name should not be empty for the SQLite backend."))) if engine == 'oracle': - res.append((DATABASES[server].NAME, _("Database name should not be empty for the Oracle backend. It should be the SID of your database."))) + res.append(( + DATABASES[server].NAME, _("Database name should not be empty for the Oracle backend. It should be the SID of your database."))) if engine in ('postgresql_psycopg2', 'postgresql'): res.append((DATABASES[server].NAME, _("Database name should not be empty for the PostgreSQL backend."))) diff --git a/desktop/libs/librdbms/src/librdbms/design.py b/desktop/libs/librdbms/src/librdbms/design.py index e4f75c9df6b..852c8e7f6b4 100644 --- a/desktop/libs/librdbms/src/librdbms/design.py +++ b/desktop/libs/librdbms/src/librdbms/design.py @@ -19,21 +19,17 @@ The HQLdesign class can (de)serialize a design to/from a QueryDict. """ -from builtins import object +import sys import json import logging -import sys +from builtins import object import django.http +from django.utils.translation import gettext as _ -from beeswax.design import normalize_form_dict, denormalize_form_dict, split_statements +from beeswax.design import denormalize_form_dict, normalize_form_dict, split_statements from notebook.sql_utils import strip_trailing_semicolon -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() SERIALIZATION_VERSION = "0.0.1" @@ -43,12 +39,12 @@ class SQLdesign(object): """ Represents an SQL design, with methods to perform (de)serialization. 
""" - _QUERY_ATTRS = [ 'query', 'type', 'database', 'server' ] + _QUERY_ATTRS = ['query', 'type', 'database', 'server'] def __init__(self, form=None, query_type=None): """Initialize the design from a valid form data.""" if form is not None: - self._data_dict = dict(query = normalize_form_dict(form, SQLdesign._QUERY_ATTRS)) + self._data_dict = dict(query=normalize_form_dict(form, SQLdesign._QUERY_ATTRS)) if query_type is not None: self._data_dict['query']['type'] = query_type @@ -98,7 +94,8 @@ def get_query_statement(self, n=0): @property def statements(self): sql_query = strip_trailing_semicolon(self.sql_query) - return [strip_trailing_semicolon(statement.strip()) for (start_row, start_col), (end_row, end_col), statement in split_statements(sql_query)] + return [ + strip_trailing_semicolon(statement.strip()) for (start_row, start_col), (end_row, end_col), statement in split_statements(sql_query)] @staticmethod def loads(data): @@ -119,4 +116,4 @@ def loads(data): design = SQLdesign() design._data_dict = dic - return design \ No newline at end of file + return design diff --git a/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py b/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py index 18b2e748485..9dd23d785cf 100644 --- a/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py +++ b/desktop/libs/librdbms/src/librdbms/server/mysql_lib.py @@ -15,8 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys +import logging try: import MySQLdb as Database @@ -33,23 +33,20 @@ from django.core.exceptions import ImproperlyConfigured raise ImproperlyConfigured("MySQLdb-1.2.1p2 or newer is required; you have %s" % Database.__version__) +from django.utils.translation import gettext as _ from MySQLdb.converters import FIELD_TYPE from librdbms.server.rdbms_base_lib import BaseRDBMSDataTable, BaseRDBMSResult, BaseRDMSClient -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() -class DataTable(BaseRDBMSDataTable): pass +class DataTable(BaseRDBMSDataTable): + pass -class Result(BaseRDBMSResult): pass +class Result(BaseRDBMSResult): + pass def _convert_types(t): @@ -109,7 +106,6 @@ def __init__(self, *args, **kwargs): super(MySQLClient, self).__init__(*args, **kwargs) self.connection = Database.connect(**self._conn_params) - @property def _conn_params(self): params = { @@ -128,7 +124,6 @@ def _conn_params(self): return params - def use(self, database): if 'db' in self._conn_params and self._conn_params['db'] != database: raise RuntimeError(_("Database '%s' is not allowed. 
Please use database '%s'.") % (database, self._conn_params['db'])) @@ -137,7 +132,6 @@ def use(self, database): cursor.execute("USE `%s`" % database) self.connection.commit() - def execute_statement(self, statement): cursor = self.connection.cursor() cursor.execute(statement) @@ -149,7 +143,6 @@ def execute_statement(self, statement): columns = [] return self.data_table_cls(cursor, columns) - def get_databases(self): cursor = self.connection.cursor() cursor.execute("SHOW DATABASES") @@ -163,7 +156,6 @@ def get_databases(self): else: return databases - def get_tables(self, database, table_names=[]): cursor = self.connection.cursor() query = 'SHOW TABLES' @@ -175,7 +167,6 @@ def get_tables(self, database, table_names=[]): self.connection.commit() return [row[0] for row in cursor.fetchall()] - def get_columns(self, database, table, names_only=True): cursor = self.connection.cursor() cursor.execute("SHOW COLUMNS FROM %s.%s" % (database, table)) @@ -186,7 +177,6 @@ def get_columns(self, database, table, names_only=True): columns = [dict(name=row[0], type=row[1], comment='') for row in cursor.fetchall()] return columns - def get_sample_data(self, database, table, column=None, limit=100): column = '`%s`' % column if column else '*' statement = "SELECT %s FROM `%s`.`%s` LIMIT %d" % (column, database, table, limit) diff --git a/desktop/libs/libsaml/src/libsaml/conf.py b/desktop/libs/libsaml/src/libsaml/conf.py index 7b36c869db3..546ef3c1222 100644 --- a/desktop/libs/libsaml/src/libsaml/conf.py +++ b/desktop/libs/libsaml/src/libsaml/conf.py @@ -15,18 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import sys import json import logging -import os import subprocess -import sys -from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_password_from_script +from django.utils.translation import gettext as _, gettext_lazy as _t -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from desktop.lib.conf import Config, coerce_bool, coerce_csv, coerce_password_from_script LOG = logging.getLogger() @@ -212,6 +209,7 @@ def dict_list_map(value): default="", help=_t("To log users out of magic-sso, CDP control panel use Logout URL")) + def get_key_file_password(): password = os.environ.get('HUE_SAML_KEY_FILE_PASSWORD') if password is not None: @@ -230,6 +228,7 @@ def config_validator(user): res.append(("libsaml.username_source", _("username_source not configured properly. SAML integration may not work."))) return res + def get_logout_redirect_url(): # This logic was derived from KNOX. prod_url = "consoleauth.altus.cloudera.com" diff --git a/desktop/libs/libsaml/src/libsaml/tests.py b/desktop/libs/libsaml/src/libsaml/tests.py index e9ab55ea0b5..ecb727887bb 100644 --- a/desktop/libs/libsaml/src/libsaml/tests.py +++ b/desktop/libs/libsaml/src/libsaml/tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -## -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- # Licensed to Cloudera, Inc. under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,19 +17,16 @@ # limitations under the License. 
import sys +from unittest.mock import Mock, patch from libsaml.conf import xmlsec -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock def test_xmlsec_dynamic_default_no_which(): with patch('libsaml.conf.subprocess') as subprocess: subprocess.Popen = Mock( - side_effect = OSError('No such file or directory. `which` command is not present') + side_effect=OSError('No such file or directory. `which` command is not present') ) assert '/usr/local/bin/xmlsec1' == xmlsec() diff --git a/desktop/libs/libsaml/src/libsaml/urls.py b/desktop/libs/libsaml/src/libsaml/urls.py index d1eafe16fda..bdb1d16fbf3 100644 --- a/desktop/libs/libsaml/src/libsaml/urls.py +++ b/desktop/libs/libsaml/src/libsaml/urls.py @@ -15,18 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys +import logging -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from django.urls import re_path LOG = logging.getLogger() try: from djangosaml2 import views as djangosaml2_views + from libsaml import views as libsaml_views except ImportError: LOG.warning('djangosaml2 module not found') diff --git a/desktop/libs/libsentry/src/libsentry/api.py b/desktop/libs/libsentry/src/libsentry/api.py index 62e96d53f0f..e22a3bade64 100644 --- a/desktop/libs/libsentry/src/libsentry/api.py +++ b/desktop/libs/libsentry/src/libsentry/api.py @@ -15,24 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import logging import sys +import logging import threading +from builtins import object + +from django.utils.translation import gettext as _ from desktop.lib.exceptions import StructuredThriftTransportException from desktop.lib.exceptions_renderable import PopupException - from libsentry.client import SentryClient -from libsentry.sentry_ha import get_next_available_server, create_client +from libsentry.sentry_ha import create_client, get_next_available_server from libsentry.sentry_site import get_sentry_server, is_ha_enabled -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() API_CACHE = None @@ -233,7 +228,6 @@ def rename_sentry_privileges(self, oldAuthorizable, newAuthorizable): else: raise SentryException(response) - def _massage_privilege(self, privilege): return { 'scope': privilege.privilegeScope, @@ -247,7 +241,6 @@ def _massage_privilege(self, privilege): 'column': privilege.columnName, } - def _massage_authorizable(self, authorizable): return { 'server': authorizable.server, diff --git a/desktop/libs/libsentry/src/libsentry/api2.py b/desktop/libs/libsentry/src/libsentry/api2.py index 82be39f88e8..0f1cab6cd1c 100644 --- a/desktop/libs/libsentry/src/libsentry/api2.py +++ b/desktop/libs/libsentry/src/libsentry/api2.py @@ -15,24 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
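A minimal, self-contained sketch of the mocking pattern used by the libsaml test above, now that unittest.mock is imported directly with no Python 2 fallback. The test name is illustrative; the patched target and the expected default path come from the hunk above.

    from unittest.mock import Mock, patch

    from libsaml.conf import xmlsec

    def test_xmlsec_default_when_which_is_missing():
        # Patch the subprocess module as seen by libsaml.conf so the `which`
        # lookup fails, forcing xmlsec() back to its built-in default path.
        with patch('libsaml.conf.subprocess') as subprocess:
            subprocess.Popen = Mock(side_effect=OSError('`which` command is not present'))
            assert xmlsec() == '/usr/local/bin/xmlsec1'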
-from builtins import object -import logging import sys +import logging import threading +from builtins import object + +from django.utils.translation import gettext as _ from desktop.lib.exceptions import StructuredThriftTransportException from desktop.lib.exceptions_renderable import PopupException - from libsentry.client2 import SentryClient -from libsentry.sentry_ha import get_next_available_server, create_client +from libsentry.sentry_ha import create_client, get_next_available_server from libsentry.sentry_site import get_sentry_server, is_ha_enabled -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() API_CACHE = None @@ -49,7 +44,8 @@ def decorator(*args, **kwargs): raise PopupException(_('Failed to connect to Sentry server %s, and Sentry HA is not enabled.') % args[0].client.host, detail=e) else: LOG.warning("Failed to connect to Sentry server %s, will attempt to find next available host." % args[0].client.host) - server, attempts = get_next_available_server(client_class=SentryClient, username=args[0].client.username, failed_host=args[0].client.host, component=args[0].client.component) + server, attempts = get_next_available_server( + client_class=SentryClient, username=args[0].client.username, failed_host=args[0].client.host, component=args[0].client.component) if server is not None: args[0].client = create_client(SentryClient, args[0].client.username, server, args[0].client.component) set_api_cache(server) @@ -236,7 +232,6 @@ def rename_sentry_privileges(self, oldAuthorizable, newAuthorizable): else: raise SentryException(response) - def _massage_privilege(self, privilege): return { 'component': privilege.component, @@ -248,11 +243,9 @@ def _massage_privilege(self, privilege): 'grantOption': privilege.grantOption == 1, } - def _massage_authorizable(self, authorizables): return [{'type': auth.type, 'name': auth.name} for auth in authorizables] - def _massage_string_authorizable(self, authorizables): return [{'type': auth.split('=')[0], 'name': auth.split('=')[1]} for auth in authorizables.split('->')] diff --git a/desktop/libs/libsentry/src/libsentry/conf.py b/desktop/libs/libsentry/src/libsentry/conf.py index 0ea4bab762a..b8a22c08974 100644 --- a/desktop/libs/libsentry/src/libsentry/conf.py +++ b/desktop/libs/libsentry/src/libsentry/conf.py @@ -15,29 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
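The libsentry/api2.py hunk above repeats the import simplification applied across this change set; as a minimal before/after, assuming Python 3 is the only supported runtime:

    # Before: a runtime branch kept Python 2 working
    import sys
    if sys.version_info[0] > 2:
        from django.utils.translation import gettext as _
    else:
        from django.utils.translation import ugettext as _

    # After: Python 3 only, so the guard and the ugettext spelling are dropped
    from django.utils.translation import gettext as _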
-import logging import os -import sys - -from desktop.lib.conf import Config +import logging -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t +from django.utils.translation import gettext_lazy as _t +from desktop.lib.conf import Config LOG = logging.getLogger() -HOSTNAME=Config( +HOSTNAME = Config( key='hostname', help=_t('Hostname or IP of server.'), type=str, default='localhost', ) -PORT=Config( +PORT = Config( key='port', help=_t('Port the sentry service is running on.'), type=int, @@ -50,7 +45,7 @@ default=os.environ.get("SENTRY_CONF_DIR", '/etc/sentry/conf') ) -PRIVILEGE_CHECKER_CACHING=Config( +PRIVILEGE_CHECKER_CACHING = Config( key='privilege_checker_caching', help=_t('Number of seconds when the privilege list of a user is cached.'), type=int, @@ -59,7 +54,7 @@ def is_enabled(): - from hadoop import cluster # Avoid dependencies conflicts + from hadoop import cluster # Avoid dependencies conflicts cluster = cluster.get_cluster_conf_for_job_submission() return HOSTNAME.get() != 'localhost' and cluster.SECURITY_ENABLED.get() diff --git a/desktop/libs/libsentry/src/libsentry/sentry_ha.py b/desktop/libs/libsentry/src/libsentry/sentry_ha.py index fb5726fce9a..f20e5cdfa4b 100644 --- a/desktop/libs/libsentry/src/libsentry/sentry_ha.py +++ b/desktop/libs/libsentry/src/libsentry/sentry_ha.py @@ -15,22 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import sys import time +import logging + +from django.utils.translation import gettext as _ from desktop.lib.exceptions import StructuredThriftTransportException from desktop.lib.exceptions_renderable import PopupException - from libsentry.client2 import SentryClient from libsentry.sentry_site import get_sentry_server -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -67,7 +62,8 @@ def get_next_available_server(client_class, username, failed_host=None, componen client = create_client_fn(client_class, username, next_server, component) client.list_sentry_roles_by_group(groupName='*') # If above operation succeeds, return client - LOG.info('Successfully connected to Sentry server %s, after attempting [%s], returning client.' % (client.host, ', '.join(attempted_hosts))) + LOG.info( + 'Successfully connected to Sentry server %s, after attempting [%s], returning client.' % (client.host, ', '.join(attempted_hosts))) return next_server, attempted_hosts except StructuredThriftTransportException as e: # If we have come back around to the original failed client, exit diff --git a/desktop/libs/libsentry/src/libsentry/sentry_site.py b/desktop/libs/libsentry/src/libsentry/sentry_site.py index 3ec02f83d70..817667db306 100644 --- a/desktop/libs/libsentry/src/libsentry/sentry_site.py +++ b/desktop/libs/libsentry/src/libsentry/sentry_site.py @@ -15,24 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
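A small usage sketch (hypothetical call site) for the libsentry Config entries above; Config objects resolve their configured or default value through .get().

    from libsentry.conf import HOSTNAME, PORT, PRIVILEGE_CHECKER_CACHING, is_enabled

    # Hypothetical call site: build the service endpoint and read the cache TTL.
    if is_enabled():
        endpoint = '%s:%d' % (HOSTNAME.get(), PORT.get())
        cache_ttl_seconds = PRIVILEGE_CHECKER_CACHING.get()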
+import sys import errno +import random import logging import os.path -import random -import sys -from hadoop import confparse +from django.utils.translation import gettext as _ from desktop.lib import security_util from desktop.lib.exceptions_renderable import PopupException - -from libsentry.conf import SENTRY_CONF_DIR, HOSTNAME, PORT - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from hadoop import confparse +from libsentry.conf import HOSTNAME, PORT, SENTRY_CONF_DIR LOG = logging.getLogger() @@ -114,7 +108,7 @@ def get_sentry_server(current_host=None): servers = get_sentry_servers() hosts = [s['hostname'] for s in servers] - next_idx = random.randint(0, len(servers)-1) + next_idx = random.randint(0, len(servers) - 1) if current_host is not None and hosts: try: current_idx = hosts.index(current_host) @@ -161,7 +155,7 @@ def get_sentry_servers(): def _parse_sites(): global _SITE_DICT - _SITE_DICT ={} + _SITE_DICT = {} paths = [ ('sentry', os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')), diff --git a/desktop/libs/libsentry/src/libsentry/test_client.py b/desktop/libs/libsentry/src/libsentry/test_client.py index 0985d9bd1b3..ee79a503e20 100644 --- a/desktop/libs/libsentry/src/libsentry/test_client.py +++ b/desktop/libs/libsentry/src/libsentry/test_client.py @@ -17,19 +17,12 @@ import os import shutil -import sys import tempfile from libsentry import sentry_site -from libsentry.conf import SENTRY_CONF_DIR -from libsentry.sentry_site import get_sentry_server_principal,\ - get_sentry_server_admin_groups from libsentry.client import SentryClient - -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file +from libsentry.conf import SENTRY_CONF_DIR +from libsentry.sentry_site import get_sentry_server_admin_groups, get_sentry_server_principal def test_security_plain(): @@ -38,7 +31,7 @@ def test_security_plain(): try: xml = sentry_site_xml(provider='default') - open_file(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml) sentry_site.reset() assert 'test/test.com@TEST.COM' == get_sentry_server_principal() @@ -47,7 +40,7 @@ def test_security_plain(): security = SentryClient('test.com', 11111, 'test')._get_security() assert 'test' == security['kerberos_principal_short_name'] - assert False == security['use_sasl'] + assert False is security['use_sasl'] assert 'NOSASL' == security['mechanism'] finally: sentry_site.reset() @@ -61,12 +54,12 @@ def test_security_kerberos(): try: xml = sentry_site_xml(provider='default', authentication='kerberos') - open_file(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml) + open(os.path.join(tmpdir, 'sentry-site.xml'), 'w').write(xml) sentry_site.reset() security = SentryClient('test.com', 11111, 'test')._get_security() - assert True == security['use_sasl'] + assert True is security['use_sasl'] assert 'GSSAPI' == security['mechanism'] finally: sentry_site.reset() diff --git a/desktop/libs/libsolr/src/libsolr/api.py b/desktop/libs/libsolr/src/libsolr/api.py index d8cf488f277..b2ed89337de 100644 --- a/desktop/libs/libsolr/src/libsolr/api.py +++ b/desktop/libs/libsolr/src/libsolr/api.py @@ -16,52 +16,37 @@ # See the License for the specific language governing permissions and # limitations under the License. 
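A standalone sketch of the host-selection step in get_sentry_server above. pick_next_host is a hypothetical helper and the wrap-around to the following entry is an assumption; the hunk itself only shows the random pick and the index lookup for the current host.

    import random

    def pick_next_host(hosts, current_host=None):
        # Assumes a non-empty host list. Start from a random host; if the caller
        # passes the host it was last on, advance to the following one instead
        # (wrap-around assumed).
        next_idx = random.randint(0, len(hosts) - 1)
        if current_host is not None and current_host in hosts:
            next_idx = (hosts.index(current_host) + 1) % len(hosts)
        return hosts[next_idx]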
-from future import standard_library -standard_library.install_aliases() -from builtins import next -from builtins import str -from builtins import zip -from builtins import object +import re import json import logging -import re -import sys - +import urllib.error +import urllib.parse +import urllib.request from itertools import groupby +from urllib.parse import quote as urllib_quote, unquote as urllib_unquote + +from django.utils.translation import gettext as _ from dashboard.facet_builder import _compute_range_facet from dashboard.models import Collection2 -from desktop.lib.exceptions_renderable import PopupException from desktop.conf import SERVER_USER +from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import force_unicode -from desktop.lib.rest.http_client import HttpClient, RestException from desktop.lib.rest import resource - +from desktop.lib.rest.http_client import HttpClient, RestException from libsolr.conf import SSL_CERT_CA_VERIFY -if sys.version_info[0] > 2: - import urllib.request, urllib.parse, urllib.error - from urllib.parse import quote as urllib_quote - from urllib.parse import unquote as urllib_unquote - new_str = str - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from urllib import quote as urllib_quote - from urllib import unquote as urllib_unquote - new_str = unicode - LOG = logging.getLogger() try: - from search.conf import EMPTY_QUERY, SECURITY_ENABLED, SOLR_URL, DOWNLOAD_LIMIT + from search.conf import DOWNLOAD_LIMIT, EMPTY_QUERY, SECURITY_ENABLED, SOLR_URL except ImportError as e: LOG.warning('Solr Search is not enabled') def utf_quoter(what): - return urllib_quote(new_str(what).encode('utf-8'), safe='~@#$&()*!+=;,.?/\'') + return urllib_quote(str(what).encode('utf-8'), safe='~@#$&()*!+=;,.?/\'') class SolrApi(object): @@ -92,7 +77,6 @@ def __init__(self, solr_url=None, user=None, security_enabled=False, ssl_cert_ca if self.security_enabled: self._root.invoke('HEAD', '/') - def query(self, collection, query): solr_query = {} json_facets = {} @@ -192,7 +176,7 @@ def query(self, collection, query): 'numBuckets': True, 'allBuckets': True, 'sort': sort - #'prefix': '' # Forbidden on numeric fields + # 'prefix': '' # Forbidden on numeric fields }) json_facets[facet['id']] = _f['facet'][dim_key] elif facet['type'] == 'function': @@ -200,7 +184,7 @@ def query(self, collection, query): json_facets[facet['id']] = self._get_aggregate_function(facet['properties']['facets'][0]) if facet['properties']['compare']['is_enabled']: # TODO: global compare override - unit = re.split('\d+', facet['properties']['compare']['gap'])[1] + unit = re.split(r'\d+', facet['properties']['compare']['gap'])[1] json_facets[facet['id']] = { 'type': 'range', 'field': collection['timeFilter'].get('field'), @@ -246,7 +230,7 @@ def query(self, collection, query): if nested_fields: fl += urllib_unquote(utf_quoter(',[child parentFilter="%s"]' % ' OR '.join(nested_fields))) - if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents + if collection['template']['moreLikeThis'] and fl != ['*']: # Potential conflict with nested documents id_field = collection.get('idField', 'id') params += ( ('mlt', 'true'), @@ -256,8 +240,8 @@ def query(self, collection, query): ('mlt.maxdf', 50), ('mlt.maxntp', 1000), ('mlt.count', 10), - #('mlt.minwl', 1), - #('mlt.maxwl', 1), + # ('mlt.minwl', 1), + # ('mlt.maxwl', 1), ) fl = '*' @@ -270,7 +254,7 @@ def query(self, collection, 
query): ('hl.fragsize', 1000), ) - #if query.get('timezone'): + # if query.get('timezone'): # params += (('TZ', query.get('timezone')),) if collection['template']['fieldsSelected']: @@ -296,7 +280,6 @@ def query(self, collection, query): return self._get_json(response) - def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, can_range=None): facet = facets[0] f_name = 'dim_%02d:%s' % (dim, facet['field']) @@ -324,10 +307,10 @@ def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, ca 'allBuckets': True, 'sort': sort, 'missing': facet.get('missing', False) - #'prefix': '' # Forbidden on numeric fields + # 'prefix': '' # Forbidden on numeric fields } if int(facet['mincount']): - _f[f_name]['mincount'] = int(facet['mincount']) # Forbidden on n > 0 field if mincount = 0 + _f[f_name]['mincount'] = int(facet['mincount']) # Forbidden on n > 0 field if mincount = 0 if 'start' in facet and not facet.get('type') == 'field': _f[f_name].update({ @@ -339,14 +322,14 @@ def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, ca # Only on dim 1 currently if can_range or (timeFilter and timeFilter['time_field'] == facet['field'] - and (widget['id'] not in timeFilter['time_filter_overrides'])): # or facet['widgetType'] != 'bucket-widget'): + and (widget['id'] not in timeFilter['time_filter_overrides'])): # or facet['widgetType'] != 'bucket-widget'): facet['widgetType'] = widget['widgetType'] _f[f_name].update(self._get_time_filter_query(timeFilter, facet, collection)) if widget['widgetType'] == 'tree2-widget' and facets[-1]['aggregate']['function'] != 'count': _f['subcount'] = self._get_aggregate_function(facets[-1]) - if len(facets) > 1: # Get n+1 dimension + if len(facets) > 1: # Get n+1 dimension if facets[1]['aggregate']['function'] == 'count': self._n_facet_dimension(widget, _f[f_name], facets[1:], dim + 1, timeFilter, collection) else: @@ -361,10 +344,9 @@ def _n_facet_dimension(self, widget, _f, facets, dim, timeFilter, collection, ca agg_function = self._get_aggregate_function(_f_agg) _f['facet']['agg_%02d_%02d:%s' % (dim, i, agg_function)] = agg_function else: - self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter, collection) # Get n+1 dimension + self._n_facet_dimension(widget, _f, facets[i:], dim + 1, timeFilter, collection) # Get n+1 dimension break - def select(self, collection, query=None, rows=100, start=0): if query is None: query = EMPTY_QUERY.get() @@ -379,7 +361,6 @@ def select(self, collection, query=None, rows=100, start=0): response = self._root.get('%s/select' % collection, params) return self._get_json(response) - def suggest(self, collection, query): try: params = self._get_params() + ( @@ -397,8 +378,7 @@ def suggest(self, collection, query): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - - def collections(self): # To drop, used in indexer v1 + def collections(self): # To drop, used in indexer v1 try: params = self._get_params() + ( ('detail', 'true'), @@ -409,7 +389,6 @@ def collections(self): # To drop, used in indexer v1 except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def collections2(self): try: params = self._get_params() + ( @@ -420,7 +399,6 @@ def collections2(self): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def config(self, name): try: params = self._get_params() + ( @@ -431,7 +409,6 @@ def config(self, name): except RestException as e: raise 
PopupException(e, title=_('Error while accessing Solr')) - def configs(self): try: params = self._get_params() + ( @@ -442,7 +419,6 @@ def configs(self): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def create_config(self, name, base_config, immutable=False): try: params = self._get_params() + ( @@ -456,7 +432,6 @@ def create_config(self, name, base_config, immutable=False): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def delete_config(self, name): response = {'status': -1, 'message': ''} @@ -476,7 +451,6 @@ def delete_config(self, name): raise PopupException(e, title=_('Error while accessing Solr')) return response - def list_aliases(self): try: params = self._get_params() + ( @@ -487,14 +461,12 @@ def list_aliases(self): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def collection_or_core(self, hue_collection): if hue_collection.is_core_only: return self.core(hue_collection.name) else: return self.collection(hue_collection.name) - def collection(self, name): try: collections = self.collections() @@ -502,7 +474,6 @@ def collection(self, name): except Exception as e: raise PopupException(e, title=_('Error while accessing Solr')) - def create_collection2(self, name, config_name=None, shards=1, replication=1, **kwargs): try: params = self._get_params() + ( @@ -528,7 +499,6 @@ def create_collection2(self, name, config_name=None, shards=1, replication=1, ** except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def update_config(self, name, properties): try: params = self._get_params() + ( @@ -541,7 +511,6 @@ def update_config(self, name, properties): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def add_fields(self, name, fields): try: params = self._get_params() + ( @@ -556,7 +525,6 @@ def add_fields(self, name, fields): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def create_core(self, name, instance_dir, shards=1, replication=1): try: params = self._get_params() + ( @@ -579,7 +547,6 @@ def create_core(self, name, instance_dir, shards=1, replication=1): else: raise PopupException(e, title=_('Error while accessing Solr')) - def create_alias(self, name, collections): try: params = self._get_params() + ( @@ -597,7 +564,6 @@ def create_alias(self, name, collections): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def delete_alias(self, name): try: params = self._get_params() + ( @@ -614,7 +580,6 @@ def delete_alias(self, name): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def delete_collection(self, name): response = {'status': -1, 'message': ''} @@ -634,7 +599,6 @@ def delete_collection(self, name): raise PopupException(e, title=_('Error while accessing Solr')) return response - def remove_core(self, name): try: params = self._get_params() + ( @@ -653,7 +617,6 @@ def remove_core(self, name): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def cores(self): try: params = self._get_params() + ( @@ -673,7 +636,6 @@ def core(self, core): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def get_schema(self, collection): try: params = self._get_params() + ( @@ -775,7 +737,6 @@ def terms(self, core, field, properties=None): except RestException as e: raise 
PopupException(e, title=_('Error while accessing Solr')) - def info_system(self): try: params = self._get_params() + ( @@ -787,10 +748,9 @@ def info_system(self): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def sql(self, collection, statement): try: - if 'limit' not in statement.lower(): # rows is not supported + if 'limit' not in statement.lower(): # rows is not supported statement = statement + ' LIMIT 100' params = self._get_params() + ( @@ -818,7 +778,6 @@ def get(self, core, doc_id): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def export(self, name, query, fl, sort, rows=100): try: params = self._get_params() + ( @@ -833,7 +792,6 @@ def export(self, name, query, fl, sort, rows=100): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def update(self, collection_or_core_name, data, content_type='csv', version=None, **kwargs): if content_type == 'csv': content_type = 'application/csv' @@ -859,11 +817,10 @@ def update(self, collection_or_core_name, data, content_type='csv', version=None response = self._root.post('%s/update' % collection_or_core_name, contenttype=content_type, params=params, data=data) return self._get_json(response) - # Deprecated def aliases(self): try: - params = self._get_params() + ( # Waiting for SOLR-4968 + params = self._get_params() + ( # Waiting for SOLR-4968 ('detail', 'true'), ('path', '/aliases.json'), ) @@ -872,7 +829,6 @@ def aliases(self): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - # Deprecated def create_collection(self, name, shards=1, replication=1): try: @@ -894,7 +850,6 @@ def create_collection(self, name, shards=1, replication=1): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - # Deprecated def remove_collection(self, name): try: @@ -913,7 +868,6 @@ def remove_collection(self, name): except RestException as e: raise PopupException(e, title=_('Error while accessing Solr')) - def _get_params(self): if self.security_enabled: return (('doAs', self._user),) @@ -975,7 +929,7 @@ def _get_time_filter_query(self, timeFilter, facet, collection): stat_facet = {'min': timeFilter['from'], 'max': timeFilter['to']} properties['start'] = None properties['end'] = None - else: # The user has zoomed in. Only show that section. + else: # The user has zoomed in. Only show that section. stat_facet = {'min': properties['min'], 'max': properties['max']} _compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'], SLOTS=properties['slot']) @@ -996,7 +950,7 @@ def _get_time_filter_query(self, timeFilter, facet, collection): stat_facet = stats_json['stats']['stats_fields'][facet['field']] properties['start'] = None properties['end'] = None - else: # the user has zoomed in. Only show that section. + else: # the user has zoomed in. Only show that section. 
stat_facet = {'min': properties['min'], 'max': properties['max']} _compute_range_facet(facet['widgetType'], stat_facet, props, properties['start'], properties['end'], SLOTS=properties['slot']) return { @@ -1028,12 +982,12 @@ def _get_fq(self, collection, query): for fq in merged_fqs: if fq['type'] == 'field': - fields = fq['field'] if type(fq['field']) == list else [fq['field']] # 2D facets support + fields = fq['field'] if type(fq['field']) is list else [fq['field']] # 2D facets support for field in fields: f = [] for _filter in fq['filter']: - values = _filter['value'] if type(_filter['value']) == list else [_filter['value']] # 2D facets support - if fields.index(field) < len(values): # Lowest common field denominator + values = _filter['value'] if type(_filter['value']) is list else [_filter['value']] # 2D facets support + if fields.index(field) < len(values): # Lowest common field denominator value = values[fields.index(field)] if value or value is False: exclude = '-' if _filter['exclude'] else '' @@ -1042,7 +996,7 @@ def _get_fq(self, collection, query): f.append('%s%s:"%s"' % (exclude, field, value)) else: f.append('%s{!field f=%s}%s' % (exclude, field, value)) - else: # Handle empty value selection that are returned using solr facet.missing + else: # Handle empty value selection that are returned using solr facet.missing value = "*" exclude = '-' f.append('%s%s:%s' % (exclude, field, value)) @@ -1070,7 +1024,6 @@ def _get_fq(self, collection, query): return params - def _get_dimension_aggregates(self, facets): aggregates = [] for agg in facets: @@ -1080,14 +1033,12 @@ def _get_dimension_aggregates(self, facets): return aggregates return aggregates - def _get_nested_fields(self, collection): if collection and collection.get('nested') and collection['nested']['enabled']: return [field['filter'] for field in self._flatten_schema(collection['nested']['schema']) if field['selected']] else: return [] - def _flatten_schema(self, level): fields = [] for field in level: @@ -1096,20 +1047,18 @@ def _flatten_schema(self, level): fields.extend(self._flatten_schema(field['values'])) return fields - @classmethod def _get_json(cls, response): - if type(response) != dict: + if type(response) is not dict: # Got 'plain/text' mimetype instead of 'application/json' try: response = json.loads(response) except ValueError as e: # Got some null bytes in the response - LOG.error('%s: %s' % (new_str(e), repr(response))) + LOG.error('%s: %s' % (str(e), repr(response))) response = json.loads(response.replace('\x00', '')) return response - def uniquekey(self, collection): try: params = self._get_params() + ( @@ -1123,12 +1072,12 @@ def uniquekey(self, collection): GAPS = { '5MINUTES': { - 'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots - 'timeline-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots - 'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots - 'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots - 'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots - 'pie-widget': {'coeff': '+1', 'unit': 'MINUTES'} # ~10 slots + 'histogram-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots + 'timeline-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots + 'bucket-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots + 'bar-widget': {'coeff': '+3', 'unit': 'SECONDS'}, # ~100 slots + 'facet-widget': {'coeff': '+1', 'unit': 'MINUTES'}, # ~10 slots + 'pie-widget': {'coeff': '+1', 'unit': 'MINUTES'} # ~10 slots }, '30MINUTES': { 
'histogram-widget': {'coeff': '+20', 'unit': 'SECONDS'}, diff --git a/desktop/libs/libsolr/src/libsolr/conf.py b/desktop/libs/libsolr/src/libsolr/conf.py index dafd60e978b..fa3fc3ef8ec 100644 --- a/desktop/libs/libsolr/src/libsolr/conf.py +++ b/desktop/libs/libsolr/src/libsolr/conf.py @@ -15,23 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() import logging -import sys +from urllib.parse import urlparse + +from django.utils.translation import gettext_lazy as _t -from desktop.lib.conf import Config, coerce_bool from desktop.conf import default_ssl_validate +from desktop.lib.conf import Config, coerce_bool from libzookeeper.conf import ENSEMBLE -if sys.version_info[0] > 2: - from urllib.parse import urlparse - new_str = str - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - from urlparse import urlparse - LOG = logging.getLogger() @@ -49,9 +41,9 @@ def zkensemble_path(): """ try: parsed = urlparse(ENSEMBLE.get()) - if parsed.port == 9983: # Standalone Solr cloud + if parsed.port == 9983: # Standalone Solr cloud return '' - except: + except Exception: LOG.warning('Failed to get Zookeeper ensemble path') return '/solr' diff --git a/desktop/libs/libzookeeper/src/libzookeeper/conf.py b/desktop/libs/libzookeeper/src/libzookeeper/conf.py index 173e89d4667..dc902258513 100644 --- a/desktop/libs/libzookeeper/src/libzookeeper/conf.py +++ b/desktop/libs/libzookeeper/src/libzookeeper/conf.py @@ -15,19 +15,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from future import standard_library -standard_library.install_aliases() import logging -import sys +from urllib.parse import urlparse from desktop.lib.conf import Config, coerce_string -if sys.version_info[0] > 2: - from urllib.parse import urlparse - new_str = str -else: - from urlparse import urlparse - LOG = logging.getLogger() @@ -45,7 +37,7 @@ def zkensemble(): clusters = CLUSTERS.get() if clusters['default'].HOST_PORTS.get() != 'localhost:2181': return '%s' % clusters['default'].HOST_PORTS.get() - except: + except Exception: LOG.warning('Could not get zookeeper ensemble from the zookeeper app') if 'search' in settings.INSTALLED_APPS: @@ -53,20 +45,20 @@ def zkensemble(): from search.conf import SOLR_URL parsed = urlparse(SOLR_URL.get()) return "%s:2181" % (parsed.hostname or 'localhost') - except: + except Exception: LOG.warning('Could not get zookeeper ensemble from the search app') return "localhost:2181" -ENSEMBLE=Config( +ENSEMBLE = Config( "ensemble", help="ZooKeeper ensemble. Comma separated list of Host/Port, e.g. localhost:2181,localhost:2182,localhost:2183", dynamic_default=zkensemble, type=coerce_string, ) -PRINCIPAL_NAME=Config( +PRINCIPAL_NAME = Config( "principal_name", help="Name of Kerberos principal when using security", default="zookeeper", diff --git a/desktop/libs/metadata/src/metadata/analytic_db_api.py b/desktop/libs/metadata/src/metadata/analytic_db_api.py index 916bbf0f6ac..f55a2b98d8c 100644 --- a/desktop/libs/metadata/src/metadata/analytic_db_api.py +++ b/desktop/libs/metadata/src/metadata/analytic_db_api.py @@ -15,21 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
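The libsolr and libzookeeper conf hunks above narrow bare except clauses to except Exception. A hypothetical helper, zk_chroot, mirroring the zkensemble_path logic shown there, to make the fallback shape explicit:

    import logging
    from urllib.parse import urlparse

    LOG = logging.getLogger()

    def zk_chroot(ensemble):
        # Hypothetical wrapper around the logic in zkensemble_path(): a standalone
        # Solr cloud (embedded ZooKeeper on port 9983) needs no /solr chroot.
        try:
            if urlparse(ensemble).port == 9983:
                return ''
        except Exception:
            # Narrowed from a bare except: only ordinary exceptions are caught
            # (KeyboardInterrupt/SystemExit pass through), and the failure is
            # logged before falling back.
            LOG.warning('Failed to get Zookeeper ensemble path')
        return '/solr'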
-import logging import sys +import logging +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from desktop.lib.django_util import JsonResponse from desktop.lib.i18n import force_unicode from notebook.connectors.altus import AnalyticDbApi, DataWarehouse2Api -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() diff --git a/desktop/libs/metadata/src/metadata/assistant/queries_utils.py b/desktop/libs/metadata/src/metadata/assistant/queries_utils.py index a3e751b1c01..a5baa84077a 100644 --- a/desktop/libs/metadata/src/metadata/assistant/queries_utils.py +++ b/desktop/libs/metadata/src/metadata/assistant/queries_utils.py @@ -15,18 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import json -import sys import os +import sys +import json +import logging -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ LOG = logging.getLogger() + def get_all_queries(): with open(os.path.join(os.path.dirname(__file__), 'data/queries.json')) as file: queries = json.load(file) diff --git a/desktop/libs/metadata/src/metadata/catalog/atlas_client.py b/desktop/libs/metadata/src/metadata/catalog/atlas_client.py index b0e814a7275..a5b4ea6e747 100644 --- a/desktop/libs/metadata/src/metadata/catalog/atlas_client.py +++ b/desktop/libs/metadata/src/metadata/catalog/atlas_client.py @@ -16,23 +16,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
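The atlas_client.py hunk that follows rewrites its regular expressions; doubled backslashes in an ordinary string literal and a raw string literal produce the same pattern, which a quick check confirms:

    import re

    # Both literals denote the same pattern text; the raw string simply avoids
    # doubling every backslash.
    owner_escaped = re.compile('owner\\s*\\:\\s*([^ ]+)\\s*', re.IGNORECASE)
    owner_raw = re.compile(r'owner\s*\:\s*([^ ]+)\s*', re.IGNORECASE)
    assert owner_escaped.pattern == owner_raw.pattern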
-import json -import logging import re import sys +import json +import logging + import requests +from django.utils.translation import gettext as _ from desktop.lib.exceptions_renderable import raise_popup_exception from desktop.lib.rest import resource from desktop.lib.rest.http_client import HttpClient, RestException - +from metadata.catalog.base import Api, CatalogApiException, CatalogAuthException, CatalogEntityDoesNotExistException from metadata.conf import CATALOG, get_catalog_search_cluster -from metadata.catalog.base import CatalogAuthException, CatalogApiException, CatalogEntityDoesNotExistException, Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ LOG = logging.getLogger() @@ -59,9 +55,9 @@ class AtlasApi(Api): 'hive_column': 'FIELD' } - CLASSIFICATION_RE = re.compile('(?:tag|tags|classification)\s*\:\s*(?:(?:\"([^"]+)\")|([^ ]+))\s*', re.IGNORECASE) - TYPE_RE = re.compile('type\s*\:\s*([^ ]+)\s*', re.IGNORECASE) - OWNER_RE = re.compile('owner\s*\:\s*([^ ]+)\s*', re.IGNORECASE) + CLASSIFICATION_RE = re.compile('(?:tag|tags|classification)\\s*\\:\\s*(?:(?:\"([^"]+)\")|([^ ]+))\\s*', re.IGNORECASE) + TYPE_RE = re.compile(r'type\s*\:\s*([^ ]+)\s*', re.IGNORECASE) + OWNER_RE = re.compile(r'owner\s*\:\s*([^ ]+)\s*', re.IGNORECASE) def __init__(self, user=None): super(AtlasApi, self).__init__(user) @@ -94,13 +90,12 @@ def __init__(self, user=None): elif self._password: self._client.set_basic_auth(self._username, self._password) - self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive + self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive self.__headers = {} self.__params = () - #self._fillup_properties() # Disabled currently - + # self._fillup_properties() # Disabled currently def _get_types_from_sources(self, sources): default_entity_types = entity_types = ('DATABASE', 'TABLE', 'PARTITION', 'FIELD', 'FILE', 'VIEW', 'S3BUCKET', 'OPERATION', 'DIRECTORY') @@ -124,16 +119,16 @@ def adapt_atlas_entity_to_navigator(self, atlas_entity): "description": atlas_entity['attributes'].get('description'), "identity": atlas_entity['guid'], "internalType": atlas_entity['typeName'], - "meaningNames": atlas_entity['meaningNames'], # Atlas specific - "meanings": atlas_entity['meanings'], # Atlas specific + "meaningNames": atlas_entity['meaningNames'], # Atlas specific + "meanings": atlas_entity['meanings'], # Atlas specific "name": atlas_entity['attributes'].get('name'), "original_name": atlas_entity['attributes'].get('name'), "originalDescription": None, "originalName": atlas_entity['attributes'].get('name'), "owner": atlas_entity['attributes'].get('owner'), - "parentPath": '', # Set below - "properties": {}, # Set below - "sourceType": '', # Set below + "parentPath": '', # Set below + "properties": {}, # Set below + "sourceType": '', # Set below "classifications": [], "tags": atlas_entity['classificationNames'], "type": self.ATLAS_TO_NAV_TYPE.get(atlas_entity['typeName'].lower()) or atlas_entity['typeName'] @@ -198,7 +193,7 @@ def fetch_single_entity(self, dsl_query): atlas_response = self._root.get('/v2/search/dsl?query=%s' % dsl_query, headers=self.__headers, params=self.__params) - if not 'entities' in atlas_response or len(atlas_response['entities']) < 1: + if 'entities' not in atlas_response or len(atlas_response['entities']) < 1: raise CatalogEntityDoesNotExistException('Could not find entity with query: %s' % 
dsl_query) for atlas_entity in atlas_response['entities']: @@ -413,7 +408,6 @@ def get_entity(self, entity_id): LOG.error(msg) raise CatalogApiException(e.message) - def update_entity(self, entity, **metadata): """ PUT /api/v3/entities/:id @@ -438,7 +432,6 @@ def update_entity(self, entity, **metadata): else: raise raise_popup_exception('Failed to update entity', detail=e) - def get_cluster_source_ids(self): return [] # params = ( @@ -449,14 +442,12 @@ def get_cluster_source_ids(self): # LOG.info(params) # return self._root.get('entities', headers=self.__headers, params=params) - def add_tags(self, entity_id, tags): entity = self.get_entity(entity_id) new_tags = entity['tags'] or [] new_tags.extend(tags) return self.update_entity(entity, tags=new_tags) - def delete_tags(self, entity_id, tags): entity = self.get_entity(entity_id) new_tags = entity['tags'] or [] @@ -465,7 +456,6 @@ def delete_tags(self, entity_id, tags): new_tags.remove(tag) return self.update_entity(entity, tags=new_tags) - def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None): entity = self.get_entity(entity_id) @@ -479,7 +469,6 @@ def update_properties(self, entity_id, properties, modified_custom_metadata=None del properties['properties'][key] return self.update_entity(entity, **properties) - def delete_metadata_properties(self, entity_id, property_keys): entity = self.get_entity(entity_id) new_props = entity['properties'] or {} @@ -488,7 +477,6 @@ def delete_metadata_properties(self, entity_id, property_keys): del new_props[key] return self.update_entity(entity, properties=new_props) - def get_lineage(self, entity_id): """ GET /api/v3/lineage/entityIds=:id @@ -508,7 +496,6 @@ def get_lineage(self, entity_id): else: raise raise_popup_exception('Failed to get lineage', detail=e) - def create_namespace(self, namespace, description=None): try: data = json.dumps({'name': namespace, 'description': description}) @@ -519,7 +506,6 @@ def create_namespace(self, namespace, description=None): else: raise raise_popup_exception('Failed to create namespace', detail=e) - def get_namespace(self, namespace): try: return self._root.get('models/namespaces/%(namespace)s' % {'namespace': namespace}) @@ -529,7 +515,6 @@ def get_namespace(self, namespace): else: raise raise_popup_exception('Failed to get namespace', detail=e) - def create_namespace_property(self, namespace, properties): try: data = json.dumps(properties) @@ -541,7 +526,6 @@ def create_namespace_property(self, namespace, properties): else: raise raise_popup_exception('Failed to create namespace', detail=e) - def get_namespace_properties(self, namespace): try: return self._root.get('models/namespaces/%(namespace)s/properties' % {'namespace': namespace}) @@ -551,7 +535,6 @@ def get_namespace_properties(self, namespace): else: raise raise_popup_exception('Failed to create namespace', detail=e) - def map_namespace_property(self, clazz, properties): try: data = json.dumps(properties) @@ -563,7 +546,6 @@ def map_namespace_property(self, clazz, properties): else: raise raise_popup_exception('Failed to map class', detail=e) - def get_model_properties_mapping(self): try: return self._root.get('models/properties/mappings') @@ -573,7 +555,6 @@ def get_model_properties_mapping(self): else: raise raise_popup_exception('Failed to get models properties mappings', detail=e) - def _fillup_properties(self): global _HAS_CATALOG_NAMESPACE @@ -591,7 +572,7 @@ def _fillup_properties(self): "description": "List of Hue document UUIDs 
related to this entity", "multiValued": True, "maxLength": 36, - "pattern": ".*", # UUID + "pattern": ".*", # UUID "enumValues": None, "type": "TEXT" }) @@ -605,7 +586,6 @@ def _fillup_properties(self): _HAS_CATALOG_NAMESPACE = True - def _get_boosted_term(self, term): return 'AND'.join([ '(%s)' % 'OR'.join(['(%s:%s*^%s)' % (field, term, weight) @@ -619,6 +599,5 @@ def _get_boosted_term(self, term): def _clean_path(self, path): return path.rstrip('/').split('/')[-1], self._escape_slashes(path.rstrip('/')) - def _escape_slashes(self, s): - return s.replace('/', '\/') + return s.replace('/', r'\/') diff --git a/desktop/libs/metadata/src/metadata/catalog/base.py b/desktop/libs/metadata/src/metadata/catalog/base.py index 89d75b210c0..2b9c1e78955 100644 --- a/desktop/libs/metadata/src/metadata/catalog/base.py +++ b/desktop/libs/metadata/src/metadata/catalog/base.py @@ -18,13 +18,10 @@ import sys from builtins import object -from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_unicode +from django.utils.translation import gettext as _ -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from desktop.lib.exceptions_renderable import PopupException +from desktop.lib.i18n import smart_str def get_api(request, interface): @@ -50,7 +47,7 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) class CatalogEntityDoesNotExistException(Exception): @@ -61,7 +58,7 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) class CatalogAuthException(Exception): @@ -72,7 +69,7 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) # Base API @@ -88,28 +85,22 @@ def search_entities_interactive(self, query_s=None, limit=100, **filters): """For the top search""" return {} - def find_entity(self, source_type, type, name, **filters): """e.g. 
From a database and table name, retrieve the enity id""" return {} - def get_entity(self, entity_id): return {} - def update_entity(self, entity, **metadata): return {} - def add_tags(self, entity_id, tags): return {} - def delete_tags(self, entity_id, tags): return {} - def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None): """For updating entity comments or other attributes""" return {} @@ -119,26 +110,21 @@ def update_properties(self, entity_id, properties, modified_custom_metadata=None def get_database(self, name): return self.find_entity(source_type='HIVE', type='DATABASE', name=name) - def get_table(self, database_name, table_name, is_view=False): - parent_path = '\/%s' % database_name + parent_path = r'\/%s' % database_name return self.find_entity(source_type='HIVE', type='VIEW' if is_view else 'TABLE', name=table_name, parentPath=parent_path) - def get_field(self, database_name, table_name, field_name): - parent_path = '\/%s\/%s' % (database_name, table_name) + parent_path = r'\/%s\/%s' % (database_name, table_name) return self.find_entity(source_type='HIVE', type='FIELD', name=field_name, parentPath=parent_path) - def get_partition(self, database_name, table_name, partition_spec): raise NotImplementedError - def get_directory(self, path): dir_name, dir_path = self._clean_path(path) return self.find_entity(source_type='HDFS', type='DIRECTORY', name=dir_name, fileSystemPath=dir_path) - def get_file(self, path): file_name, file_path = self._clean_path(path) return self.find_entity(source_type='HDFS', type='FILE', name=file_name, fileSystemPath=file_path) diff --git a/desktop/libs/metadata/src/metadata/catalog/dummy_client.py b/desktop/libs/metadata/src/metadata/catalog/dummy_client.py index 1447fc9c53c..2ae64738414 100644 --- a/desktop/libs/metadata/src/metadata/catalog/dummy_client.py +++ b/desktop/libs/metadata/src/metadata/catalog/dummy_client.py @@ -16,16 +16,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys +import logging -from metadata.catalog.base import Api - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ +from metadata.catalog.base import Api LOG = logging.getLogger() @@ -35,32 +31,25 @@ class DummyClient(Api): def __init__(self, user=None): self.user = user - def search_entities_interactive(self, query_s=None, limit=100, **filters): return {u'highlighting': {u'27': {u'sourceType': [u'HIVE'], u'originalName': [u'sample_08'], u'owner': [u'admin'], u'type': [u'TABLE'], u'fileSystemPath': [u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_08'], u'internalType': [u'hv_table']}, u'1144700': {u'sourceType': [u'HIVE'], u'originalName': [u'sample_07_parquet'], u'owner': [u'admin'], u'type': [u'TABLE'], u'fileSystemPath': [u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07_parquet'], u'internalType': [u'hv_table']}, u'22': {u'sourceType': [u'HIVE'], u'description': [u'Job data'], u'originalName': [u'sample_07'], u'owner': [u'admin'], u'type': [u'TABLE'], u'fileSystemPath': [u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07'], u'internalType': [u'hv_table']}}, u'facets': {}, u'qtime': 1339, u'facetRanges': [], u'results': [{u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_08', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': None, u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_08', u'dd': u'xx'}, u'identity': u'27', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:44.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_08', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}, {u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07_parquet', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': None, u'inputFormat': u'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': 
u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07_parquet'}, u'identity': u'1144700', u'outputFormat': u'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##718', u'created': u'2018-04-17T06:16:17.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07_parquet', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}, {u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': u'Job data', u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'22', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:42.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}], u'totalMatched': 3, u'limit': 45, u'offset': 0} - def find_entity(self, source_type, type, name, **filters): return [{u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': u'Job data', u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'22', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:42.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07', u'lastAccessed': u'1970-01-01T00:00:00.000Z'}] - def get_entity(self, entity_id): return {u'customProperties': None, u'deleteTime': None, u'description': None, u'dataType': u'int', u'type': u'FIELD', u'internalType': u'hv_column', u'sourceType': u'HIVE', u'tags': None, u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'originalDescription': None, u'metaClassName': u'hv_column', u'properties': {u'__cloudera_internal__hueLink': 
u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'26', u'firstClassParentId': u'22', u'name': None, u'extractorRunId': u'8##1', u'sourceId': u'8', u'packageName': u'nav', u'parentPath': u'/default/sample_07', u'originalName': u'total_emp'} - def update_entity(self, entity, **metadata): return {} - def add_tags(self, entity_id, tags): # Return entity but not used currently return {u'clusteredByColNames': None, u'customProperties': {}, u'owner': u'admin', u'serdeName': None, u'deleteTime': None, u'fileSystemPath': u'hdfs://self-service-analytics-1.gce.cloudera.com:8020/user/hive/warehouse/sample_07', u'sourceType': u'HIVE', u'serdeLibName': u'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', u'lastModifiedBy': None, u'sortByColNames': None, u'partColNames': None, u'type': u'TABLE', u'internalType': u'hv_table', u'description': u'Job data', u'inputFormat': u'org.apache.hadoop.mapred.TextInputFormat', u'tags': [u'usage'], u'deleted': False, u'technicalProperties': None, u'userEntity': False, u'serdeProps': None, u'originalDescription': None, u'compressed': False, u'metaClassName': u'hv_table', u'properties': {u'__cloudera_internal__hueLink': u'http://self-service-analytics-1.gce.cloudera.com:8889/metastore/table/default/sample_07'}, u'identity': u'22', u'outputFormat': u'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', u'firstClassParentId': None, u'name': None, u'extractorRunId': u'8##503', u'created': u'2018-03-30T17:14:42.000Z', u'sourceId': u'8', u'lastModified': None, u'packageName': u'nav', u'parentPath': u'/default', u'originalName': u'sample_07', u'lastAccessed': u'1970-01-01T00:00:00.000Z'} - def delete_tags(self, entity_id, tags): return {} - def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None): # For updating comments of table or columns # Returning the entity but not used currently diff --git a/desktop/libs/metadata/src/metadata/catalog/navigator_client.py b/desktop/libs/metadata/src/metadata/catalog/navigator_client.py index 437f7407fdd..82ce67f6d57 100644 --- a/desktop/libs/metadata/src/metadata/catalog/navigator_client.py +++ b/desktop/libs/metadata/src/metadata/catalog/navigator_client.py @@ -16,32 +16,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import json -import logging import re import sys - +import json +import logging from itertools import islice from django.core.cache import cache +from django.utils.translation import gettext as _ from desktop.lib.rest import resource -from desktop.lib.rest.unsecure_http_client import UnsecureHttpClient from desktop.lib.rest.http_client import RestException - +from desktop.lib.rest.unsecure_http_client import UnsecureHttpClient from hadoop.conf import HDFS_CLUSTERS from libsentry.privilege_checker import get_checker from libsentry.sentry_site import get_hive_sentry_provider - +from metadata.catalog.base import Api, CatalogApiException, CatalogAuthException, CatalogEntityDoesNotExistException from metadata.conf import NAVIGATOR, get_navigator_auth_password, get_navigator_auth_username -from metadata.catalog.base import CatalogAuthException, CatalogApiException, CatalogEntityDoesNotExistException, Api from metadata.metadata_sites import get_navigator_hue_server_name -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() VERSION = 'v9' _JSON_CONTENT_TYPE = 'application/json' @@ -68,7 +61,7 @@ def get_cluster_source_ids(api): else: # 0 means always false cluster_source_ids = 'sourceId:0 AND' - cache.set(CLUSTER_SOURCE_IDS_CACHE_KEY, cluster_source_ids, 60 * 60 * 12) # 1/2 Day + cache.set(CLUSTER_SOURCE_IDS_CACHE_KEY, cluster_source_ids, 60 * 60 * 12) # 1/2 Day return cluster_source_ids @@ -99,13 +92,12 @@ def __init__(self, user=None): # Navigator does not support Kerberos authentication while other components usually requires it self._client = UnsecureHttpClient(self._api_url, logger=LOG) self._client.set_basic_auth(self._username, self._password) - self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive + self._root = resource.Resource(self._client, urlencode=False) # For search_entities_interactive self.__headers = {} self.__params = {} - #self._fillup_properties() # Disabled currently - + # self._fillup_properties() # Disabled currently def _get_types_from_sources(self, sources): default_entity_types = entity_types = ('DATABASE', 'TABLE', 'PARTITION', 'FIELD', 'FILE', 'VIEW', 'S3BUCKET', 'OPERATION', 'DIRECTORY') @@ -122,8 +114,7 @@ def _get_types_from_sources(self, sources): return default_entity_types, entity_types - - def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None, facetPrefix=None, + def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None, facetPrefix=None, facetRanges=None, filterQueries=None, firstClassEntitiesOnly=None, sources=None): try: pagination = { @@ -234,7 +225,7 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi fq_type = default_entity_types filterQueries.append('sourceType:s3') - if query_s.strip().endswith('type:*'): # To list all available types + if query_s.strip().endswith('type:*'): # To list all available types fq_type = entity_types search_terms = [term for term in query_s.strip().split()] if query_s else [] @@ -244,8 +235,8 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi query.append(self._get_boosted_term(term)) else: name, val = term.split(':') - if val: # Allow to type non default types, e.g for SQL: type:FIEL* - if name == 'type': # Make sure type value still makes sense for the source + if val: # Allow to type non default types, e.g for SQL: type:FIEL* + if name == 
'type': # Make sure type value still makes sense for the source term = '%s:%s' % (name, val.upper()) fq_type = entity_types if name.lower() not in ['type', 'tags', 'owner', 'originalname', 'originaldescription', 'lastmodifiedby']: @@ -264,7 +255,7 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi if source_ids: body['query'] = source_ids + '(' + body['query'] + ')' - body['facetFields'] = facetFields or [] # Currently mandatory in API + body['facetFields'] = facetFields or [] # Currently mandatory in API if facetPrefix: body['facetPrefix'] = facetPrefix if facetRanges: @@ -283,7 +274,7 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi clear_cookies=True ) - response['results'] = list(islice(self._secure_results(response['results']), limit)) # Apply Sentry perms + response['results'] = list(islice(self._secure_results(response['results']), limit)) # Apply Sentry perms return response except RestException as e: @@ -293,8 +284,6 @@ def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFi else: raise CatalogApiException(e.message) - - def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filters): """ Solr edismax query parser syntax. @@ -330,8 +319,8 @@ def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filte if val: if name == 'type': term = '%s:%s' % (name, val.upper().strip('*')) - default_entity_types = entity_types # Make sure type value still makes sense for the source - user_filters.append(term + '*') # Manual filter allowed e.g. type:VIE* ca + default_entity_types = entity_types # Make sure type value still makes sense for the source + user_filters.append(term + '*') # Manual filter allowed e.g. type:VIE* ca filter_query = '*' @@ -365,7 +354,7 @@ def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filte LOG.info(params) response = self._root.get('entities', headers=self.__headers, params=params) - response = list(islice(self._secure_results(response), limit)) # Apply Sentry perms + response = list(islice(self._secure_results(response), limit)) # Apply Sentry perms return response except RestException as e: @@ -375,7 +364,6 @@ def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filte else: raise CatalogApiException(e) - def _secure_results(self, results, checker=None): # TODO: to move directly to Catalog API if NAVIGATOR.APPLY_SENTRY_PERMISSIONS.get(): @@ -402,7 +390,6 @@ def getkey(result): else: return results - def suggest(self, prefix=None): try: return self._root.get('interactive/suggestions?query=%s' % (prefix or '*')) @@ -411,7 +398,6 @@ def suggest(self, prefix=None): LOG.error(msg) raise CatalogApiException(e.message) - def find_entity(self, source_type, type, name, **filters): """ GET /api/v3/entities?query=((sourceType:)AND(type:)AND(originalName:)) @@ -431,7 +417,7 @@ def find_entity(self, source_type, type, name, **filters): filter_query = 'AND'.join('(%s:%s)' % (key, value) for key, value in list(query_filters.items())) filter_query = '%(type)s AND %(filter_query)s' % { - 'type': '(type:%s)' % 'TABLE OR type:VIEW' if type == 'TABLE' else type, # Impala don't always say that a table is actually a view + 'type': '(type:%s)' % 'TABLE OR type:VIEW' if type == 'TABLE' else type, # Impala don't always say that a table is actually a view 'filter_query': filter_query } @@ -458,7 +444,6 @@ def find_entity(self, source_type, type, name, **filters): LOG.error(msg) raise CatalogApiException(e.message) - 
def get_entity(self, entity_id): """ GET /api/v3/entities/:id @@ -471,7 +456,6 @@ def get_entity(self, entity_id): LOG.error(msg) raise CatalogApiException(e.message) - def update_entity(self, entity, **metadata): """ PUT /api/v3/entities/:id @@ -502,7 +486,6 @@ def update_entity(self, entity, **metadata): LOG.error(msg) raise CatalogApiException(e.message) - def get_cluster_source_ids(self): params = { 'query': 'clusterName:"%s"' % get_navigator_hue_server_name(), @@ -512,14 +495,12 @@ def get_cluster_source_ids(self): LOG.info(params) return self._root.get('entities', headers=self.__headers, params=params) - def add_tags(self, entity_id, tags): entity = self.get_entity(entity_id) new_tags = entity['tags'] or [] new_tags.extend(tags) return self.update_entity(entity, tags=new_tags) - def delete_tags(self, entity_id, tags): entity = self.get_entity(entity_id) new_tags = entity['tags'] or [] @@ -528,7 +509,6 @@ def delete_tags(self, entity_id, tags): new_tags.remove(tag) return self.update_entity(entity, tags=new_tags) - def update_properties(self, entity_id, properties, modified_custom_metadata=None, deleted_custom_metadata_keys=None): entity = self.get_entity(entity_id) @@ -542,7 +522,6 @@ def update_properties(self, entity_id, properties, modified_custom_metadata=None del properties['properties'][key] return self.update_entity(entity, **properties) - def delete_metadata_properties(self, entity_id, property_keys): entity = self.get_entity(entity_id) new_props = entity['properties'] or {} @@ -551,7 +530,6 @@ def delete_metadata_properties(self, entity_id, property_keys): del new_props[key] return self.update_entity(entity, properties=new_props) - def get_lineage(self, entity_id): """ GET /api/v3/lineage/entityIds=:id @@ -570,7 +548,6 @@ def get_lineage(self, entity_id): LOG.error(msg) raise CatalogApiException(e.message) - def create_namespace(self, namespace, description=None): try: data = json.dumps({'name': namespace, 'description': description}) @@ -580,7 +557,6 @@ def create_namespace(self, namespace, description=None): LOG.error(msg) raise CatalogApiException(e.message) - def get_namespace(self, namespace): try: return self._root.get('models/namespaces/%(namespace)s' % {'namespace': namespace}) @@ -589,7 +565,6 @@ def get_namespace(self, namespace): LOG.error(msg) raise CatalogApiException(e.message) - def create_namespace_property(self, namespace, properties): try: data = json.dumps(properties) @@ -605,7 +580,6 @@ def create_namespace_property(self, namespace, properties): LOG.error(msg) raise CatalogApiException(e.message) - def get_namespace_properties(self, namespace): try: return self._root.get('models/namespaces/%(namespace)s/properties' % {'namespace': namespace}) @@ -614,14 +588,13 @@ def get_namespace_properties(self, namespace): LOG.error(msg) raise CatalogApiException(e.message) - def map_namespace_property(self, clazz, properties): try: data = json.dumps(properties) return self._root.post( 'models/packages/nav/classes/%(class)s/properties' % {'class': clazz}, - data=data, - contenttype=_JSON_CONTENT_TYPE, + data=data, + contenttype=_JSON_CONTENT_TYPE, clear_cookies=True ) @@ -630,7 +603,6 @@ def map_namespace_property(self, clazz, properties): LOG.error(msg) raise CatalogApiException(e.message) - def get_model_properties_mapping(self): try: return self._root.get('models/properties/mappings') @@ -639,7 +611,6 @@ def get_model_properties_mapping(self): LOG.error(msg) raise CatalogApiException(e.message) - def _fillup_properties(self): global _HAS_CATALOG_NAMESPACE @@ -657,7 
+628,7 @@ def _fillup_properties(self): "description": "List of Hue document UUIDs related to this entity", "multiValued": True, "maxLength": 36, - "pattern": ".*", # UUID + "pattern": ".*", # UUID "enumValues": None, "type": "TEXT" }) @@ -671,7 +642,6 @@ def _fillup_properties(self): _HAS_CATALOG_NAMESPACE = True - def _get_boosted_term(self, term): return 'AND'.join([ # Matching fields @@ -683,10 +653,8 @@ def _get_boosted_term(self, term): # Could add certain customProperties and properties ]) - def _clean_path(self, path): return path.rstrip('/').split('/')[-1], self._escape_slashes(path.rstrip('/')) - def _escape_slashes(self, s): - return s.replace('/', '\/') + return s.replace('/', r'\/') diff --git a/desktop/libs/metadata/src/metadata/catalog_api.py b/desktop/libs/metadata/src/metadata/catalog_api.py index 9293c2a3d72..27b5636a8ef 100644 --- a/desktop/libs/metadata/src/metadata/catalog_api.py +++ b/desktop/libs/metadata/src/metadata/catalog_api.py @@ -16,30 +16,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import next -import json -import logging import re import sys - +import json +import logging +from builtins import next from collections import OrderedDict from django.http import Http404 from django.utils.html import escape +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from desktop.lib.django_util import JsonResponse from desktop.lib.i18n import force_unicode, smart_str - from metadata.catalog.base import get_api -from metadata.catalog.navigator_client import CatalogApiException, CatalogEntityDoesNotExistException, CatalogAuthException -from metadata.conf import has_catalog, CATALOG, has_catalog_file_search, NAVIGATOR - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from metadata.catalog.navigator_client import CatalogApiException, CatalogAuthException, CatalogEntityDoesNotExistException +from metadata.conf import CATALOG, NAVIGATOR, has_catalog, has_catalog_file_search LOG = logging.getLogger() @@ -111,7 +104,7 @@ def search_entities_interactive(request): sources=sources ) - if response.get('facets'): # Remove empty facets + if response.get('facets'): # Remove empty facets for fname, fvalues in list(response['facets'].items()): # Should be a CATALOG option at some point for hidding table with no access / asking for access. if interface == 'navigator' and NAVIGATOR.APPLY_SENTRY_PERMISSIONS.get(): @@ -122,7 +115,6 @@ def search_entities_interactive(request): if ':' in query_s and not response['facets'][fname]: del response['facets'][fname] - _augment_highlighting(query_s, response.get('results')) response['status'] = 0 @@ -189,14 +181,14 @@ def _augment_highlighting(query_s, records): if record['hue_name'] and record.get('sourceType', '') != 'S3': record['hue_name'] = (record['hue_name'].replace('/', '.') + '.').lstrip('.') - record['originalName'] = record['hue_name'] + name # Inserted when selected in autocomplete, full path - record['selectionName'] = name # Use when hovering / selecting a search result + record['originalName'] = record['hue_name'] + name # Inserted when selected in autocomplete, full path + record['selectionName'] = name # Use when hovering / selecting a search result for term in ts: name = _highlight(term, name) if record.get('tags'): _highlight_tags(record, term) - for fname, fval in fs.items(): # e.g. 
owner:hue + for fname, fval in fs.items(): # e.g. owner:hue if record.get(fname, ''): if fname == 'tags': _highlight_tags(record, fval) @@ -386,8 +378,8 @@ def delete_tags(request): def update_properties(request): interface = request.POST.get('interface', CATALOG.INTERFACE.get()) entity_id = json.loads(request.POST.get('id', '""')) - properties = json.loads(request.POST.get('properties', '{}')) # Entity properties - modified_custom_metadata = json.loads(request.POST.get('modifiedCustomMetadata', '{}')) # Aka "Custom Metadata" + properties = json.loads(request.POST.get('properties', '{}')) # Entity properties + modified_custom_metadata = json.loads(request.POST.get('modifiedCustomMetadata', '{}')) # Aka "Custom Metadata" deleted_custom_metadata_keys = json.loads(request.POST.get('deletedCustomMetadataKeys', '[]')) api = get_api(request=request, interface=interface) @@ -397,7 +389,8 @@ def update_properties(request): request.audit = { 'allowed': is_allowed, 'operation': '%s_UPDATE_PROPERTIES' % interface.upper(), - 'operationText': 'Updating custom metadata %s, deleted custom metadata keys %s and properties %s of entity %s' % (modified_custom_metadata, deleted_custom_metadata_keys, properties, entity_id) + 'operationText': 'Updating custom metadata %s, deleted custom metadata keys %s and properties %s of entity %s' % ( + modified_custom_metadata, deleted_custom_metadata_keys, properties, entity_id) } if not entity_id: diff --git a/desktop/libs/metadata/src/metadata/conf.py b/desktop/libs/metadata/src/metadata/conf.py index f37635f0a35..f455c3e070b 100644 --- a/desktop/libs/metadata/src/metadata/conf.py +++ b/desktop/libs/metadata/src/metadata/conf.py @@ -15,24 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
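The same compatibility shim around Django's translation helpers is dropped in every one of these modules (navigator_client.py and catalog_api.py above, conf.py just below). A minimal before/after sketch of that shim, not copied from any single file:

# Before: guarded import kept Python 2 working
import sys
if sys.version_info[0] > 2:
    from django.utils.translation import gettext as _
else:
    from django.utils.translation import ugettext as _   # Python 2 / pre-Django 4 spelling

# After: Python 3 only, imported unconditionally
# (conf.py pulls gettext_lazy as _t the same way)
from django.utils.translation import gettext as _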
-import logging import os import sys - +import logging from subprocess import CalledProcessError +from django.utils.translation import gettext_lazy as _t + from desktop.conf import AUTH_USERNAME as DEFAULT_AUTH_USERNAME, CLUSTER_ID as DEFAULT_CLUSTER_ID from desktop.lib.conf import Config, ConfigSection, coerce_bool, coerce_password_from_script from desktop.lib.paths import get_config_root, get_desktop_root - -from metadata.settings import DJANGO_APPS from metadata.catalog import atlas_flags - -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t -else: - from django.utils.translation import ugettext_lazy as _t - +from metadata.settings import DJANGO_APPS OPTIMIZER_AUTH_PASSWORD = None NAVIGATOR_AUTH_PASSWORD = None @@ -44,14 +38,17 @@ def get_auth_username(): """Get from top level default from desktop""" return DEFAULT_AUTH_USERNAME.get() + def default_catalog_url(): """Get from main Hue config directory if present""" return atlas_flags.get_api_url() if atlas_flags.get_api_url() else None + def default_catalog_config_dir(): """Get from usual main Hue config directory""" return os.environ.get("HUE_CONF_DIR", get_desktop_root("conf")) + '/hive-conf' + def default_catalog_interface(): """Detect if the configured catalog is Navigator or default to Atlas""" from metadata.metadata_sites import get_navigator_server_url @@ -62,10 +59,12 @@ def default_catalog_interface(): catalog_interface = 'navigator' return catalog_interface + def default_navigator_config_dir(): """Get from usual main Hue config directory""" return get_config_root() + def default_navigator_url(): """Get from usual main Hue config directory""" from metadata.metadata_sites import get_navigator_server_url @@ -75,12 +74,15 @@ def default_navigator_url(): def get_optimizer_url(): return OPTIMIZER.HOSTNAME.get() and OPTIMIZER.HOSTNAME.get().strip('/') + def has_optimizer(): return OPTIMIZER.INTERFACE.get() != 'navopt' or bool(OPTIMIZER.AUTH_KEY_ID.get()) + def get_optimizer_mode(): return has_optimizer() and OPTIMIZER.MODE.get() or 'off' + def has_workload_analytics(): # Note: unused return bool(ALTUS.AUTH_KEY_ID.get()) and ALTUS.HAS_WA.get() @@ -265,9 +267,11 @@ def get_optimizer_password_script(): # Data Catalog + def get_catalog_url(): return (CATALOG.API_URL.get() and CATALOG.API_URL.get().strip('/')) or (CATALOG.INTERFACE.get() == 'navigator' and get_navigator_url()) + def has_catalog(user): from desktop.auth.backend import is_admin return ( @@ -276,16 +280,20 @@ def has_catalog(user): is_admin(user) or user.has_hue_permission(action="access", app=DJANGO_APPS[0]) ) + def has_readonly_catalog(user): return has_catalog(user) and not has_navigator(user) + def get_catalog_search_cluster(): return CATALOG.SEARCH_CLUSTER.get() + def get_kerberos_enabled_default(): '''Use atlas.authentication.method.kerberos if catalog interface is atlas else False ''' return atlas_flags.is_kerberos_enabled() if CATALOG.INTERFACE.get() == 'atlas' else False + def get_catalog_server_password_script(): '''Execute script at path''' return CATALOG.SERVER_PASSWORD_SCRIPT.get() @@ -356,17 +364,21 @@ def get_catalog_server_password_script(): # Navigator is deprecated over generic Catalog above + def get_navigator_url(): return NAVIGATOR.API_URL.get() and NAVIGATOR.API_URL.get().strip('/')[:-3] + def has_navigator(user): from desktop.auth.backend import is_admin return bool(get_navigator_url() and get_navigator_auth_password()) \ and (is_admin(user) or user.has_hue_permission(action="access", app=DJANGO_APPS[0])) + def 
get_navigator_auth_type(): return NAVIGATOR.AUTH_TYPE.get().lower() + def get_navigator_auth_username(): '''Get the username to authenticate with.''' @@ -377,6 +389,7 @@ def get_navigator_auth_username(): else: return NAVIGATOR.AUTH_CM_USERNAME.get() + def get_navigator_auth_password(): '''Get the password to authenticate with.''' global NAVIGATOR_AUTH_PASSWORD @@ -394,18 +407,22 @@ def get_navigator_auth_password(): return NAVIGATOR_AUTH_PASSWORD + def get_navigator_cm_password(): '''Get default password from secured file''' return NAVIGATOR.AUTH_CM_PASSWORD_SCRIPT.get() + def get_navigator_ldap_password(): '''Get default password from secured file''' return NAVIGATOR.AUTH_LDAP_PASSWORD_SCRIPT.get() + def get_navigator_saml_password(): '''Get default password from secured file''' return NAVIGATOR.AUTH_SAML_PASSWORD_SCRIPT.get() + def has_catalog_file_search(user): return has_catalog(user) and NAVIGATOR.ENABLE_FILE_SEARCH.get() diff --git a/desktop/libs/metadata/src/metadata/dataeng_api.py b/desktop/libs/metadata/src/metadata/dataeng_api.py index e5cb22b7321..11cc0457196 100644 --- a/desktop/libs/metadata/src/metadata/dataeng_api.py +++ b/desktop/libs/metadata/src/metadata/dataeng_api.py @@ -15,23 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import json import sys +import json +import logging +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from desktop.lib.django_util import JsonResponse from desktop.lib.i18n import force_unicode - from notebook.connectors.altus import DataEngApi -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() diff --git a/desktop/libs/metadata/src/metadata/manager_api.py b/desktop/libs/metadata/src/metadata/manager_api.py index a89efeb54f6..5cc1f8dbb04 100644 --- a/desktop/libs/metadata/src/metadata/manager_api.py +++ b/desktop/libs/metadata/src/metadata/manager_api.py @@ -16,31 +16,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import json -import logging import os import sys +import json +import logging from django.http import Http404 from django.utils.html import escape +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from desktop.auth.backend import is_admin from desktop.lib.django_util import JsonResponse from desktop.lib.i18n import force_unicode -from libzookeeper.conf import zkensemble from indexer.conf import config_morphline_path - +from libzookeeper.conf import zkensemble from metadata.catalog.navigator_client import CatalogApiException from metadata.conf import has_catalog from metadata.manager_client import ManagerApi -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -52,7 +46,7 @@ def decorator(*args, **kwargs): } try: - if has_catalog(args[0].user): # TODO + if has_catalog(args[0].user): # TODO return view_fn(*args, **kwargs) else: raise CatalogApiException('Navigator API is not configured.') @@ -125,7 +119,6 @@ def update_flume_config(request): tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo tier1.sinks.sink1.channel = channel1''' - morphline_config = open(os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')).read() morphline_config = morphline_config.replace( '${SOLR_COLLECTION}', 'log_analytics_demo' @@ -135,8 +128,10 @@ def update_flume_config(request): responses = {} - responses['agent_config_file'] = api.update_flume_config(cluster_name=None, config_name='agent_config_file', config_value=flume_agent_config) - responses['agent_morphlines_conf_file'] = api.update_flume_config(cluster_name=None, config_name='agent_morphlines_conf_file', config_value=morphline_config) + responses['agent_config_file'] = api.update_flume_config( + cluster_name=None, config_name='agent_config_file', config_value=flume_agent_config) + responses['agent_morphlines_conf_file'] = api.update_flume_config( + cluster_name=None, config_name='agent_morphlines_conf_file', config_value=morphline_config) responses['refresh_flume'] = api.refresh_flume(cluster_name=None, restart=True) diff --git a/desktop/libs/metadata/src/metadata/manager_client.py b/desktop/libs/metadata/src/metadata/manager_client.py index b40078990fa..411971a419a 100644 --- a/desktop/libs/metadata/src/metadata/manager_client.py +++ b/desktop/libs/metadata/src/metadata/manager_client.py @@ -16,29 +16,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
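In manager_client.py just below, the version-gated urllib import collapses to the Python 3 form. A small sketch of the call the Flume config URL relies on; the cluster name here is a made-up value:

from urllib.parse import quote as urllib_quote

cluster_name = 'Cluster 1'            # hypothetical value
print(urllib_quote(cluster_name))     # 'Cluster%201' - spaces become %20, '/' stays unescaped by default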
-from future import standard_library -standard_library.install_aliases() -from builtins import object -import base64 +import sys import json +import base64 import logging -import sys +from builtins import object +from urllib.parse import quote as urllib_quote from django.core.cache import cache +from django.utils.translation import gettext as _ -from desktop.lib.rest.http_client import RestException, HttpClient +from desktop.lib.i18n import smart_str +from desktop.lib.rest.http_client import HttpClient, RestException from desktop.lib.rest.resource import Resource -from desktop.lib.i18n import smart_unicode - -from metadata.conf import MANAGER, get_navigator_auth_username, get_navigator_auth_password - - -if sys.version_info[0] > 2: - from urllib.parse import quote as urllib_quote - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from urllib import quote as urllib_quote +from metadata.conf import MANAGER, get_navigator_auth_password, get_navigator_auth_username LOG = logging.getLogger() VERSION = 'v19' @@ -52,7 +43,7 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) class ManagerApi(object): @@ -76,7 +67,6 @@ def __init__(self, user=None, security_enabled=False, ssl_cert_ca_verify=False): self._client.set_verify(ssl_cert_ca_verify) self._root = Resource(self._client) - def has_service(self, service_name, cluster_name=None): cluster = self._get_cluster(cluster_name) try: @@ -89,7 +79,6 @@ def has_service(self, service_name, cluster_name=None): except RestException as e: raise ManagerApiException(e) - def get_spark_history_server_configs(self, cluster_name=None): service_name = "SPARK_ON_YARN" shs_role_type = "SPARK_YARN_HISTORY_SERVER" @@ -103,7 +92,6 @@ def get_spark_history_server_configs(self, cluster_name=None): service_display_names = [service['displayName'] for service in services if service['type'] == service_name] - if service_display_names: spark_service_display_name = service_display_names[0] @@ -118,7 +106,8 @@ def get_spark_history_server_configs(self, cluster_name=None): shs_server_hostId = shs_server_hostRef[0]['hostId'] if shs_server_hostRef else None if shs_server_name and shs_server_hostId: - shs_server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % { + shs_server_configs = self._root.get( + 'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % { 'cluster_name': cluster['name'], 'spark_service_display_name': spark_service_display_name, 'shs_server_name': shs_server_name @@ -187,7 +176,6 @@ def tools_echo(self): except RestException as e: raise ManagerApiException(e) - def get_kafka_brokers(self, cluster_name=None): try: @@ -199,7 +187,6 @@ def get_kafka_brokers(self, cluster_name=None): except RestException as e: raise ManagerApiException(e) - def get_kudu_master(self, cluster_name=None): try: cluster = self._get_cluster(cluster_name) @@ -214,7 +201,6 @@ def get_kudu_master(self, cluster_name=None): except RestException as e: raise ManagerApiException(e) - def get_kafka_topics(self, broker_host): try: client = HttpClient('http://%s:24042' % broker_host, logger=LOG) @@ -224,14 +210,13 @@ def get_kafka_topics(self, broker_host): except RestException as e: raise ManagerApiException(e) - def update_flume_config(self, cluster_name, config_name, config_value): service = 'FLUME-1' cluster = 
self._get_cluster(cluster_name) roleConfigGroup = [role['roleConfigGroupRef']['roleConfigGroupName'] for role in self._get_roles(cluster['name'], service, 'AGENT')] data = { u'items': [{ - u'url': u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'.replace('%(cluster_name)s', urllib_quote(cluster['name'])).replace('%(service)s', service).replace('%(roleConfigGroups)s', roleConfigGroup[0]), + u'url': u'/api/v8/clusters/%(cluster_name)s/services/%(service)s/roleConfigGroups/%(roleConfigGroups)s/config?message=Updated%20service%20and%20role%20type%20configurations.'.replace('%(cluster_name)s', urllib_quote(cluster['name'])).replace('%(service)s', service).replace('%(roleConfigGroups)s', roleConfigGroup[0]), # noqa: E501 u'body': { u'items': [ {u'name': config_name, u'value': config_value} @@ -246,11 +231,9 @@ def update_flume_config(self, cluster_name, config_name, config_value): items=data ) - def get_flume_agents(self, cluster_name=None): return [host['hostname'] for host in self._get_hosts('FLUME', 'AGENT', cluster_name=cluster_name)] - def _get_hosts(self, service_name, role_name, cluster_name=None): try: cluster = self._get_cluster(cluster_name) @@ -265,7 +248,6 @@ def _get_hosts(self, service_name, role_name, cluster_name=None): except RestException as e: raise ManagerApiException(e) - def refresh_flume(self, cluster_name, restart=False): service = 'FLUME-1' cluster = self._get_cluster(cluster_name) @@ -276,13 +258,15 @@ def refresh_flume(self, cluster_name, restart=False): else: return self.refresh_configs(cluster['name'], service, roles) - def refresh_configs(self, cluster_name, service=None, roles=None): try: if service is None: - return self._root.post('clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name}, contenttype="application/json") + return self._root.post( + 'clusters/%(cluster_name)s/commands/refresh' % {'cluster_name': cluster_name}, contenttype="application/json") elif roles is None: - return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service}, contenttype="application/json") + return self._root.post( + 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service}, + contenttype="application/json") else: return self._root.post( 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/refresh' % {'cluster_name': cluster_name, 'service': service}, @@ -292,13 +276,15 @@ def refresh_configs(self, cluster_name, service=None, roles=None): except RestException as e: raise ManagerApiException(e) - def restart_services(self, cluster_name, service=None, roles=None): try: if service is None: - return self._root.post('clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name}, contenttype="application/json") + return self._root.post( + 'clusters/%(cluster_name)s/commands/restart' % {'cluster_name': cluster_name}, contenttype="application/json") elif roles is None: - return self._root.post('clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service}, contenttype="application/json") + return self._root.post( + 'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service}, + contenttype="application/json") else: return self._root.post( 
'clusters/%(cluster_name)s/services/%(service)s/roleCommands/restart' % {'cluster_name': cluster_name, 'service': service}, @@ -308,14 +294,12 @@ def restart_services(self, cluster_name, service=None, roles=None): except RestException as e: raise ManagerApiException(e) - def batch(self, items): try: return self._root.post('batch', data=json.dumps(items), contenttype='application/json') except RestException as e: raise ManagerApiException(e) - def _get_cluster(self, cluster_name=None): clusters = self._root.get('clusters/')['items'] @@ -326,12 +310,11 @@ def _get_cluster(self, cluster_name=None): return cluster - def _get_roles(self, cluster_name, service_name, role_type): - roles = self._root.get('clusters/%(cluster_name)s/services/%(service_name)s/roles' % {'cluster_name': cluster_name, 'service_name': service_name})['items'] + roles = self._root.get( + 'clusters/%(cluster_name)s/services/%(service_name)s/roles' % {'cluster_name': cluster_name, 'service_name': service_name})['items'] return [role for role in roles if role['type'] == role_type] - def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None): if not key or not impalad_host: return None @@ -360,11 +343,13 @@ def get_impalad_config(self, key=None, impalad_host=None, cluster_name=None): 'spark_service_display_name': impala_service_display_name })['items'] - impalad_server_names = [server['name'] for server in servers if server['type'] == role_type and server['hostRef']['hostId'] == impalad_hostId] + impalad_server_names = [ + server['name'] for server in servers if server['type'] == role_type and server['hostRef']['hostId'] == impalad_hostId] impalad_server_name = impalad_server_names[0] if impalad_server_names else None if impalad_server_name: - server_configs = self._root.get('clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % { + server_configs = self._root.get( + 'clusters/%(cluster_name)s/services/%(spark_service_display_name)s/roles/%(shs_server_name)s/config' % { 'cluster_name': cluster['name'], 'spark_service_display_name': impala_service_display_name, 'shs_server_name': impalad_server_name diff --git a/desktop/libs/metadata/src/metadata/metadata_sites_tests.py b/desktop/libs/metadata/src/metadata/metadata_sites_tests.py index 98f3fa19fad..aca62503a00 100644 --- a/desktop/libs/metadata/src/metadata/metadata_sites_tests.py +++ b/desktop/libs/metadata/src/metadata/metadata_sites_tests.py @@ -15,22 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import absolute_import -from builtins import object -import logging import os import shutil -import sys +import logging import tempfile -from . import metadata_sites from metadata.conf import NAVIGATOR from metadata.metadata_sites import get_navigator_server_url -if sys.version_info[0] > 2: - open_file = open -else: - open_file = file +from . 
import metadata_sites LOG = logging.getLogger() @@ -44,7 +37,7 @@ def test_navigator_site(self): ] try: - open_file(os.path.join(tmpdir, 'navigator.lineage.client.properties'), 'w').write(""" + open(os.path.join(tmpdir, 'navigator.lineage.client.properties'), 'w').write(""" navigator.client.serviceType=HUE navigator.server.url=http://hue-rocks.com:7187 navigator.client.roleName=HUE-1-HUE_SERVER-50cf99601c4bf64e9ccded4c8cd96d12 @@ -62,7 +55,6 @@ def test_navigator_site(self): reset() shutil.rmtree(tmpdir) - def test_missing_navigator_site(self): tmpdir = tempfile.mkdtemp() shutil.rmtree(tmpdir) @@ -74,7 +66,7 @@ def test_missing_navigator_site(self): try: metadata_sites.reset() - assert get_navigator_server_url() == None + assert get_navigator_server_url() is None finally: metadata_sites.reset() for reset in resets: diff --git a/desktop/libs/metadata/src/metadata/optimizer/base.py b/desktop/libs/metadata/src/metadata/optimizer/base.py index 612fac7e26c..0c51e0a178b 100644 --- a/desktop/libs/metadata/src/metadata/optimizer/base.py +++ b/desktop/libs/metadata/src/metadata/optimizer/base.py @@ -18,13 +18,10 @@ import sys from builtins import object -from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_unicode +from django.utils.translation import gettext as _ -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from desktop.lib.exceptions_renderable import PopupException +from desktop.lib.i18n import smart_str def get_api(user, interface): @@ -49,7 +46,7 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) def check_privileges(view_func): diff --git a/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py b/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py index 9c855b04683..2dc12938d1c 100644 --- a/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py +++ b/desktop/libs/metadata/src/metadata/optimizer/dummy_client.py @@ -16,19 +16,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
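Two small Python 3 cleanups show up in metadata_sites_tests.py above: the open_file/file alias is gone (the file builtin no longer exists), and identity comparison replaces == None. A sketch of both, mirroring the test's temp-dir setup with illustrative values:

import os
import tempfile

tmpdir = tempfile.mkdtemp()
path = os.path.join(tmpdir, 'navigator.lineage.client.properties')

# Python 3: call open() directly instead of an open_file/file alias
with open(path, 'w') as f:
    f.write('navigator.server.url=http://hue-rocks.com:7187\n')

missing_url = None
assert missing_url is None   # 'is None' instead of '== None'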
-import logging import sys +import logging -from desktop.lib.exceptions_renderable import PopupException +from django.utils.translation import gettext as _ +from desktop.lib.exceptions_renderable import PopupException from metadata.optimizer.base import Api -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -37,19 +32,15 @@ class DummyClient(Api): def __init__(self, user, api_url=None, auth_key=None, auth_key_secret=None, tenant_id=None): self.user = user - def get_tenant(self, cluster_id='default'): pass - def upload(self, data, data_type='queries', source_platform='generic', workload_id=None): pass - def upload_status(self, workload_id): pass - def top_tables(self, workfloadId=None, database_name='default', page_size=1000, startingToken=None, connector=None): data = { 'results': [{ @@ -82,15 +73,12 @@ def top_tables(self, workfloadId=None, database_name='default', page_size=1000, return data - def table_details(self, database_name, table_name, page_size=100, startingToken=None, connector=None): return {} - def query_compatibility(self, source_platform, target_platform, query, page_size=100, startingToken=None, connector=None): return {} - def query_risk(self, query, source_platform, db_name, page_size=100, startingToken=None, connector=None): hints = [] response = {} @@ -101,7 +89,6 @@ def query_risk(self, query, source_platform, db_name, page_size=100, startingTok 'noDDL': response.get('noDDL', []), } - def predict(self, query, source_platform, connector): hints = [] response = {} @@ -110,23 +97,19 @@ def predict(self, query, source_platform, connector): 'hints': hints, } - def similar_queries(self, source_platform, query, page_size=100, startingToken=None, connector=None): raise PopupException(_('Call not supported')) - def top_filters(self, db_tables=None, page_size=100, startingToken=None, connector=None): results = {'results': []} return results - def top_aggs(self, db_tables=None, page_size=100, startingToken=None, connector=None): results = {'results': []} return results - def top_columns(self, db_tables=None, page_size=100, startingToken=None, connector=None): results = { 'selectColumns': [{ @@ -145,13 +128,11 @@ def top_columns(self, db_tables=None, page_size=100, startingToken=None, connect return results - def top_joins(self, db_tables=None, page_size=100, startingToken=None, connector=None): results = {'results': []} return results - def top_databases(self, page_size=100, startingToken=None, connector=None): results = {'results': []} diff --git a/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py b/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py index e1cba476e54..f0717692543 100644 --- a/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py +++ b/desktop/libs/metadata/src/metadata/optimizer/optimizer_client.py @@ -16,35 +16,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
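The smart_unicode helper is replaced by smart_str across these exception classes (base.py above, manager_client.py and prometheus_client.py as well); the __str__/__unicode__ pair itself is kept. A minimal sketch, assuming Hue's desktop.lib.i18n is importable and with a simplified __init__:

from desktop.lib.i18n import smart_str  # Hue helper; returns a str on Python 3

class NavOptException(Exception):

    def __init__(self, message=None):
        self.message = message

    def __str__(self):
        return str(self.message)

    def __unicode__(self):               # effectively unused on Python 3, kept for compatibility
        return smart_str(self.message)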
-from builtins import object -import json -import logging import os import sys +import json import time import uuid - +import logging +from builtins import object from tempfile import NamedTemporaryFile from django.core.cache import cache from django.utils.functional import wraps +from django.utils.translation import gettext as _ from desktop.auth.backend import is_admin -from desktop.lib.exceptions_renderable import PopupException from desktop.lib import export_csvxls -from desktop.lib.i18n import smart_unicode +from desktop.lib.exceptions_renderable import PopupException +from desktop.lib.i18n import smart_str from desktop.lib.rest.http_client import RestException +from libsentry.privilege_checker import MissingSentryPrivilegeException, get_checker from libsentry.sentry_site import get_hive_sentry_provider -from libsentry.privilege_checker import get_checker, MissingSentryPrivilegeException - from metadata.conf import OPTIMIZER, get_optimizer_url -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() _JSON_CONTENT_TYPE = 'application/json' @@ -64,7 +57,7 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) def check_privileges(view_func): @@ -111,8 +104,7 @@ def __init__(self, user, api_url=None, auth_key=None, auth_key_secret=None, tena self._api = ApiLib("navopt", self._api_url, self._auth_key, self._auth_key_secret) - self._tenant_id = tenant_id if tenant_id else _get_tenant_id(self) # Aka "workload" - + self._tenant_id = tenant_id if tenant_id else _get_tenant_id(self) # Aka "workload" def _call(self, *kwargs): start_time = time.time() @@ -130,11 +122,9 @@ def _call(self, *kwargs): else: return data - def get_tenant(self, cluster_id='default'): return self._call('getTenant', {'clusterId': cluster_id}) - def upload(self, data, data_type='queries', source_platform='generic', workload_id=None): if data_type in ('table_stats', 'cols_stats'): data_suffix = '.json' @@ -157,7 +147,7 @@ def upload(self, data, data_type='queries', source_platform='generic', workload_ } f_queries_path = NamedTemporaryFile(suffix=data_suffix) - f_queries_path.close() # Reopened as real file below to work well with the command + f_queries_path.close() # Reopened as real file below to work well with the command try: f_queries = open(f_queries_path.name, 'w+') @@ -186,7 +176,7 @@ def upload(self, data, data_type='queries', source_platform='generic', workload_ } parameters.update(extra_parameters) response = self._api.call_api('upload', parameters) - status = json.loads(response) # Workaround getting back a string + status = json.loads(response) # Workaround getting back a string status['count'] = len(data) return status @@ -196,7 +186,6 @@ def upload(self, data, data_type='queries', source_platform='generic', workload_ finally: os.remove(f_queries_path.name) - def upload_status(self, workload_id): return self._call('uploadStatus', {'tenant': self._tenant_id, 'workloadId': workload_id}) @@ -213,7 +202,6 @@ def top_tables(self, workfloadId=None, database_name='default', page_size=1000, } ) - @check_privileges def table_details(self, database_name, table_name, page_size=100, startingToken=None, connector=None): return self._call( @@ -227,7 +215,6 @@ def table_details(self, database_name, table_name, page_size=100, startingToken= } ) - def query_compatibility(self, source_platform, target_platform, query, 
page_size=100, startingToken=None, connector=None): return self._call( 'getQueryCompatible', { @@ -240,7 +227,6 @@ def query_compatibility(self, source_platform, target_platform, query, page_size } ) - def query_risk(self, query, source_platform, db_name, page_size=100, startingToken=None, connector=None): response = self._call( 'getQueryRisk', { @@ -265,7 +251,6 @@ def query_risk(self, query, source_platform, db_name, page_size=100, startingTok 'noDDL': response.get('noDDL', []), } - def predict(self, before_cursor, after_cursor, connector): response = self._call( 'predict', { @@ -282,7 +267,6 @@ def predict(self, before_cursor, after_cursor, connector): 'statement': predictions and predictions[0]['statement'] } - def similar_queries(self, source_platform, query, page_size=100, startingToken=None, connector=None): if is_admin(self.user): return self._call( @@ -298,7 +282,6 @@ def similar_queries(self, source_platform, query, page_size=100, startingToken=N else: raise PopupException(_('Call not supported')) - @check_privileges def top_filters(self, db_tables=None, page_size=100, startingToken=None, connector=None): args = { @@ -312,7 +295,6 @@ def top_filters(self, db_tables=None, page_size=100, startingToken=None, connect return self._call('getTopFilters', args) - @check_privileges def top_aggs(self, db_tables=None, page_size=100, startingToken=None, connector=None): args = { @@ -339,7 +321,6 @@ def getkey(table): return results - @check_privileges def top_columns(self, db_tables=None, page_size=100, startingToken=None, connector=None): args = { @@ -358,7 +339,6 @@ def top_columns(self, db_tables=None, page_size=100, startingToken=None, connect results[section] = list(_secure_results(results[section], self.user)) return results - @check_privileges def top_joins(self, db_tables=None, page_size=100, startingToken=None, connector=None): args = { @@ -381,7 +361,6 @@ def top_joins(self, db_tables=None, page_size=100, startingToken=None, connector results['results'] = filtered_joins return results - def top_databases(self, page_size=100, startingToken=None, connector=None): args = { 'tenant': self._tenant_id, diff --git a/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py b/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py index 871db111352..2bfc352d474 100644 --- a/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py +++ b/desktop/libs/metadata/src/metadata/optimizer/optimizer_rest_client.py @@ -16,22 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
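upload() in the optimizer_client.py hunks above keeps its pattern of using NamedTemporaryFile only to reserve a unique path, closing it, then reopening the name as a regular file before deleting it. A standalone sketch of that pattern with a placeholder payload:

import os
from tempfile import NamedTemporaryFile

f_queries_path = NamedTemporaryFile(suffix='.json')
f_queries_path.close()                   # reopened as a real file below, as the code comments note

try:
    with open(f_queries_path.name, 'w+') as f_queries:
        f_queries.write('[]')            # placeholder payload
finally:
    if os.path.exists(f_queries_path.name):
        os.remove(f_queries_path.name)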
+import sys import json import logging -import sys + +from django.utils.translation import gettext as _ from desktop.lib.rest.http_client import HttpClient from desktop.lib.rest.resource import Resource - from metadata.conf import OPTIMIZER, get_optimizer_url from metadata.optimizer.optimizer_client import OptimizerClient -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() _JSON_CONTENT_TYPE = 'application/json' @@ -48,11 +43,10 @@ def __init__(self, user, api_url=None, auth_key=None, auth_key_secret=None, tena self._api = MockApiLib() - def _call(self, path, data): try: return self._root.post(path, data=json.dumps(data), contenttype=_JSON_CONTENT_TYPE) - except: + except Exception: LOG.exception('Error calling Optimize service') return {} diff --git a/desktop/libs/metadata/src/metadata/optimizer_api.py b/desktop/libs/metadata/src/metadata/optimizer_api.py index 5b84bf51d54..ddc9ef3bca9 100644 --- a/desktop/libs/metadata/src/metadata/optimizer_api.py +++ b/desktop/libs/metadata/src/metadata/optimizer_api.py @@ -15,14 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import base64 import json -import logging import struct -import sys +import logging +from base64 import decodebytes from django.http import Http404 +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from desktop.auth.backend import is_admin @@ -30,28 +29,19 @@ from desktop.lib.i18n import force_unicode from desktop.models import Document2 from libsentry.privilege_checker import MissingSentryPrivilegeException +from metadata.conf import OPTIMIZER +from metadata.optimizer.base import get_api +from metadata.optimizer.optimizer_client import NavOptException, _clean_query, _get_table_name from notebook.api import _get_statement from notebook.models import Notebook from notebook.sql_utils import get_current_statement -from metadata.optimizer.base import get_api -from metadata.optimizer.optimizer_client import NavOptException, _get_table_name, _clean_query -from metadata.conf import OPTIMIZER - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ - from base64 import decodebytes -else: - from django.utils.translation import ugettext as _ - from base64 import decodestring as decodebytes - LOG = logging.getLogger() try: from beeswax.api import get_table_stats from beeswax.design import hql_query - from metastore.views import _get_db except ImportError as e: LOG.warning("Hive lib not enabled") @@ -280,7 +270,7 @@ def top_filters(request): interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get()) connector = json.loads(request.POST.get('connector', '{}')) db_tables = json.loads(request.POST.get('dbTables', '[]')) - column_name = request.POST.get('columnName') # Unused + column_name = request.POST.get('columnName') # Unused api = get_api(request.user, interface) @@ -394,7 +384,7 @@ def _convert_queries(queries_data): for query_data in queries_data: try: snippet = query_data['snippets'][0] - if 'guid' in snippet['result']['handle']: # Not failed query + if 'guid' in snippet['result']['handle']: # Not failed query guid = snippet['result']['handle']['guid'] if isinstance(guid, str): guid = guid.encode('utf-8') @@ -499,7 +489,6 @@ def upload_table_stats(request): if not OPTIMIZER.AUTO_UPLOAD_STATS.get(): with_table_stats = with_columns_stats = 
False - for db_table in db_tables: path = _get_table_name(db_table) @@ -520,7 +509,7 @@ def upload_table_stats(request): stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats']) table_stats.append({ - 'table_name': '%(database)s.%(table)s' % path, # DB Prefix + 'table_name': '%(database)s.%(table)s' % path, # DB Prefix 'num_rows': stats.get('numRows', -1), 'last_modified_time': stats.get('transient_lastDdlTime', -1), 'total_size': stats.get('totalSize', -1), @@ -554,7 +543,7 @@ def upload_table_stats(request): for col_stats in raw_column_stats: column_stats.append({ - 'table_name': '%(database)s.%(table)s' % path, # DB Prefix + 'table_name': '%(database)s.%(table)s' % path, # DB Prefix 'column_name': col_stats['col_name'], 'data_type': col_stats['data_type'], "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1, diff --git a/desktop/libs/metadata/src/metadata/optimizer_api_tests.py b/desktop/libs/metadata/src/metadata/optimizer_api_tests.py index f06e164bd24..19a97cd58a9 100644 --- a/desktop/libs/metadata/src/metadata/optimizer_api_tests.py +++ b/desktop/libs/metadata/src/metadata/optimizer_api_tests.py @@ -15,33 +15,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import zip +import sys import json import logging -import pytest -import sys +from builtins import zip +from unittest.mock import Mock, patch -from django.urls import reverse +import pytest from django.test import TestCase +from django.urls import reverse from desktop.auth.backend import rewrite_user from desktop.conf import ENABLE_ORGANIZATIONS from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.test_utils import add_to_group, grant_access -from useradmin.models import User - from metadata.optimizer_api import _convert_queries - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - +from useradmin.models import User LOG = logging.getLogger() - @pytest.mark.django_db class TestApi(): @@ -54,7 +47,6 @@ def setup_method(self): add_to_group('test') grant_access("test", "test", "metadata") - def test_risk_ui_api(self): snippet = { "id": "2b7d1f46-17a0-30af-efeb-33d4c29b1055", @@ -112,13 +104,11 @@ def setup_class(cls): grant_access("test", "test", "metadata") grant_access("test", "test", "optimizer") - @classmethod def teardown_class(cls): cls.user.is_superuser = False cls.user.save() - # Should run first def test_upload(self): query_docs = [ diff --git a/desktop/libs/metadata/src/metadata/prometheus_api.py b/desktop/libs/metadata/src/metadata/prometheus_api.py index 809749ac1ab..838a463c0d7 100644 --- a/desktop/libs/metadata/src/metadata/prometheus_api.py +++ b/desktop/libs/metadata/src/metadata/prometheus_api.py @@ -16,24 +16,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
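optimizer_api.py above switches from base64.decodestring (removed in Python 3.9) to decodebytes when decoding query handle GUIDs. A quick sketch with a made-up value:

from base64 import decodebytes

guid = 'aGVsbG8gd29ybGQ='        # hypothetical handle guid
if isinstance(guid, str):
    guid = guid.encode('utf-8')  # decodebytes expects bytes

print(decodebytes(guid))         # b'hello world'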
+import sys import json import logging -import sys from django.utils.html import escape +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from desktop.lib.django_util import JsonResponse from desktop.lib.i18n import force_unicode - from metadata.prometheus_client import PrometheusApi -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() diff --git a/desktop/libs/metadata/src/metadata/prometheus_client.py b/desktop/libs/metadata/src/metadata/prometheus_client.py index 93643c1dd04..9b7bbe1b729 100644 --- a/desktop/libs/metadata/src/metadata/prometheus_client.py +++ b/desktop/libs/metadata/src/metadata/prometheus_client.py @@ -16,24 +16,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import logging import sys +import logging +from builtins import object from django.core.cache import cache +from django.utils.translation import gettext as _ -from desktop.lib.rest.http_client import RestException, HttpClient +from desktop.lib.i18n import smart_str +from desktop.lib.rest.http_client import HttpClient, RestException from desktop.lib.rest.resource import Resource -from desktop.lib.i18n import smart_unicode - from metadata.conf import PROMETHEUS -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() VERSION = 'v1' @@ -46,7 +40,7 @@ def __str__(self): return str(self.message) def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) class PrometheusApi(object): @@ -60,7 +54,6 @@ def __init__(self, user=None, ssl_cert_ca_verify=False): self._client.set_verify(ssl_cert_ca_verify) self._root = Resource(self._client) - def query(self, query): try: return self._root.get('query', { diff --git a/desktop/libs/metadata/src/metadata/urls.py b/desktop/libs/metadata/src/metadata/urls.py index 1353de30086..4a521164906 100644 --- a/desktop/libs/metadata/src/metadata/urls.py +++ b/desktop/libs/metadata/src/metadata/urls.py @@ -17,16 +17,17 @@ import sys -from metadata import catalog_api as metadata_catalog_api, analytic_db_api, dataeng_api, prometheus_api -from metadata import optimizer_api as metadata_optimizer_api -from metadata import workload_analytics_api as metadata_workload_analytics_api -from metadata import manager_api as metadata_manager_api - -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path - +from django.urls import re_path + +from metadata import ( + analytic_db_api, + catalog_api as metadata_catalog_api, + dataeng_api, + manager_api as metadata_manager_api, + optimizer_api as metadata_optimizer_api, + prometheus_api, + workload_analytics_api as metadata_workload_analytics_api, +) # Catalog urlpatterns = [ @@ -78,7 +79,7 @@ re_path(r'^api/optimizer/upload/table_stats/?$', metadata_optimizer_api.upload_table_stats, name='upload_table_stats'), re_path(r'^api/optimizer/upload/status/?$', metadata_optimizer_api.upload_status, name='upload_status'), - #v2 + # v2 re_path(r'^api/optimizer/get_tenant/?$', metadata_optimizer_api.get_tenant, name='get_tenant'), re_path(r'^api/optimizer/top_databases/?$', metadata_optimizer_api.top_databases, name='top_databases'), diff --git a/desktop/libs/metadata/src/metadata/workload_analytics_api.py 
b/desktop/libs/metadata/src/metadata/workload_analytics_api.py index 0d37689d35d..8f3a295a87b 100644 --- a/desktop/libs/metadata/src/metadata/workload_analytics_api.py +++ b/desktop/libs/metadata/src/metadata/workload_analytics_api.py @@ -15,23 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import json import sys +import json +import logging +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from desktop.lib.django_util import JsonResponse from desktop.lib.i18n import force_unicode - from metadata.workload_analytics_client import WorkfloadAnalyticsClient -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() diff --git a/desktop/libs/metadata/src/metadata/workload_analytics_client.py b/desktop/libs/metadata/src/metadata/workload_analytics_client.py index 731381c87b8..032a0eaefe8 100644 --- a/desktop/libs/metadata/src/metadata/workload_analytics_client.py +++ b/desktop/libs/metadata/src/metadata/workload_analytics_client.py @@ -15,17 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import logging import sys +import logging +from builtins import object -from notebook.connectors.altus import _exec - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from django.utils.translation import gettext as _ +from notebook.connectors.altus import _exec LOG = logging.getLogger() @@ -51,7 +47,6 @@ def get_mr_task_attempt_log(self, operation_execution_id, attempt_id): return WorkloadAnalytics(self.user).get_mr_task_attempt_log(operation_execution_id=operation_execution_id, attempt_id=attempt_id) - class WorkloadAnalytics(object): def __init__(self, user): pass @@ -61,15 +56,12 @@ def get_impala_query(self, cluster, query_id): return _exec('wa', 'getImpalaQuery', parameters=parameters) - def list_uploads(self): return _exec('wa', 'listUploads') - def list_environments(self): return _exec('wa', 'listEnvironments') - def get_operation_execution_details(self, operation_id, include_tree=False): parameters = {'id': operation_id} @@ -78,7 +70,6 @@ def get_operation_execution_details(self, operation_id, include_tree=False): return _exec('wa', 'getOperationExecutionDetails', parameters=parameters) - def get_mr_task_attempt_log(self, operation_execution_id, attempt_id): parameters = {'operationExecutionId': operation_execution_id, 'attemptId': attempt_id} diff --git a/desktop/libs/notebook/src/notebook/api.py b/desktop/libs/notebook/src/notebook/api.py index 2c4739c8a5e..4f68b2c3b52 100644 --- a/desktop/libs/notebook/src/notebook/api.py +++ b/desktop/libs/notebook/src/notebook/api.py @@ -15,14 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import json import logging +from urllib.parse import unquote as urllib_unquote import sqlparse import opentracing.tracer from django.db.models import Q from django.urls import reverse +from django.utils.translation import gettext as _ from django.views.decorators.http import require_GET, require_POST from azure.abfs.__init__ import abfspath @@ -36,20 +37,9 @@ from metadata.conf import OPTIMIZER from notebook.conf import EXAMPLES from notebook.connectors.base import Notebook, QueryError, QueryExpired, SessionExpired, _get_snippet_name, patch_snippet_for_connector -from notebook.connectors.hiveserver2 import HS2Api from notebook.decorators import api_error_handler, check_document_access_permission, check_document_modify_permission from notebook.models import _get_dialect_example, escape_rows, get_api, make_notebook, upgrade_session_properties -if sys.version_info[0] > 2: - from urllib.parse import unquote as urllib_unquote - - from django.utils.translation import gettext as _ -else: - from urllib import unquote as urllib_unquote - - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() DEFAULT_HISTORY_NAME = '' diff --git a/desktop/libs/notebook/src/notebook/api_tests.py b/desktop/libs/notebook/src/notebook/api_tests.py index 839e24e36c0..84da9a15147 100644 --- a/desktop/libs/notebook/src/notebook/api_tests.py +++ b/desktop/libs/notebook/src/notebook/api_tests.py @@ -815,6 +815,7 @@ def test_get_interpreters_to_show(): resets.append(INTERPRETERS_SHOWN_ON_WHEEL.set_for_testing('java,pig')) + # 'get_interpreters_to_show did not return interpreters in the correct order expected' assert ( list(expected_interpreters.values()) == get_ordered_interpreters() ), 'get_interpreters_to_show did not return interpreters in the correct order expected' diff --git a/desktop/libs/notebook/src/notebook/conf.py b/desktop/libs/notebook/src/notebook/conf.py index 38b64516903..11f9fc72d6b 100644 --- a/desktop/libs/notebook/src/notebook/conf.py +++ b/desktop/libs/notebook/src/notebook/conf.py @@ -158,6 +158,7 @@ def computes_for_dialect(dialect, user): # cf. admin wizard too + INTERPRETERS = UnspecifiedConfigSection( "interpreters", help="One entry for each type of snippet.", diff --git a/desktop/libs/notebook/src/notebook/conf_tests.py b/desktop/libs/notebook/src/notebook/conf_tests.py index 77ebb655ab5..a28630c5c2e 100644 --- a/desktop/libs/notebook/src/notebook/conf_tests.py +++ b/desktop/libs/notebook/src/notebook/conf_tests.py @@ -15,25 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
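The test modules in this change (conf_tests.py just below, optimizer_api_tests.py above) now import Mock and patch from the standard library instead of the Python 2-era mock backport. A tiny usage sketch in the same style; attribute names and the patched target are illustrative only:

from unittest.mock import Mock, patch

api = Mock(upload_status=Mock(return_value={'status': 'FINISHED'}))
assert api.upload_status() == {'status': 'FINISHED'}

with patch('os.path.exists', return_value=True):
    import os
    assert os.path.exists('/no/such/path')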
+import sys import json -import pytest import unittest -import sys +from unittest.mock import Mock, patch +import pytest from django.test import TestCase + from desktop.auth.backend import rewrite_user from desktop.conf import ENABLE_CONNECTORS from desktop.lib.connectors.api import _get_installed_connectors from desktop.lib.django_test_util import make_logged_in_client -from useradmin.models import User, update_app_permissions, get_default_user_group - -from notebook.conf import config_validator, get_ordered_interpreters, _excute_test_query - - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock +from notebook.conf import _excute_test_query, config_validator, get_ordered_interpreters +from useradmin.models import User, get_default_user_group, update_app_permissions class TestInterpreterConfig(TestCase): @@ -59,7 +54,6 @@ def teardown_class(cls): for reset in cls._class_resets: reset() - def test_get_ordered_interpreters(self): with patch('desktop.lib.connectors.api._get_installed_connectors') as _get_installed_connectors: _get_installed_connectors.return_value = [{ @@ -128,7 +122,6 @@ def test_config_validator(self, has_connectors): assert not warnings, warnings - _excute_test_query.side_effect = Exception('') connectors = _get_installed_connectors(user=self.user) diff --git a/desktop/libs/notebook/src/notebook/connectors/altus.py b/desktop/libs/notebook/src/notebook/connectors/altus.py index 4f25a59f3f5..22183c94121 100644 --- a/desktop/libs/notebook/src/notebook/connectors/altus.py +++ b/desktop/libs/notebook/src/notebook/connectors/altus.py @@ -15,25 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import logging -import json import sys - +import json +import logging +from builtins import object from datetime import datetime, timedelta from django.urls import reverse +from django.utils.translation import gettext as _ from desktop.lib.exceptions_renderable import PopupException from desktop.lib.rest.http_client import HttpClient from desktop.lib.rest.resource import Resource from metadata.conf import ALTUS, K8S -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() DATE_FORMAT = "%Y-%m-%d" @@ -72,7 +67,8 @@ def _exec(service, command, parameters=None): raise PopupException(e, title=_('Error accessing')) -class IAMApi(object): pass +class IAMApi(object): + pass # altus iam list-user-assigned-roles --user=crn:altus:ia @@ -142,9 +138,9 @@ def submit_hive_job(self, cluster_name, script, params=None, job_xml=None): job = {'script': script} if params: - job['params'] = params + job['params'] = params if job_xml: - job['jobXml'] = job_xml + job['jobXml'] = job_xml return self.submit_jobs(cluster_name, [{'hiveJob': job}]) @@ -152,7 +148,7 @@ def submit_spark_job(self, cluster_name, jars=None, main_class=None, arguments=N job = { "jars": jars if jars else [], "applicationArguments": arguments if arguments else [], - #"propertiesFile": "string" + # "propertiesFile": "string" } if spark_arguments: job['sparkArguments'] = ' '.join(spark_arguments) @@ -186,14 +182,14 @@ def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key, # [--cloudera-manager-username ] # [--cloudera-manager-password ] - params = { # cloud_provider: AWS, Azure... + params = { # cloud_provider: AWS, Azure... 
'clusterName': cluster_name, 'cdhVersion': cdh_version, 'publicKey': public_key, 'instanceType': instance_type, 'environmentName': environment_name, 'workersGroupSize': workers_group_size, - #'automaticTerminationCondition': "EMPTY_JOB_QUEUE" + # 'automaticTerminationCondition': "EMPTY_JOB_QUEUE" } if namespace_name: @@ -294,7 +290,6 @@ def __init__(self, user=None): self._client.set_verify(False) self._root = Resource(self._client) - def list_k8_clusters(self): clusters = self._root.post('listClusters', contenttype="application/json") for cluster in clusters['clusters']: @@ -305,7 +300,6 @@ def list_k8_clusters(self): cluster['creationDate'] = str(datetime.now()) return clusters - def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key, instance_type, environment_name, workers_group_size=3, namespace_name=None, cloudera_manager_username='hue', cloudera_manager_password='hue'): data = { @@ -319,25 +313,22 @@ def create_cluster(self, cloud_provider, cluster_name, cdh_version, public_key, return self._root.post('createCluster', data=json.dumps(data), contenttype="application/json") - def list_clusters(self): clusters = self._root.post('listClusters', contenttype="application/json") for cluster in clusters['clusters']: cluster['clusterName'] = cluster['name'] cluster['workersGroupSize'] = cluster['workerReplicas'] - cluster['instanceType'] = 'Data Warehouse'# '%(workerCpuCores)s CPU %(workerMemoryInGib)s Memory' % cluster + cluster['instanceType'] = 'Data Warehouse' # '%(workerCpuCores)s CPU %(workerMemoryInGib)s Memory' % cluster cluster['progress'] = '%(workerReplicasOnline)s / %(workerReplicas)s' % cluster cluster['creationDate'] = str(datetime.now()) return clusters - def delete_cluster(self, cluster_id): data = json.dumps({'clusterName': cluster_id}) return { 'result': self._root.post('deleteCluster', data=data, contenttype="application/json") } - def describe_cluster(self, cluster_id): data = json.dumps({'clusterName': cluster_id}) data = self._root.post('describeCluster', data=data, contenttype="application/json") @@ -345,6 +336,5 @@ def describe_cluster(self, cluster_id): data['cluster']['cdhVersion'] = 'Data Warehouse' return data - def update_cluster(self, **params): return self._root.post('updateCluster', data=json.dumps(params), contenttype="application/json") diff --git a/desktop/libs/notebook/src/notebook/connectors/altus_adb.py b/desktop/libs/notebook/src/notebook/connectors/altus_adb.py index 0926102b598..f0621ff85a7 100644 --- a/desktop/libs/notebook/src/notebook/connectors/altus_adb.py +++ b/desktop/libs/notebook/src/notebook/connectors/altus_adb.py @@ -15,26 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from future import standard_library -standard_library.install_aliases() -from builtins import object -import logging import json -import sys +import logging +import urllib.error +import urllib.request +from urllib.parse import quote as urllib_quote, quote_plus as urllib_quote_plus -from django.urls import reverse +from django.utils.translation import gettext as _ from notebook.connectors.altus import AnalyticDbApi from notebook.connectors.base import Api, QueryError -if sys.version_info[0] > 2: - import urllib.request, urllib.error - from urllib.parse import quote as urllib_quote, quote_plus as urllib_quote_plus - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from urllib import quote as urllib_quote, quote_plus as urllib_quote_plus - LOG = logging.getLogger() @@ -47,41 +38,33 @@ def __init__(self, user, cluster_name, interpreter=None, request=None): Api.__init__(self, user, interpreter=interpreter, request=request) self.cluster_name = cluster_name - def execute(self, notebook, snippet): statement = snippet['statement'] return HueQuery(self.user, cluster_crn=self.cluster_name).do_execute(statement) - def check_status(self, notebook, snippet): handle = snippet['result']['handle'] return HueQuery(self.user, cluster_crn=self.cluster_name).do_check_status(handle) - def fetch_result(self, notebook, snippet, rows, start_over): handle = snippet['result']['handle'] return HueQuery(self.user, cluster_crn=self.cluster_name).do_fetch_result(handle) - def close_statement(self, notebook, snippet): return {'status': -1} - def cancel(self, notebook, snippet): return {'status': -1, 'message': _('Could not cancel.')} - def get_log(self, notebook, snippet, startFrom=0, size=None): return '...' - def get_jobs(self, notebook, snippet, logs): return [] - def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): url_path = '/notebook/api/autocomplete' @@ -104,7 +87,7 @@ def __init__(self, user, cluster_crn): self.api = AnalyticDbApi(self.user) def do_post(self, url_path): - payload = '''{"method":"POST","url":"https://localhost:8888''' + url_path +'''","httpVersion":"HTTP/1.1","headers":[{"name":"Accept-Encoding","value":"gzip, deflate, br"},{"name":"Content-Type","value":"application/x-www-form-urlencoded; charset=UTF-8"},{"name":"Accept","value":"*/*"},{"name":"X-Requested-With","value":"XMLHttpRequest"},{"name":"Connection","value":"keep-alive"}],"queryString":[],"postData": { + payload = '''{"method":"POST","url":"https://localhost:8888''' + url_path + '''","httpVersion":"HTTP/1.1","headers":[{"name":"Accept-Encoding","value":"gzip, deflate, br"},{"name":"Content-Type","value":"application/x-www-form-urlencoded; charset=UTF-8"},{"name":"Accept","value":"*/*"},{"name":"X-Requested-With","value":"XMLHttpRequest"},{"name":"Connection","value":"keep-alive"}],"queryString":[],"postData": { "mimeType": "application/x-www-form-urlencoded; charset=UTF-8", "text": "snippet=%7B%22type%22%3A%22impala%22%2C%22source%22%3A%22data%22%7D", "params": [ @@ -178,10 +161,9 @@ def do_execute(self, query): else: raise QueryError(resp.get('message')) - def do_check_status(self, handle): - notebook = {"type":"impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type":"impala","statement_raw": "SHOW DATABASES", "result": {"handle": {} }}]} - snippet = {"id": "1234", "type": "impala", "statement":"SHOW DATABASES", "status": "running", "result": {'handle': 
{"log_context":None,"statements_count":1,"end":{"column":13,"row":0},"statement_id":0,"has_more_statements":False,"start":{"column":0,"row":0},"secret":"3h9WBnLbTUYAAAAAPQjxlQ==\n","has_result_set":True,"session_guid":"qcrpEBmCTGacxfhM+CxbkQ==\n","statement":"SHOW DATABASES","operation_type":0,"modified_row_count":None,"guid":"3h9WBnLbTUYAAAAAPQjxlQ==\n","previous_statement_hash":"5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}} + notebook = {"type": "impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type": "impala", "statement_raw": "SHOW DATABASES", "result": {"handle": {}}}]} + snippet = {"id": "1234", "type": "impala", "statement": "SHOW DATABASES", "status": "running", "result": {'handle': {"log_context": None, "statements_count": 1, "end": {"column": 13, "row": 0}, "statement_id": 0, "has_more_statements": False, "start": {"column": 0, "row": 0}, "secret": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "has_result_set": True, "session_guid": "qcrpEBmCTGacxfhM+CxbkQ==\n", "statement": "SHOW DATABASES", "operation_type": 0, "modified_row_count": None, "guid": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "previous_statement_hash": "5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}} snippet['result']['handle'] = handle @@ -242,10 +224,9 @@ def do_check_status(self, handle): else: return resp_payload - def do_fetch_result(self, handle): - notebook = {"type":"impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type":"impala","statement_raw": "SHOW DATABASES", "result": {"handle": {} }}]} - snippet = {"id": "1234", "type": "impala", "statement":"SHOW DATABASES", "status": "running", "result": {'handle': {"log_context":None,"statements_count":1,"end":{"column":13,"row":0},"statement_id":0,"has_more_statements":False,"start":{"column":0,"row":0},"secret":"3h9WBnLbTUYAAAAAPQjxlQ==\n","has_result_set":True,"session_guid":"qcrpEBmCTGacxfhM+CxbkQ==\n","statement":"SHOW DATABASES","operation_type":0,"modified_row_count":None,"guid":"3h9WBnLbTUYAAAAAPQjxlQ==\n","previous_statement_hash":"5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}} + notebook = {"type": "impala", "name": "query", "isSaved": False, "sessions": [], "snippets": [{"id": "1234", "type": "impala", "statement_raw": "SHOW DATABASES", "result": {"handle": {}}}]} + snippet = {"id": "1234", "type": "impala", "statement": "SHOW DATABASES", "status": "running", "result": {'handle': {"log_context": None, "statements_count": 1, "end": {"column": 13, "row": 0}, "statement_id": 0, "has_more_statements": False, "start": {"column": 0, "row": 0}, "secret": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "has_result_set": True, "session_guid": "qcrpEBmCTGacxfhM+CxbkQ==\n", "statement": "SHOW DATABASES", "operation_type": 0, "modified_row_count": None, "guid": "3h9WBnLbTUYAAAAAPQjxlQ==\n", "previous_statement_hash": "5b1f14102d749be7b41da376bcdbb64f993ce00bc46e3aab0b8008c4"}}, "properties": {}} rows = 100 start_over = True diff --git a/desktop/libs/notebook/src/notebook/connectors/base.py b/desktop/libs/notebook/src/notebook/connectors/base.py index 03491cf169e..22c770868c0 100644 --- a/desktop/libs/notebook/src/notebook/connectors/base.py +++ b/desktop/libs/notebook/src/notebook/connectors/base.py @@ -16,7 +16,6 @@ # limitations under the License. 
import re -import sys import json import time import uuid @@ -24,23 +23,18 @@ from builtins import object from django.utils.encoding import smart_str +from django.utils.translation import gettext as _ from beeswax.common import find_compute, is_compute from desktop.auth.backend import is_admin from desktop.conf import TASK_SERVER, has_connectors, is_cdw_compute_enabled from desktop.lib import export_csvxls from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from metadata.optimizer.base import get_api as get_optimizer_api from notebook.conf import get_ordered_interpreters from notebook.sql_utils import get_current_statement -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -80,7 +74,7 @@ def __init__(self, message, handle=None): self.extra = {} def __unicode__(self): - return smart_unicode(self.message) + return smart_str(self.message) class Notebook(object): diff --git a/desktop/libs/notebook/src/notebook/connectors/base_tests.py b/desktop/libs/notebook/src/notebook/connectors/base_tests.py index 33d9298c90c..0fe73918aa9 100644 --- a/desktop/libs/notebook/src/notebook/connectors/base_tests.py +++ b/desktop/libs/notebook/src/notebook/connectors/base_tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -## -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- # Licensed to Cloudera, Inc. under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,22 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from builtins import object -import json -import pytest import sys +import json +from builtins import object +from unittest.mock import MagicMock, Mock, patch +import pytest from django.urls import reverse from desktop.lib.django_test_util import make_logged_in_client -from useradmin.models import User - from notebook.connectors.base import Notebook, get_api - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock +from useradmin.models import User @pytest.mark.django_db @@ -41,7 +36,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="empty", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") - def test_get_api(self): request = Mock() snippet = { @@ -51,7 +45,6 @@ def test_get_api(self): get_api(request=request, snippet=snippet) - def test_execute_and_wait(self): query = Notebook() @@ -72,7 +65,6 @@ def test_execute_and_wait(self): assert 2 == query.check_status.call_count - def test_check_status(self): query = Notebook() @@ -90,7 +82,6 @@ def test_check_status(self): assert 0 == resp['status'] assert 0 == resp['query_status']['status'] - def test_statement_with_variables(self): snippet = { 'statement_raw': "SELECT * FROM table WHERE city='${city}'", @@ -112,6 +103,8 @@ def test_statement_with_variables(self): iteration = 0 + + def check_status_side_effect(request, operation_id): """First time query is still running, second time the execution is finished.""" global iteration diff --git a/desktop/libs/notebook/src/notebook/connectors/dataeng.py b/desktop/libs/notebook/src/notebook/connectors/dataeng.py index 204441262f5..0048ed8e3b1 100644 --- a/desktop/libs/notebook/src/notebook/connectors/dataeng.py +++ 
b/desktop/libs/notebook/src/notebook/connectors/dataeng.py @@ -15,23 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import re import sys +import logging from django.urls import reverse +from django.utils.translation import gettext as _ +from jobbrowser.apis.data_eng_api import RUNNING_STATES from metadata.workload_analytics_client import WorkfloadAnalyticsClient - from notebook.connectors.altus import DataEngApi as AltusDataEngApi from notebook.connectors.base import Api, QueryError -from jobbrowser.apis.data_eng_api import RUNNING_STATES - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - LOG = logging.getLogger() @@ -42,7 +36,6 @@ def __init__(self, user, cluster_name, interpreter=None, request=None): Api.__init__(self, user, interpreter=interpreter, request=request) self.cluster_name = cluster_name - def execute(self, notebook, snippet): if snippet['type'] == 'spark2': @@ -72,7 +65,6 @@ def execute(self, notebook, snippet): 'has_result_set': False, } - def check_status(self, notebook, snippet): response = {'status': 'running'} @@ -90,7 +82,6 @@ def check_status(self, notebook, snippet): return response - def fetch_result(self, notebook, snippet, rows, start_over): return { 'data': [[_('Job successfully completed.')]], @@ -99,7 +90,6 @@ def fetch_result(self, notebook, snippet, rows, start_over): 'has_more': False, } - def cancel(self, notebook, snippet): if snippet['result']['handle'].get('id'): job_id = snippet['result']['handle']['id'] @@ -110,7 +100,6 @@ def cancel(self, notebook, snippet): return response - def get_log(self, notebook, snippet, startFrom=0, size=None): # Currently no way to get the logs properly easily @@ -122,22 +111,19 @@ def get_log(self, notebook, snippet, startFrom=0, size=None): # (.*?)(?=<<< Invocation of Beeline command completed <<<)', logs['stdout'], re.DOTALL)) return '' - def get_jobs(self, notebook, snippet, logs): - ## 50cf0e00-746b-4d86-b8e3-f2722296df71 + # 50cf0e00-746b-4d86-b8e3-f2722296df71 job_id = snippet['result']['handle']['id'] return [{ 'name': job_id, 'url': reverse('jobbrowser:jobbrowser.views.apps') + '#!' 
+ job_id, 'started': True, - 'finished': False # Would need call to check_status + 'finished': False # Would need call to check_status } ] - def close_statement(self, notebook, snippet): pass - def close_session(self, session): pass diff --git a/desktop/libs/notebook/src/notebook/connectors/flink_sql.py b/desktop/libs/notebook/src/notebook/connectors/flink_sql.py index cf14d509343..2eeb72f3217 100644 --- a/desktop/libs/notebook/src/notebook/connectors/flink_sql.py +++ b/desktop/libs/notebook/src/notebook/connectors/flink_sql.py @@ -17,23 +17,18 @@ from __future__ import absolute_import -import logging +import sys import json +import logging import posixpath -import sys + +from django.utils.translation import gettext as _ from desktop.lib.i18n import force_unicode from desktop.lib.rest.http_client import HttpClient, RestException from desktop.lib.rest.resource import Resource - from notebook.connectors.base import Api, QueryError -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() _JSON_CONTENT_TYPE = 'application/json' _API_VERSION = 'v1' @@ -50,7 +45,7 @@ def decorator(*args, **kwargs): except RestException as e: try: message = force_unicode(json.loads(e.message)['errors']) - except: + except Exception: message = e.message message = force_unicode(message) raise QueryError(message) @@ -60,7 +55,6 @@ def decorator(*args, **kwargs): return decorator - class FlinkSqlApi(Api): def __init__(self, user, interpreter=None): @@ -71,7 +65,6 @@ def __init__(self, user, interpreter=None): self.db = FlinkSqlClient(user=user, api_url=api_url) - @query_error_handler def create_session(self, lang=None, properties=None): session = self._get_session() @@ -105,7 +98,6 @@ def _get_session(self): return SESSIONS[session_key] - @query_error_handler def execute(self, notebook, snippet): global n @@ -146,7 +138,6 @@ def execute(self, notebook, snippet): } } - @query_error_handler def check_status(self, notebook, snippet): global n @@ -182,13 +173,12 @@ def check_status(self, notebook, snippet): return response - @query_error_handler def fetch_result(self, notebook, snippet, rows, start_over): global n session = self._get_session() statement_id = snippet['result']['handle']['guid'] - token = n #rows + token = n # rows resp = self.db.fetch_results(session['id'], job_id=statement_id, token=token) @@ -209,7 +199,6 @@ def fetch_result(self, notebook, snippet, rows, start_over): 'type': 'table' } - @query_error_handler def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): response = {} @@ -231,7 +220,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N return response - @query_error_handler def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None): if operation == 'hello': @@ -250,7 +238,6 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as return response - def cancel(self, notebook, snippet): session = self._get_session() statement_id = snippet['result']['handle']['guid'] @@ -259,7 +246,7 @@ def cancel(self, notebook, snippet): if session and statement_id: self.db.close_statement(session_id=session['id'], job_id=statement_id) else: - return {'status': -1} # missing operation ids + return {'status': -1} # missing operation ids except Exception as e: if 'does not exist in current session:' in str(e): return {'status': -1} # skipped @@ -268,14 
+255,12 @@ def cancel(self, notebook, snippet): return {'status': 0} - def close_session(self, session): # Avoid closing session on page refresh or editor close for now pass # session = self._get_session() # self.db.close_session(session['id']) - def _show_databases(self): session = self._get_session() session_id = session['id'] @@ -284,7 +269,6 @@ def _show_databases(self): return [db[0] for db in resp['results'][0]['data']] - def _show_tables(self, database): session = self._get_session() session_id = session['id'] @@ -294,7 +278,6 @@ def _show_tables(self, database): return [table[0] for table in resp['results'][0]['data']] - def _get_columns(self, database, table): session = self._get_session() session_id = session['id'] diff --git a/desktop/libs/notebook/src/notebook/connectors/hbase.py b/desktop/libs/notebook/src/notebook/connectors/hbase.py index e2d1b28de8d..a1b0aab8ec8 100644 --- a/desktop/libs/notebook/src/notebook/connectors/hbase.py +++ b/desktop/libs/notebook/src/notebook/connectors/hbase.py @@ -17,22 +17,16 @@ from __future__ import absolute_import -import logging import sys +import logging from django.urls import reverse +from django.utils.translation import gettext as _ from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import force_unicode - from notebook.connectors.base import Api, QueryError -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() diff --git a/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py b/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py index 373211d94dd..efe62313637 100644 --- a/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py +++ b/desktop/libs/notebook/src/notebook/connectors/hive_metastore.py @@ -15,23 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import sys +import logging from django.urls import reverse +from django.utils.translation import gettext as _ from desktop.lib.exceptions import StructuredException from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import force_unicode, smart_str from desktop.lib.rest.http_client import RestException - -from notebook.connectors.base import Api, QueryError, QueryExpired, OperationTimeout, OperationNotSupported - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.connectors.base import Api, OperationNotSupported, OperationTimeout, QueryError, QueryExpired LOG = logging.getLogger() @@ -39,7 +33,7 @@ try: from beeswax.api import _autocomplete from beeswax.server import dbms - from beeswax.server.dbms import get_query_server_config, QueryServerException + from beeswax.server.dbms import QueryServerException, get_query_server_config except ImportError as e: LOG.warning('Hive and HiveMetastoreServer interfaces are not enabled: %s' % e) hive_settings = None @@ -72,11 +66,9 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N return _autocomplete(db, database, table, column, nested, query=None, cluster=self.cluster) - @query_error_handler def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None): return [] - def _get_db(self, snippet, is_async=False, cluster=None): return dbms.get(self.user, query_server=get_query_server_config(name='hms', cluster=cluster)) diff --git a/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py b/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py index 4a8eda35834..21a24d6b225 100644 --- a/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py +++ b/desktop/libs/notebook/src/notebook/connectors/hiveserver2.py @@ -340,9 +340,9 @@ def execute(self, notebook, snippet): # All good server_id, server_guid = handle.get() - if sys.version_info[0] > 2: - server_id = server_id.decode('utf-8') - server_guid = server_guid.decode('utf-8') + + server_id = server_id.decode('utf-8') + server_guid = server_guid.decode('utf-8') response = { 'secret': server_id, diff --git a/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py b/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py index 94ac44bab5b..5b5fda81480 100644 --- a/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py +++ b/desktop/libs/notebook/src/notebook/connectors/hiveserver2_tests.py @@ -16,38 +16,32 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import next, object -import json -import logging -import pytest import re import sys +import json import time +import logging +from builtins import next, object +from unittest.mock import Mock, patch +import pytest from django.urls import reverse -from TCLIService.ttypes import TStatusCode, TProtocolVersion, TOperationType +from TCLIService.ttypes import TOperationType, TProtocolVersion, TStatusCode +from beeswax.server import dbms +from beeswax.server.dbms import QueryServerException +from beeswax.test_base import BeeswaxSampleProvider, get_query_server_config, is_hive_on_spark from desktop.auth.backend import rewrite_user from desktop.conf import has_connectors -from desktop.lib.i18n import smart_str from desktop.lib.django_test_util import make_logged_in_client +from desktop.lib.i18n import smart_str from desktop.lib.test_utils import add_to_group, grant_access -from beeswax.server import dbms -from beeswax.server.dbms import QueryServerException -from beeswax.test_base import BeeswaxSampleProvider, get_query_server_config, is_hive_on_spark from hadoop.pseudo_hdfs4 import is_live_cluster -from useradmin.models import User - from notebook.api import _save_notebook from notebook.connectors.base import QueryError, QueryExpired from notebook.connectors.hiveserver2 import HS2Api -from notebook.models import make_notebook, Notebook - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock - +from notebook.models import Notebook, make_notebook +from useradmin.models import User LOG = logging.getLogger() @@ -108,7 +102,6 @@ def setup_method(self): self.user = rewrite_user(User.objects.get(username="test")) grant_access("test", "default", "notebook") - def test_execute_impala(self): with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR): @@ -155,7 +148,6 @@ def test_execute_impala(self): get_client.return_value = tclient tclient.get_coordinator_host = Mock(return_value={}) - response = self.client.post(reverse('notebook:execute'), { 'notebook': TestApi.NOTEBOOK_JSON, 'snippet': json.dumps(json.loads(TestApi.NOTEBOOK_JSON)['snippets'][0]), @@ -167,7 +159,6 @@ def test_execute_impala(self): data = json.loads(response.content) assert data['status'] == 0 - def test_autocomplete_database_impala(self): with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR): @@ -190,7 +181,6 @@ def test_autocomplete_database_impala(self): assert data['status'] == 0 assert data['databases'] == [{u'comment': u'', u'hdfs_link': u'hdfs://table'}] - def test_sample_data_table_sync_impala(self): with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR): @@ -224,7 +214,6 @@ def test_sample_data_table_sync_impala(self): assert data['full_headers'] == [{'name': 'name'}] assert data['rows'] == [[1], [2]] - def test_sample_data_table_async_impala(self): with patch('desktop.lib.connectors.api.CONNECTOR_INSTANCES', TestApi.CONNECTOR): @@ -278,7 +267,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = rewrite_user(User.objects.get(username="test")) - @patch('notebook.connectors.hiveserver2.has_jobbrowser', True) def test_get_jobs_with_jobbrowser(self): notebook = Mock() @@ -297,7 +285,6 @@ def test_get_jobs_with_jobbrowser(self): assert jobs[0]['name'] == 'job_id_00001' assert jobs[0]['url'] == '/jobbrowser/jobs/job_id_00001' - @patch('notebook.connectors.hiveserver2.has_jobbrowser', False) def 
test_get_jobs_without_jobbrowser(self): notebook = Mock() @@ -316,7 +303,6 @@ def test_get_jobs_without_jobbrowser(self): assert jobs[0]['name'] == 'job_id_00001' assert jobs[0]['url'] == '' # Is empty - def test_close_statement(self): with patch('notebook.connectors.hiveserver2.HS2Api._get_db') as _get_db: _get_db.return_value = Mock( @@ -371,11 +357,10 @@ def test_close_statement(self): 'interface': 'impala', 'type': 'direct', 'options': {}}, 'wasBatchExecuted': False, 'dialect': 'impala' } api = HS2Api(self.user) - + response = api.close_statement(notebook, snippet) assert response['status'] == -1 # snippet['result']['handel'] ['guid'] and ['secret'] are missing - def test_get_error_message_from_query(self): with patch('notebook.connectors.hiveserver2.HS2Api._get_db') as _get_db: with patch('notebook.connectors.hiveserver2.HS2Api._get_current_statement') as _get_current_statement: @@ -408,7 +393,6 @@ def test_get_error_message_from_query(self): e.message == 'Error while compiling statement: FAILED: HiveAccessControlException Permission denied') - def test_autocomplete_time_out(self): snippet = {'type': 'hive', 'properties': {}} @@ -427,7 +411,6 @@ def test_autocomplete_time_out(self): except QueryExpired as e: assert e.message == "HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)" - def test_autocomplete_functions_hive(self): snippet = {'type': 'hive', 'properties': {}} @@ -460,7 +443,6 @@ def setup_method(self): self.db = dbms.get(self.user, get_query_server_config()) self.api = HS2Api(self.user) - def test_prepare_hql_query(self): statement = "SELECT myUpper(description) FROM sample_07 LIMIT 10" snippet_json = """ @@ -564,7 +546,6 @@ def test_prepare_hql_query(self): assert pattern.search(config_statements), config_statements assert "CREATE TEMPORARY FUNCTION myUpper AS 'org.hue.udf.MyUpper'" in config_statements, config_statements - def test_upgrade_properties(self): properties = None # Verify that upgrade will return defaults if current properties not formatted as settings @@ -628,7 +609,6 @@ def test_upgrade_properties(self): upgraded_props = self.api.upgrade_properties(lang='hive', properties=properties) assert upgraded_props == properties - def test_progress(self): snippet = json.loads(""" { @@ -684,7 +664,7 @@ def test_progress(self): INFO : number of splits:1 INFO : Submitting tokens for job: job_1466104358744_0003 INFO : The url to track the job: http://jennykim-1.vpc.cloudera.com:8088/proxy/application_1466104358744_0003/ - """ + """ # noqa: E501 assert self.api.progress({}, snippet, logs=logs) == 5 @@ -741,7 +721,6 @@ def test_progress(self): assert self.api.progress({}, snippet, logs=logs) == 50 - def test_get_jobs(self): notebook = json.loads(""" @@ -812,14 +791,14 @@ def test_get_jobs(self): INFO : The url to track the job: http://jennykim-1.vpc.cloudera.com:8088/proxy/application_1466630204796_0059/ INFO : Starting Job = job_1466630204796_0059, Tracking URL = http://jennykim-1.vpc.cloudera.com:8088/proxy/application_1466630204796_0059/ INFO : Kill Command = /usr/lib/hadoop/bin/hadoop job -kill job_1466630204796_0059 - """ + """ # noqa: E501 jobs = self.api.get_jobs(notebook, snippet, logs) assert isinstance(jobs, list) assert len(jobs), 1 assert jobs[0]['name'] == 'job_1466630204796_0059' - assert jobs[0]['started'] == True - assert jobs[0]['finished'] == False + assert jobs[0]['started'] is True + assert jobs[0]['finished'] is False assert 'url' in jobs[0] logs += """INFO : Hadoop job information for Stage-1: number of mappers: 1; 
number of reducers: 1 @@ -831,13 +810,11 @@ def test_get_jobs(self): INFO : Launching Job 2 out of 2 """ - jobs = self.api.get_jobs(notebook, snippet, logs) assert len(jobs), 1 assert jobs[0]['name'] == 'job_1466630204796_0059' - assert jobs[0]['started'] == True - assert jobs[0]['finished'] == True - + assert jobs[0]['started'] is True + assert jobs[0]['finished'] is True def test_get_current_statement(self): snippet = json.loads(""" @@ -869,7 +846,6 @@ def test_get_current_statement(self): assert '086ecec9a8b89b1b47cce358bdbb343be23b1f8b54ca76bc81927e27' == statement['previous_statement_hash'] - def test_plan_extraction_from_profile(self): query_plan = self.api._get_impala_profile_plan( query_id='e147228183f1f0b3:6f086cc600000000', profile=IMPALA_CUSTOMER_QUERY_SAMPLE_PROFILE @@ -893,7 +869,6 @@ def setup_class(cls): super(TestHiveserver2ApiWithHadoop, cls).setup_class(load_data=False) - def setup_method(self): self.client.post('/beeswax/install_examples') @@ -908,7 +883,6 @@ def setup_method(self): self.statement = 'SELECT description, salary FROM sample_07 WHERE (sample_07.salary > 100000) ORDER BY salary DESC LIMIT 1000' - def create_query_document(self, owner, query_type='hive', database='default', name='Test Query', description='Test Query', statement='', files=None, functions=None, settings=None): @@ -933,7 +907,6 @@ def create_query_document(self, owner, query_type='hive', database='default', notebook_doc, save_as = _save_notebook(notebook.get_data(), owner) return notebook_doc - def get_snippet(self, notebook, snippet_idx=0): data = notebook.get_data() snippet = data['snippets'][snippet_idx] @@ -945,7 +918,6 @@ def get_snippet(self, notebook, snippet_idx=0): return snippet - def execute_and_wait(self, query_doc, snippet_idx=0, timeout=30.0, wait=1.0): notebook = Notebook(document=query_doc) snippet = self.get_snippet(notebook, snippet_idx=snippet_idx) @@ -975,7 +947,6 @@ def execute_and_wait(self, query_doc, snippet_idx=0, timeout=30.0, wait=1.0): return snippet - def test_query_with_unicode(self): statement = "SELECT * FROM sample_07 WHERE code='한';" @@ -996,7 +967,6 @@ def test_query_with_unicode(self): assert 0 == data['status'], data assert "SELECT * FROM sample_07 WHERE code='한'" in smart_str(data['logs']) - def test_get_current_statement(self): multi_statement = "SELECT description, salary FROM sample_07 LIMIT 20;\r\nSELECT AVG(salary) FROM sample_07;" @@ -1011,7 +981,7 @@ def test_get_current_statement(self): assert 0 == data['status'], data assert 0 == data['handle']['statement_id'], data assert 2 == data['handle']['statements_count'], data - assert True == data['handle']['has_more_statements'], data + assert True is data['handle']['has_more_statements'], data assert {'row': 0, 'column': 0} == data['handle']['start'], data assert {'row': 0, 'column': 51} == data['handle']['end'], data @@ -1024,11 +994,10 @@ def test_get_current_statement(self): assert 0 == data['status'], data assert 1 == data['handle']['statement_id'], data assert 2 == data['handle']['statements_count'], data - assert False == data['handle']['has_more_statements'], data + assert False is data['handle']['has_more_statements'], data assert {'row': 1, 'column': 0} == data['handle']['start'], data assert {'row': 1, 'column': 33} == data['handle']['end'], data - def test_explain(self): # Hive 2 with Tez set hive.explain.user to true by default, but this test is expecting output when this setting # is set to false. 
@@ -1046,7 +1015,6 @@ def test_explain(self): assert 'STAGE DEPENDENCIES' in data['explanation'], data assert self.statement == data['statement'], data - def test_download(self): statement = "SELECT 'hello world';" @@ -1060,7 +1028,6 @@ def test_download(self): assert 200 == response.status_code assert ('Content-Disposition', 'attachment; filename="Test Query.csv"') == response._headers['content-disposition'] - def test_get_sample(self): doc = self.create_query_document(owner=self.user, statement=self.statement) notebook = Notebook(document=doc) @@ -1087,7 +1054,6 @@ def test_get_sample(self): assert 'rows' in data assert len(data['rows']) > 0 - def test_fetch_result_size_mr(self): if not is_live_cluster(): # Mini-cluster does not have JHS pytest.skip("Skipping Test") @@ -1113,8 +1079,8 @@ def test_fetch_result_size_mr(self): assert 'result' in data assert 'rows' in data['result'] assert 'size' in data['result'] - assert None == data['result']['rows'] - assert None == data['result']['size'] + assert None is data['result']['rows'] + assert None is data['result']['size'] # Assert that a query with map & reduce task returns rows statement = "SELECT DISTINCT code FROM sample_07;" @@ -1149,7 +1115,6 @@ def test_fetch_result_size_mr(self): assert 23 == data['result']['rows'] assert data['result']['size'] > 0, data['result'] - def test_fetch_result_size_spark(self): if not is_live_cluster() or not is_hive_on_spark(): pytest.skip("Skipping Test") @@ -1177,8 +1142,8 @@ def test_fetch_result_size_spark(self): assert 'result' in data assert 'rows' in data['result'] assert 'size' in data['result'] - assert None == data['result']['rows'] - assert None == data['result']['size'] + assert None is data['result']['rows'] + assert None is data['result']['size'] # Assert that a query that runs a job will return rows and size statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;" @@ -1197,7 +1162,6 @@ def test_fetch_result_size_spark(self): assert 23 == data['result']['rows'] assert data['result']['size'] > 0 - def test_fetch_result_size_impala(self): if not is_live_cluster(): pytest.skip("Skipping Test") @@ -1224,7 +1188,7 @@ def test_fetch_result_size_impala(self): assert 'rows' in data['result'] assert 'size' in data['result'] assert 23 == data['result']['rows'] - assert None == data['result']['size'] + assert None is data['result']['size'] # Assert that selecting all from partitioned table works statement = "SELECT * FROM web_logs;" @@ -1246,7 +1210,6 @@ def test_fetch_result_size_impala(self): finally: self.api.close_session(session) - def test_fetch_result_abbreviated(self): if not is_live_cluster(): pytest.skip("Skipping Test") diff --git a/desktop/libs/notebook/src/notebook/connectors/kafka.py b/desktop/libs/notebook/src/notebook/connectors/kafka.py index 182545af9fe..e80b1584a2a 100644 --- a/desktop/libs/notebook/src/notebook/connectors/kafka.py +++ b/desktop/libs/notebook/src/notebook/connectors/kafka.py @@ -17,20 +17,15 @@ from __future__ import absolute_import -import logging import sys +import logging + +from django.utils.translation import gettext as _ from desktop.lib.i18n import force_unicode from kafka.kafka_api import get_topics - from notebook.connectors.base import Api, QueryError -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() diff --git a/desktop/libs/notebook/src/notebook/connectors/ksql.py 
b/desktop/libs/notebook/src/notebook/connectors/ksql.py index ff626435489..63f1b94d0ce 100644 --- a/desktop/libs/notebook/src/notebook/connectors/ksql.py +++ b/desktop/libs/notebook/src/notebook/connectors/ksql.py @@ -18,22 +18,17 @@ from __future__ import absolute_import -import logging -import json import sys +import json +import logging + +from django.utils.translation import gettext as _ -from desktop.lib.i18n import force_unicode from desktop.conf import has_channels +from desktop.lib.i18n import force_unicode from kafka.ksql_client import KSqlApi as KSqlClientApi - from notebook.connectors.base import Api, QueryError -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -60,11 +55,9 @@ def __init__(self, user, interpreter=None): self.url = self.options['url'] - def _get_db(self): return KSqlClientApi(user=self.user, url=self.url) - @query_error_handler def execute(self, notebook, snippet): channel_name = notebook.get('editorWsChannel') @@ -94,12 +87,10 @@ def execute(self, notebook, snippet): } } - @query_error_handler def check_status(self, notebook, snippet): return {'status': 'available'} - @query_error_handler def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): response = {} diff --git a/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py b/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py index d7d2604bfb6..c5af220e5d9 100644 --- a/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py +++ b/desktop/libs/notebook/src/notebook/connectors/oozie_batch.py @@ -15,25 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import re import sys import time +import logging -from django.urls import reverse from django.http import QueryDict +from django.urls import reverse +from django.utils.translation import gettext as _ from desktop.lib.exceptions_renderable import PopupException from desktop.models import Document2 - from notebook.connectors.base import Api, QueryError -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -64,7 +58,6 @@ def __init__(self, *args, **kwargs): self.fs = self.request.fs self.jt = self.request.jt - def execute(self, notebook, snippet): # Get document from notebook if not notebook.get('uuid', ''): @@ -90,7 +83,6 @@ def execute(self, notebook, snippet): 'has_result_set': True, } - def check_status(self, notebook, snippet): response = {'status': 'running'} @@ -115,7 +107,6 @@ def check_status(self, notebook, snippet): return response - def fetch_result(self, notebook, snippet, rows, start_over): log_output = self.get_log(notebook, snippet) results = self._get_results(log_output, snippet['type']) @@ -127,7 +118,6 @@ def fetch_result(self, notebook, snippet, rows, start_over): 'has_more': False, } - def cancel(self, notebook, snippet): job_id = snippet['result']['handle']['id'] @@ -138,7 +128,6 @@ def cancel(self, notebook, snippet): return {'status': 0} - def get_log(self, notebook, snippet, startFrom=0, size=None): job_id = snippet['result']['handle']['id'] @@ -147,14 +136,12 @@ def get_log(self, notebook, snippet, startFrom=0, size=None): return logs if logs else oozie_job.log - def progress(self, notebook, snippet, logs=None): job_id = snippet['result']['handle']['id'] oozie_job = check_job_access_permission(self.request, job_id) return oozie_job.get_progress() - def get_jobs(self, notebook, snippet, logs): jobs = [] job_id = snippet['result']['handle']['id'] @@ -171,15 +158,12 @@ def get_jobs(self, notebook, snippet, logs): }) return jobs - def close_statement(self, notebook, snippet): pass - def close_session(self, session): pass - def _get_log_output(self, oozie_workflow): log_output = '' q = self.request.GET.copy() @@ -204,7 +188,6 @@ def _get_log_output(self, oozie_workflow): attempts += 1 return log_output - def _get_results(self, log_output, action_type): results = '' diff --git a/desktop/libs/notebook/src/notebook/connectors/rdbms.py b/desktop/libs/notebook/src/notebook/connectors/rdbms.py index c68865d512d..9d052ab907b 100644 --- a/desktop/libs/notebook/src/notebook/connectors/rdbms.py +++ b/desktop/libs/notebook/src/notebook/connectors/rdbms.py @@ -15,19 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import next -from builtins import object -import logging import sys - -from desktop.lib.i18n import force_unicode +import logging +from builtins import next, object from beeswax import data_export +from desktop.lib.i18n import force_unicode from librdbms.server import dbms - from notebook.connectors.base import Api, QueryError, QueryExpired, _get_snippet_name - LOG = logging.getLogger() @@ -40,10 +36,7 @@ def decorator(*args, **kwargs): if 'Invalid query handle' in message or 'Invalid OperationHandle' in message: raise QueryExpired(e) else: - if sys.version_info[0] > 2: - raise QueryError(message).with_traceback(sys.exc_info()[2]) - else: - raise QueryError, message, sys.exc_info()[2] + raise QueryError(message).with_traceback(sys.exc_info()[2]) return decorator @@ -80,12 +73,10 @@ def execute(self, notebook, snippet): } } - @query_error_handler def check_status(self, notebook, snippet): return {'status': 'expired'} - @query_error_handler def fetch_result(self, notebook, snippet, rows, start_over): return { @@ -95,27 +86,22 @@ def fetch_result(self, notebook, snippet, rows, start_over): 'type': 'table' } - @query_error_handler def fetch_result_metadata(self): pass - @query_error_handler def cancel(self, notebook, snippet): return {'status': 0} - @query_error_handler def get_log(self, notebook, snippet, startFrom=None, size=None): return 'No logs' - @query_error_handler def close_statement(self, notebook, snippet): return {'status': -1} - @query_error_handler def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): query_server = self._get_query_server() @@ -143,7 +129,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N response['status'] = 0 return response - @query_error_handler def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None): query_server = self._get_query_server() diff --git a/desktop/libs/notebook/src/notebook/connectors/solr.py b/desktop/libs/notebook/src/notebook/connectors/solr.py index 53f350e05bd..fb5b14173ff 100644 --- a/desktop/libs/notebook/src/notebook/connectors/solr.py +++ b/desktop/libs/notebook/src/notebook/connectors/solr.py @@ -15,23 +15,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import logging import sys +import logging +from builtins import object + +from django.utils.translation import gettext as _ from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import force_unicode from indexer.solr_client import SolrClient - from notebook.connectors.base import Api, QueryError from notebook.models import escape_rows -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -71,7 +66,7 @@ def execute(self, notebook, snippet): response = api.sql(collection, snippet['statement']) - info = response['result-set']['docs'].pop(-1) # EOF, RESPONSE_TIME, EXCEPTION + info = response['result-set']['docs'].pop(-1) # EOF, RESPONSE_TIME, EXCEPTION if info.get('EXCEPTION'): raise QueryError(info['EXCEPTION']) @@ -103,12 +98,10 @@ def execute(self, notebook, snippet): 'statements_count': 1 } - @query_error_handler def check_status(self, notebook, snippet): return {'status': 'available'} - @query_error_handler def fetch_result(self, notebook, snippet, rows, start_over): return { @@ -118,17 +111,14 @@ def fetch_result(self, notebook, snippet, rows, start_over): 'type': 'table' } - @query_error_handler def fetch_result_metadata(self): pass - @query_error_handler def cancel(self, notebook, snippet): return {'status': 0} - @query_error_handler def get_log(self, notebook, snippet, startFrom=None, size=None): return 'No logs' @@ -137,7 +127,6 @@ def get_log(self, notebook, snippet, startFrom=None, size=None): def close_statement(self, notebook, snippet): return {'status': -1} - @query_error_handler def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): from search.conf import SOLR_URL @@ -157,7 +146,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N response['status'] = 0 return response - @query_error_handler def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None): from search.conf import SOLR_URL @@ -203,7 +191,13 @@ def get_tables(self, database, table_names=[]): ] def get_columns(self, database, table): - return [{'name': field['name'], 'type': field['type'], 'comment': '', 'primary_key': field.get('primary_key')} for field in self.db.schema_fields(table)['fields']] + return [{ + 'name': field['name'], + 'type': field['type'], + 'comment': '', + 'primary_key': field.get('primary_key') + } for field in self.db.schema_fields(table)['fields'] + ] def get_sample_data(self, database, table, column=None): # Note: currently ignores dynamic fields diff --git a/desktop/libs/notebook/src/notebook/connectors/spark_shell.py b/desktop/libs/notebook/src/notebook/connectors/spark_shell.py index 89d9993ee68..292c47f3edd 100644 --- a/desktop/libs/notebook/src/notebook/connectors/spark_shell.py +++ b/desktop/libs/notebook/src/notebook/connectors/spark_shell.py @@ -15,31 +15,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import range, object -import logging import re import sys +import json import time +import logging import textwrap -import json +from builtins import object, range -from beeswax.server.dbms import Table +from django.utils.translation import gettext as _ +from beeswax.server.dbms import Table +from desktop.auth.backend import rewrite_user from desktop.conf import USE_DEFAULT_CONFIGURATION from desktop.lib.exceptions_renderable import PopupException from desktop.lib.i18n import force_unicode from desktop.lib.rest.http_client import RestException from desktop.models import DefaultConfiguration -from desktop.auth.backend import rewrite_user - -from notebook.data_export import download as spark_download from notebook.connectors.base import Api, QueryError, SessionExpired, _get_snippet_session - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - +from notebook.data_export import download as spark_download LOG = logging.getLogger() @@ -52,21 +46,19 @@ SESSION_KEY = '%(username)s-%(interpreter_name)s' + class SparkApi(Api): - SPARK_UI_RE = re.compile("Started SparkUI at (http[s]?://([0-9a-zA-Z-_\.]+):(\d+))") + SPARK_UI_RE = re.compile(r"Started SparkUI at (http[s]?://([0-9a-zA-Z-_\.]+):(\d+))") YARN_JOB_RE = re.compile("tracking URL: (http[s]?://.+/)") - STANDALONE_JOB_RE = re.compile("Got job (\d+)") - + STANDALONE_JOB_RE = re.compile(r"Got job (\d+)") def __init__(self, user, interpreter): super(SparkApi, self).__init__(user=user, interpreter=interpreter) - def get_api(self): return get_spark_api(self.user, self.interpreter) - @staticmethod def get_livy_props(lang, properties=None): props = dict([(p['name'], p['value']) for p in SparkConfiguration.PROPERTIES]) @@ -100,7 +92,6 @@ def get_livy_props(lang, properties=None): return props - @staticmethod def to_properties(props=None): properties = list() @@ -114,14 +105,12 @@ def to_properties(props=None): return properties - def _get_session_key(self): return SESSION_KEY % { 'username': self.user.username if hasattr(self.user, 'username') else self.user, 'interpreter_name': self.interpreter['name'] } - def _check_session(self, session): ''' Check if the session is actually present and its state is healthy. 
@@ -135,7 +124,6 @@ def _check_session(self, session): if session_present and session_present['state'] not in ('dead', 'shutting_down', 'error', 'killed'): return session_present - def create_session(self, lang='scala', properties=None): api = self.get_api() stored_session_info = self._get_session_info_from_user() @@ -174,7 +162,6 @@ def create_session(self, lang='scala', properties=None): self._set_session_info_to_user(new_session_info) return new_session_info - def execute(self, notebook, snippet): api = self.get_api() @@ -183,7 +170,6 @@ def execute(self, notebook, snippet): response = self._execute(api, session, snippet.get('type'), snippet['statement']) return response - def _execute(self, api, session, snippet_type, statement): if not session or not self._check_session(session): stored_session_info = self._get_session_info_from_user() @@ -201,12 +187,11 @@ def _execute(self, api, session, snippet_type, statement): } except Exception as e: message = force_unicode(str(e)).lower() - if re.search("session ('\d+' )?not found", message) or 'connection refused' in message or 'session is in state busy' in message: + if re.search(r"session ('\d+' )?not found", message) or 'connection refused' in message or 'session is in state busy' in message: raise SessionExpired(e) else: raise e - def check_status(self, notebook, snippet): api = self.get_api() session = _get_snippet_session(notebook, snippet) @@ -221,12 +206,11 @@ def check_status(self, notebook, snippet): } except Exception as e: message = force_unicode(str(e)).lower() - if re.search("session ('\d+' )?not found", message): + if re.search(r"session ('\d+' )?not found", message): raise SessionExpired(e) else: raise e - def fetch_result(self, notebook, snippet, rows, start_over=False): api = self.get_api() session = _get_snippet_session(notebook, snippet) @@ -243,13 +227,12 @@ def fetch_result(self, notebook, snippet, rows, start_over=False): return response - def _fetch_result(self, api, session, cell): try: response = api.fetch_data(session['id'], cell) except Exception as e: message = force_unicode(str(e)) - if re.search("session ('\d+' )?not found", message): + if re.search(r"session ('\d+' )?not found", message): raise SessionExpired(e) else: raise PopupException(_(message)) @@ -301,13 +284,12 @@ def _fetch_result(self, api, session, cell): raise QueryError(msg) - def _handle_result_data(self, result, is_complex_type=False): """ Parse the data from the 'result' dict based on whether it has complex datatypes or not. - If the 'is_complex_type' flag is True, it parses the result dict, checking for 'schema' and 'values' - and if found, formatting them into a appropriate result data dictionary representing that result column. + If the 'is_complex_type' flag is True, it parses the result dict, checking for 'schema' and 'values' + and if found, formatting them into a appropriate result data dictionary representing that result column. If the flag is False, it simply returns the 'data' as is. Args: @@ -337,9 +319,8 @@ def _handle_result_data(self, result, is_complex_type=False): else: # If the query result is not having complex datatype, return the 'data' as it is. 
data = result['data'] - - return data + return data def _handle_result_meta(self, result): meta = [] @@ -357,9 +338,8 @@ def _handle_result_meta(self, result): meta.append({'name': f['name'], 'type': complex_type, 'comment': ''}) else: meta.append({'name': f['name'], 'type': f['type'], 'comment': ''}) - - return meta, is_complex_type + return meta, is_complex_type def cancel(self, notebook, snippet): api = self.get_api() @@ -375,7 +355,6 @@ def cancel(self, notebook, snippet): return {'status': 0} - def get_log(self, notebook, snippet, startFrom=0, size=None): response = {'status': 0} api = self.get_api() @@ -389,7 +368,6 @@ def get_log(self, notebook, snippet, startFrom=0, size=None): LOG.debug(message) return response - def _handle_session_health_check(self, session): if not session or not self._check_session(session): @@ -398,14 +376,12 @@ def _handle_session_health_check(self, session): session = stored_session_info else: raise PopupException(_("Session error. Please create new session and try again.")) - - return session + return session - def close_statement(self, notebook, snippet): # Individual statements cannot be closed + def close_statement(self, notebook, snippet): # Individual statements cannot be closed pass - def close_session(self, session): api = self.get_api() @@ -417,7 +393,7 @@ def close_session(self, session): 'status': 0 } except RestException as e: - if e.code == 404 or e.code == 500: # TODO remove the 500 + if e.code == 404 or e.code == 500: # TODO remove the 500 raise SessionExpired(e) finally: stored_session_info = self._get_session_info_from_user() @@ -426,7 +402,6 @@ def close_session(self, session): else: return {'status': -1} - def get_jobs(self, notebook, snippet, logs): if self._is_yarn_mode(): # Tracking URL is found at the start of the logs @@ -435,7 +410,6 @@ def get_jobs(self, notebook, snippet, logs): else: return self._get_standalone_jobs(logs) - def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): response = {} # As booting a new SQL session is slow and we don't send the id of the current one in /autocomplete @@ -446,7 +420,7 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N # Calling the method here since this /autocomplete call can be frequent enough and we dont need dedicated one. if self._get_session_info_from_user(): self._close_unused_sessions(snippet.get('type')) - + stored_session_info = self._get_session_info_from_user() if stored_session_info and self._check_session(stored_session_info): session = stored_session_info @@ -470,7 +444,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N return response - def _close_unused_sessions(self, session_type): ''' Closes all unused Livy sessions for a particular user to free up session resources. 
@@ -493,7 +466,6 @@ def _close_unused_sessions(self, session_type): session['kind'] == session_type and session['state'] in ('idle', 'shutting_down', 'error', 'dead', 'killed'): self.close_session(session) - def _check_status_and_fetch_result(self, api, session, execute_resp): check_status = api.fetch_data(session['id'], execute_resp['id']) @@ -506,7 +478,6 @@ def _check_status_and_fetch_result(self, api, session, execute_resp): if check_status['state'] == 'available': return self._fetch_result(api, session, execute_resp['id']) - def _show_databases(self, api, session, snippet_type): show_db_execute = self._execute(api, session, snippet_type, 'SHOW DATABASES') db_list = self._check_status_and_fetch_result(api, session, show_db_execute) @@ -514,7 +485,6 @@ def _show_databases(self, api, session, snippet_type): if db_list: return [db[0] for db in db_list['data']] - def _show_tables(self, api, session, snippet_type, database): use_db_execute = self._execute(api, session, snippet_type, 'USE %(database)s' % {'database': database}) use_db_resp = self._check_status_and_fetch_result(api, session, use_db_execute) @@ -525,7 +495,6 @@ def _show_tables(self, api, session, snippet_type, database): if tables_list: return [table[1] for table in tables_list['data']] - def _get_columns(self, api, session, snippet_type, database, table): use_db_execute = self._execute(api, session, snippet_type, 'USE %(database)s' % {'database': database}) use_db_resp = self._check_status_and_fetch_result(api, session, use_db_execute) @@ -550,7 +519,6 @@ def _get_columns(self, api, session, snippet_type, database, table): return cols - def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None): api = self.get_api() response = { @@ -578,7 +546,6 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as if stat.get('data_type') and stat['data_type'] == 'transactional' and stat.get('col_name'): return response - statement = self._get_select_query(database, table, column, operation) sample_execute = self._execute(api, session, snippet.get('type'), statement) @@ -589,11 +556,9 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as return response - def get_browse_query(self, snippet, database, table, partition_spec=None): return self._get_select_query(database, table) - def _get_select_query(self, database, table, column=None, operation=None, limit=100): if operation == 'hello': statement = "SELECT 'Hello World!'" @@ -612,7 +577,6 @@ def _get_select_query(self, database, table, column=None, operation=None, limit= return statement - def describe_table(self, notebook, snippet, database=None, table=None): api = self.get_api() @@ -644,7 +608,6 @@ def describe_table(self, notebook, snippet, database=None, table=None): 'stats': tb.stats } - def describe_database(self, notebook, snippet, database=None): response = {'status': 0} api = self.get_api() @@ -675,7 +638,6 @@ def describe_database(self, notebook, snippet, database=None): return response - def _get_standalone_jobs(self, logs): job_ids = set([]) @@ -700,7 +662,6 @@ def _get_standalone_jobs(self, logs): return jobs - def _get_yarn_jobs(self, logs): tracking_urls = set([]) @@ -716,11 +677,9 @@ def _get_yarn_jobs(self, logs): return jobs - def _is_yarn_mode(self): return LIVY_SERVER_SESSION_KIND.get() == "yarn" - def _get_session_info_from_user(self): self.user = rewrite_user(self.user) session_key = self._get_session_key() @@ -728,7 +687,6 @@ def 
_get_session_info_from_user(self): if self.user.profile.data.get(session_key): return self.user.profile.data[session_key] - def _set_session_info_to_user(self, session_info): self.user = rewrite_user(self.user) session_key = self._get_session_key() @@ -736,7 +694,6 @@ def _set_session_info_to_user(self, session_info): self.user.profile.update_data({session_key: session_info}) self.user.profile.save() - def _remove_session_info_from_user(self): self.user = rewrite_user(self.user) session_key = self._get_session_key() @@ -745,7 +702,7 @@ def _remove_session_info_from_user(self): json_data = self.user.profile.data json_data.pop(session_key) self.user.profile.json_data = json.dumps(json_data) - + self.user.profile.save() @@ -767,7 +724,7 @@ def __init__(self, desc_results): self.stats = [] self.cols = [] self.partition_keys = [] - self.primary_keys = [] # Not implemented + self.primary_keys = [] # Not implemented self.is_view = False self._details = None @@ -808,7 +765,7 @@ def handle_describe_format(self): }) if d[0] == 'Table': - self.name = d[1] + self.name = d[1] elif d[0] == 'Type': if 'view' in d[1].lower(): self.is_view = True diff --git a/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py b/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py index 1d96e328b72..d172385a227 100644 --- a/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py +++ b/desktop/libs/notebook/src/notebook/connectors/spark_shell_tests.py @@ -15,20 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import sys - from builtins import object +from unittest.mock import Mock, patch -from desktop.lib.django_test_util import make_logged_in_client -from useradmin.models import User +import pytest +from desktop.lib.django_test_util import make_logged_in_client from notebook.connectors.spark_shell import SparkApi - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock -else: - from mock import patch, Mock +from useradmin.models import User @pytest.mark.django_db @@ -46,7 +41,6 @@ def setup_method(self): } self.api = SparkApi(self.user, self.interpreter) - def test_get_api(self): lang = 'pyspark' properties = None @@ -55,7 +49,6 @@ def test_get_api(self): spark_api = self.api.get_api() assert spark_api.__class__.__name__ == 'LivyClient' - def test_get_livy_props_method(self): test_properties = [{ "name": "files", @@ -64,7 +57,6 @@ def test_get_livy_props_method(self): props = self.api.get_livy_props('scala', test_properties) assert props['files'] == ['file_a', 'file_b', 'file_c'] - def test_create_session_with_config(self): lang = 'pyspark' properties = None @@ -128,7 +120,6 @@ def test_create_session_with_config(self): cores = p['value'] assert cores == 1 - def test_create_session_plain(self): lang = 'pyspark' properties = None @@ -153,7 +144,6 @@ def test_create_session_plain(self): assert files_properties, session['properties'] assert files_properties[0]['value'] == [], session['properties'] - def test_execute(self): with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session: with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api: @@ -177,7 +167,6 @@ def test_execute(self): with pytest.raises(Exception): self.api.execute(notebook, snippet) - def test_handle_result_data(self): # When result data has no complex type. 
data = { @@ -216,7 +205,6 @@ def test_handle_result_data(self): processed_data = self.api._handle_result_data(data, is_complex_type=True) assert processed_data == [['0', 535.0, {'site_id': 'BEB'}, {'c_id': 'EF'}, '2023-06-16T23:53:31Z']] - def test_check_status(self): with patch('notebook.connectors.spark_shell._get_snippet_session') as _get_snippet_session: with patch('notebook.connectors.spark_shell.get_spark_api') as get_spark_api: @@ -245,7 +233,6 @@ def test_check_status(self): ) with pytest.raises(Exception): self.api.check_status(notebook, snippet) - def test_get_sample_data(self): snippet = Mock() @@ -282,14 +269,13 @@ def test_get_sample_data(self): # When table is not transactional self.api.describe_table = Mock( return_value={ - 'stats': [] # No details regarding transactionality is present in describe response + 'stats': [] # No details regarding transactionality is present in describe response } ) response = self.api.get_sample_data(snippet, 'test_db', 'test_table', 'test_column') assert response['rows'] == 'test_data' assert response['full_headers'] == 'test_meta' - def test_get_select_query(self): # With operation as 'hello' @@ -304,7 +290,6 @@ def test_get_select_query(self): response = self.api._get_select_query('test_db', 'test_table', 'test_column') assert response == 'SELECT test_column\nFROM test_db.test_table\nLIMIT 100\n' - def test_describe_database(self): notebook = Mock() snippet = Mock() @@ -340,7 +325,6 @@ def test_describe_database(self): 'parameters': '{Create-by=Kevin, Create-date=09/01/2019}', 'status': 0} - def test_describe_table(self): notebook = Mock() snippet = Mock() @@ -501,7 +485,6 @@ def test_describe_table(self): 'data_type': 'transient_lastDdlTime'}], 'status': 0} - def test_get_jobs(self): local_jobs = [ {'url': u'http://172.21.1.246:4040/jobs/job/?id=0', 'name': u'0'} @@ -516,7 +499,6 @@ def test_get_jobs(self): assert jobs == yarn_jobs, jobs - LIVY_STANDALONE_LOG = """ Starting livy-repl on http://172.21.1.246:58449 Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties @@ -592,7 +574,7 @@ def test_get_jobs(self): 15/10/05 14:02:37 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 15/10/05 14:02:37 INFO DAGScheduler: ShuffleMapStage 0 (reduceByKey at :1) finished in 0.973 s 15/10/05 14:02:37 INFO DAGScheduler: looking for newly runnable stages -""" +""" # noqa: E501 LIVY_YARN_LOG = """ 15/10/05 13:51:21 INFO client.RMProxy: Connecting to ResourceManager at huetest-1.test.com/175.18.213.12:8032 15/10/05 13:51:21 INFO yarn.Client: Requesting a new application from cluster with 3 NodeManagers @@ -645,4 +627,4 @@ def test_get_jobs(self): tracking URL: http://huetest-1.test.com:8088/proxy/application_1444070328046_0002/ user: huetest 15/10/05 13:52:24 INFO yarn.Client: Application report for application_1444070328046_0002 (state: RUNNING) -""" +""" # noqa: E501 diff --git a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py index 67b8bb9c942..dc5dccbd23a 100644 --- a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py +++ b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy.py @@ -49,47 +49,36 @@ Each query statement grabs a connection from the engine and will return it after its close(). Disposing the engine closes all its connections. 
''' -from future import standard_library -standard_library.install_aliases() -from builtins import next, object -import datetime -import json -import logging -import uuid import re import sys +import json +import uuid +import logging +import datetime import textwrap - from string import Template +from urllib.parse import parse_qs as urllib_parse_qs, quote_plus as urllib_quote_plus, urlparse as urllib_urlparse from django.core.cache import caches -from sqlalchemy import create_engine, inspect, Table, MetaData -from sqlalchemy.exc import OperationalError, UnsupportedCompilationError, CompileError, ProgrammingError, NoSuchTableError +from django.utils.translation import gettext as _ +from past.builtins import long +from sqlalchemy import MetaData, Table, create_engine, inspect +from sqlalchemy.exc import CompileError, NoSuchTableError, OperationalError, ProgrammingError, UnsupportedCompilationError +from beeswax import data_export from desktop.lib import export_csvxls from desktop.lib.i18n import force_unicode -from beeswax import data_export from librdbms.server import dbms - -from notebook.connectors.base import Api, QueryError, QueryExpired, _get_snippet_name, AuthenticationRequired +from notebook.connectors.base import Api, AuthenticationRequired, QueryError, QueryExpired, _get_snippet_name from notebook.models import escape_rows -if sys.version_info[0] > 2: - from urllib.parse import quote_plus as urllib_quote_plus, urlparse as urllib_urlparse, parse_qs as urllib_parse_qs - from past.builtins import long - from io import StringIO - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - from urllib import quote_plus as urllib_quote_plus, urlparse as urllib_urlparse, parse_qs as urllib_parse_qs - from cStringIO import StringIO - ENGINES = {} CONNECTIONS = {} ENGINE_KEY = '%(username)s-%(connector_name)s' URL_PATTERN = '(?P.+?://)(?P[^:/ ]+):(?P[0-9]*).*' + LOG = logging.getLogger() @@ -99,7 +88,7 @@ def decorator(*args, **kwargs): return func(*args, **kwargs) except OperationalError as e: message = str(e) - if '1045' in message: # 'Access denied' # MySQL + if '1045' in message: # 'Access denied' # MySQL raise AuthenticationRequired(message=message) else: raise e @@ -215,7 +204,6 @@ def _create_engine(self): return create_engine(url, **options) - def _get_session(self, notebook, snippet): for session in notebook['sessions']: if session['type'] == snippet['type']: @@ -223,7 +211,6 @@ def _get_session(self, notebook, snippet): return None - def _create_connection(self, engine): connection = None try: @@ -236,7 +223,6 @@ def _create_connection(self, engine): return connection - @query_error_handler def execute(self, notebook, snippet): guid = uuid.uuid4().hex @@ -278,11 +264,11 @@ def execute(self, notebook, snippet): response = { 'sync': False, - 'has_result_set': result.cursor != None, + 'has_result_set': result.cursor is not None, 'modified_row_count': 0, 'guid': guid, 'result': { - 'has_more': result.cursor != None, + 'has_more': result.cursor is not None, 'data': [], 'meta': cache['meta'], 'type': 'table' @@ -292,7 +278,6 @@ def execute(self, notebook, snippet): return response - @query_error_handler def explain(self, notebook, snippet): session = self._get_session(notebook, snippet) @@ -310,7 +295,7 @@ def explain(self, notebook, snippet): explanation = '' else: try: - result = connection.execute('EXPLAIN '+ statement) + result = connection.execute('EXPLAIN ' + statement) explanation = "\n".join("{}: {},".format(k, v) for row in 
result for k, v in row.items()) except ProgrammingError: pass @@ -323,7 +308,6 @@ def explain(self, notebook, snippet): 'statement': statement } - @query_error_handler def check_status(self, notebook, snippet): guid = snippet.get('result', {}).get('handle', {}).get('guid') @@ -344,7 +328,6 @@ def check_status(self, notebook, snippet): return response - @query_error_handler def progress(self, notebook, snippet, logs=''): progress = 50 @@ -363,7 +346,6 @@ def progress(self, notebook, snippet, logs=''): progress = stats.get('completedSplits', 0) * 100 // stats.get('totalSplits', 1) return progress - @query_error_handler def fetch_result(self, notebook, snippet, rows, start_over): guid = snippet['result']['handle']['guid'] @@ -383,7 +365,6 @@ def fetch_result(self, notebook, snippet, rows, start_over): 'type': 'table' } - def _assign_types(self, results, meta): result = results and results[0] if result: @@ -401,17 +382,14 @@ def _assign_types(self, results, meta): else: meta[index]['type'] = 'STRING_TYPE' - @query_error_handler def fetch_result_metadata(self): pass - @query_error_handler def cancel(self, notebook, snippet): return self.close_statement(notebook, snippet) - @query_error_handler def get_log(self, notebook, snippet, startFrom=None, size=None): guid = snippet['result']['handle']['guid'] @@ -433,12 +411,10 @@ def close_statement(self, notebook, snippet): finally: return result - def close_session(self, session): engine = self._get_engine() engine.dispose() # ENGINE_KEY currently includes the current user - @query_error_handler def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): engine = self._get_engine() @@ -486,7 +462,6 @@ def autocomplete(self, snippet, database=None, table=None, column=None, nested=N response['status'] = 0 return response - @query_error_handler def get_sample_data(self, snippet, database=None, table=None, column=None, is_async=False, operation=None): engine = self._get_engine() @@ -530,7 +505,6 @@ def get_browse_query(self, snippet, database, table, partition_spec=None): 'backticks': self.backticks }) - def _get_column_type_name(self, col): try: name = str(col.get('type')) @@ -539,7 +513,6 @@ def _get_column_type_name(self, col): return name - def _fix_bigquery_db_prefixes(self, table_or_column): if self.options['url'].startswith('bigquery://'): table_or_column = table_or_column.rsplit('.', 1)[-1] diff --git a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py index fe8f785d1fd..20cab695a22 100644 --- a/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py +++ b/desktop/libs/notebook/src/notebook/connectors/sql_alchemy_tests.py @@ -16,27 +16,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from builtins import object -import logging -import pytest import sys +import logging +from builtins import object +from unittest.mock import MagicMock, Mock, patch +import pytest from sqlalchemy.exc import UnsupportedCompilationError -from sqlalchemy.types import NullType, ARRAY, JSON, VARCHAR +from sqlalchemy.types import ARRAY, JSON, VARCHAR, NullType from desktop.auth.backend import rewrite_user from desktop.lib.django_test_util import make_logged_in_client -from useradmin.models import User - from notebook.connectors.base import AuthenticationRequired -from notebook.connectors.sql_alchemy import SqlAlchemyApi, Assist - - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - +from notebook.connectors.sql_alchemy import Assist, SqlAlchemyApi +from useradmin.models import User LOG = logging.getLogger() @@ -55,7 +48,6 @@ def setup_method(self): }, } - def test_column_backticks_escaping(self): interpreter = { 'name': 'hive', @@ -73,7 +65,6 @@ def test_column_backticks_escaping(self): } assert SqlAlchemyApi(self.user, interpreter).backticks == '"' - def test_create_athena_engine(self): interpreter = { 'name': 'hive', @@ -86,7 +77,6 @@ def test_create_athena_engine(self): with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine: SqlAlchemyApi(self.user, interpreter)._create_engine() - def test_fetch_result_empty(self): notebook = Mock() snippet = {'result': {'handle': {'guid': 'guid-1'}}} @@ -97,7 +87,7 @@ def test_fetch_result_empty(self): CONNECTIONS.get = Mock( return_value={ 'result': Mock( - fetchmany=Mock(return_value=[]) # We have 0 rows + fetchmany=Mock(return_value=[]) # We have 0 rows ), 'meta': MagicMock( __getitem__=Mock(return_value={'type': 'BIGINT_TYPE'}), @@ -110,12 +100,11 @@ def test_fetch_result_empty(self): assert not data['has_more'] assert data['has_more'] != [] - assert data['has_more'] == False + assert data['has_more'] is False assert data['data'] == [] assert data['meta']() == [{'type': 'BIGINT_TYPE'}] - def test_fetch_result_rows(self): notebook = Mock() snippet = {'result': {'handle': {'guid': 'guid-1'}}} @@ -126,7 +115,7 @@ def test_fetch_result_rows(self): CONNECTIONS.get = Mock( return_value={ 'result': Mock( - fetchmany=Mock(return_value=[['row1'], ['row2']]) # We have 2 rows + fetchmany=Mock(return_value=[['row1'], ['row2']]) # We have 2 rows ), 'meta': MagicMock( __getitem__=Mock(return_value={'type': 'BIGINT_TYPE'}), @@ -139,12 +128,11 @@ def test_fetch_result_rows(self): assert not data['has_more'] assert data['has_more'] != [] - assert data['has_more'] == False + assert data['has_more'] is False assert data['data'] == [['row1'], ['row2']] assert data['meta']() == [{'type': 'BIGINT_TYPE'}] - def test_create_engine_auth_error(self): interpreter = { 'name': 'hive', @@ -157,7 +145,6 @@ def test_create_engine_auth_error(self): with pytest.raises(AuthenticationRequired): SqlAlchemyApi(self.user, interpreter)._create_engine() - def test_create_engine_auth(self): interpreter = { 'name': 'hive', @@ -181,7 +168,6 @@ def test_create_engine_auth(self): with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine: SqlAlchemyApi(self.user, interpreter)._create_engine() - def test_create_connection_error(self): interpreter = { 'name': 'hive', @@ -195,7 +181,6 @@ def test_create_connection_error(self): engine = SqlAlchemyApi(self.user, interpreter)._create_engine() SqlAlchemyApi(self.user, interpreter)._create_connection(engine) - def 
test_create_connection(self): interpreter = { 'name': 'hive', @@ -220,7 +205,6 @@ def test_create_connection(self): engine = SqlAlchemyApi(self.user, interpreter)._create_engine() SqlAlchemyApi(self.user, interpreter)._create_connection(engine) - def test_create_engine_with_impersonation(self): interpreter = { 'name': 'hive', @@ -236,7 +220,6 @@ def test_create_engine_with_impersonation(self): create_engine.assert_called_with('presto://hue:8080/hue', pool_pre_ping=True) - interpreter['options']['has_impersonation'] = True # On with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine: @@ -261,7 +244,6 @@ def test_create_engine_with_impersonation_phoenix(self): create_engine.assert_called_with('phoenix://hue:8080/hue', pool_pre_ping=False) - interpreter['options']['has_impersonation'] = True # On with patch('notebook.connectors.sql_alchemy.create_engine') as create_engine: @@ -269,7 +251,6 @@ def test_create_engine_with_impersonation_phoenix(self): create_engine.assert_called_with('phoenix://test@hue:8080/hue', pool_pre_ping=False) - def test_explain(self): with patch('notebook.connectors.sql_alchemy.SqlAlchemyApi._create_connection') as _create_connection: @@ -291,7 +272,6 @@ def test_explain(self): assert explanation == response['explanation'] - def test_check_status(self): notebook = Mock() @@ -305,7 +285,6 @@ def test_check_status(self): response = SqlAlchemyApi(self.user, self.interpreter).check_status(notebook, snippet) assert response['status'] == 'available' - def test_get_sample_data(self): snippet = Mock() @@ -321,7 +300,6 @@ def test_get_sample_data(self): response['full_headers'] == [{'name': 'col1', 'type': 'STRING_TYPE', 'comment': ''}]) - def test_get_tables(self): snippet = MagicMock() @@ -338,7 +316,6 @@ def test_get_tables(self): assert response['tables_meta'][0]['type'] == 'Table' assert response['tables_meta'][1]['type'] == 'View' - def test_get_sample_data_table(self): snippet = Mock() @@ -351,7 +328,6 @@ def test_get_sample_data_table(self): assert response['rows'] == [[1], [2]] - def test_dialect_trim_statement_semicolon(self): interpreter = { 'name': 'presto', @@ -385,7 +361,6 @@ def test_dialect_trim_statement_semicolon(self): execute.assert_called_with('SELECT 1') - def test_get_log(self): notebook = Mock() snippet = MagicMock() @@ -411,27 +386,23 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = rewrite_user(User.objects.get(username="test")) - def test_backticks_with_connectors(self): interpreter = {'name': 'hive', 'options': {'url': 'dialect://'}, 'dialect_properties': {'sql_identifier_quote': '`'}} data = SqlAlchemyApi(self.user, interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1') assert data == 'SELECT *\nFROM `db1`.`table1`\nLIMIT 1000\n' - interpreter = {'options': {'url': 'dialect://'}, 'dialect_properties': {'sql_identifier_quote': '"'}} data = SqlAlchemyApi(self.user, interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1') assert data == 'SELECT *\nFROM "db1"."table1"\nLIMIT 1000\n' - def test_backticks_without_connectors(self): interpreter = {'name': 'hive', 'options': {'url': 'hive://'}} data = SqlAlchemyApi(self.user, interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1') assert data == 'SELECT *\nFROM `db1`.`table1`\nLIMIT 1000\n' - interpreter = {'name': 'postgresql', 'options': {'url': 'postgresql://'}} data = SqlAlchemyApi(self.user, 
interpreter).get_browse_query(snippet=Mock(), database='db1', table='table1') @@ -445,7 +416,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = rewrite_user(User.objects.get(username="test")) - def test_empty_database_names(self): interpreter = { 'name': 'hive', @@ -480,6 +450,7 @@ def col1_dict(key): col1 = MagicMock() col1.__getitem__.side_effect = col1_dict col1.get = col1_dict + def col2_dict(key): return { 'name': 'col2', @@ -543,7 +514,6 @@ def test_get_column_type_name_complex(self): assert api._get_column_type_name({'type': ARRAY}) == 'array' assert api._get_column_type_name({'type': JSON}) == 'json' - def test_fix_bigquery_db_prefixes(self): interpreter = { 'name': 'bigquery', diff --git a/desktop/libs/notebook/src/notebook/connectors/sqlflow.py b/desktop/libs/notebook/src/notebook/connectors/sqlflow.py index 481c9011706..f1e6c73abc4 100644 --- a/desktop/libs/notebook/src/notebook/connectors/sqlflow.py +++ b/desktop/libs/notebook/src/notebook/connectors/sqlflow.py @@ -18,26 +18,20 @@ from __future__ import absolute_import -import logging -import json import os import sys +import json +import logging import sqlflow +from django.utils.translation import gettext as _ from sqlflow.rows import Rows from desktop.lib.i18n import force_unicode - from notebook.connectors.base import Api, QueryError -from notebook.decorators import ssh_error_handler, rewrite_ssh_api_url +from notebook.decorators import rewrite_ssh_api_url, ssh_error_handler from notebook.models import escape_rows -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -62,12 +56,10 @@ def __init__(self, user, interpreter=None): if self.options.get('has_ssh'): self.url = rewrite_ssh_api_url(self.url)['url'] - def _get_db(self): os.environ['SQLFLOW_DATASOURCE'] = self.interpreter['options']['datasource'] return sqlflow.Client(server_url='172.18.1.3:50051') # TODO Send as param instead of ENV - @query_error_handler @ssh_error_handler def execute(self, notebook, snippet): @@ -98,7 +90,6 @@ def execute(self, notebook, snippet): } } - def _execute(self, statement): db = self._get_db() @@ -124,12 +115,10 @@ def _execute(self, statement): 'description': description, } - @query_error_handler def check_status(self, notebook, snippet): return {'status': 'available'} - @query_error_handler @ssh_error_handler def autocomplete(self, snippet, database=None, table=None, column=None, nested=None, operation=None): @@ -175,7 +164,6 @@ def get_sample_data(self, snippet, database=None, table=None, column=None, is_as return response - def fetch_result(self, notebook, snippet, rows, start_over): """Only called at the end of a live query.""" return { diff --git a/desktop/libs/notebook/src/notebook/decorators.py b/desktop/libs/notebook/src/notebook/decorators.py index 78d16abe2e8..7d59b46a61c 100644 --- a/desktop/libs/notebook/src/notebook/decorators.py +++ b/desktop/libs/notebook/src/notebook/decorators.py @@ -15,36 +15,36 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from past.builtins import basestring -import json -import logging -import math import re import sys +import json +import math +import logging from django.forms import ValidationError from django.http import Http404 from django.utils.functional import wraps +from django.utils.translation import gettext as _ +from past.builtins import basestring from dashboard.models import extract_solr_exception_message from desktop.conf import ENABLE_HUE_5 from desktop.lib.django_util import JsonResponse from desktop.lib.exceptions_renderable import PopupException -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.lib.rest.http_client import RestException -from desktop.models import Document2, Document, FilesystemException - +from desktop.models import Document, Document2, FilesystemException from notebook.conf import check_has_missing_permission -from notebook.connectors.base import QueryExpired, QueryError, SessionExpired, AuthenticationRequired, OperationTimeout, \ - OperationNotSupported +from notebook.connectors.base import ( + AuthenticationRequired, + OperationNotSupported, + OperationTimeout, + QueryError, + QueryExpired, + SessionExpired, +) from notebook.models import _get_editor_type -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -55,7 +55,7 @@ def decorate(request, *args, **kwargs): editor_type = request.GET.get('type', 'hive') gist_id = request.POST.get('gist') - if editor_type == 'gist' or gist_id: # Gist don't have permissions currently + if editor_type == 'gist' or gist_id: # Gist don't have permissions currently pass else: if editor_id: # Open existing saved editor document @@ -144,7 +144,7 @@ def wrapper(*args, **kwargs): except QueryError as e: LOG.exception('Error running %s' % f.__name__) response['status'] = 1 - response['message'] = smart_unicode(e) + response['message'] = smart_str(e) if response['message'].index("max_row_size"): size = re.search(r"(\d+.?\d*) (.B)", response['message']) if size and size.group(1): @@ -168,16 +168,18 @@ def wrapper(*args, **kwargs): except Exception as e: LOG.exception('Error running %s' % f.__name__) response['status'] = -1 - response['message'] = smart_unicode(e) + response['message'] = smart_str(e) finally: if response: return JsonResponse(response) return wrapper + def _closest_power_of_2(number): return math.pow(2, math.ceil(math.log(number, 2))) + def _to_size_in_bytes(size, unit): unit_size = 1 unit = unit.upper() @@ -192,6 +194,7 @@ def _to_size_in_bytes(size, unit): return float(size) * unit_size + def json_error_handler(view_fn): def decorator(*args, **kwargs): try: diff --git a/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py b/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py index bb89dee16c0..12cf9b0527d 100644 --- a/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py +++ b/desktop/libs/notebook/src/notebook/management/commands/notebook_setup.py @@ -15,18 +15,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging import os import pwd import sys +import logging from django.core import management from django.core.management.base import BaseCommand from django.db import transaction -from desktop.models import Directory, Document, Document2, Document2Permission, SAMPLE_USER_OWNERS -from useradmin.models import get_default_user_group, install_sample_user, User - +from desktop.models import SAMPLE_USER_OWNERS, Directory, Document, Document2, Document2Permission +from useradmin.models import User, get_default_user_group, install_sample_user LOG = logging.getLogger() @@ -47,10 +46,8 @@ def handle(self, *args, **options): sample_user = install_sample_user() with transaction.atomic(): - if sys.version_info[0] > 2: - management.call_command('loaddata', 'initial_notebook_examples.json', verbosity=2) - else: - management.call_command('loaddata', 'initial_notebook_examples.json', verbosity=2, commit=False) + management.call_command('loaddata', 'initial_notebook_examples.json', verbosity=2) + Document.objects.sync() # Get or create sample user directories diff --git a/desktop/libs/notebook/src/notebook/models.py b/desktop/libs/notebook/src/notebook/models.py index 5005a8be268..4ef3923261c 100644 --- a/desktop/libs/notebook/src/notebook/models.py +++ b/desktop/libs/notebook/src/notebook/models.py @@ -15,7 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import json import math import uuid @@ -24,31 +23,23 @@ import datetime from builtins import object, str from datetime import timedelta +from urllib.parse import quote as urllib_quote from django.contrib.sessions.models import Session from django.db.models import Count from django.db.models.functions import Trunc from django.utils.html import escape +from django.utils.translation import gettext as _ from desktop.conf import has_connectors from desktop.lib.connectors.models import _get_installed_connectors -from desktop.lib.i18n import smart_unicode +from desktop.lib.i18n import smart_str from desktop.lib.paths import SAFE_CHARACTERS_URI from desktop.models import Directory, Document2 from notebook.conf import EXAMPLES, get_ordered_interpreters from notebook.connectors.base import Notebook, get_api as _get_api, get_interpreter from useradmin.models import User, install_sample_user -if sys.version_info[0] > 2: - from urllib.parse import quote as urllib_quote - - from django.utils.translation import gettext as _ -else: - from urllib import quote as urllib_quote - - from django.utils.translation import ugettext as _ - - LOG = logging.getLogger() @@ -70,7 +61,7 @@ def escape_rows(rows, nulls_only=False, encoding=None): escaped_field = 'NULL' else: # Prevent error when getting back non utf8 like charset=iso-8859-1 - escaped_field = smart_unicode(field, errors='replace', encoding=encoding) + escaped_field = smart_str(field, errors='replace', encoding=encoding) if not nulls_only: escaped_field = escape(escaped_field).replace(' ', ' ') escaped_row.append(escaped_field) diff --git a/desktop/libs/notebook/src/notebook/models_tests.py b/desktop/libs/notebook/src/notebook/models_tests.py index 8384b687007..4232562162d 100644 --- a/desktop/libs/notebook/src/notebook/models_tests.py +++ b/desktop/libs/notebook/src/notebook/models_tests.py @@ -16,23 +16,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import logging +import sys import json +import logging +from unittest.mock import MagicMock, Mock, patch + import pytest -import sys from desktop.lib.django_test_util import make_logged_in_client from desktop.models import Document2 -from useradmin.models import User - from notebook.conf import EXAMPLES -from notebook.models import install_custom_examples, Analytics - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - +from notebook.models import Analytics, install_custom_examples +from useradmin.models import User LOG = logging.getLogger() @@ -62,7 +57,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=True, is_admin=True) self.user = User.objects.get(username="test") - def test_install_only_hive_queries(self): finish = [ EXAMPLES.AUTO_LOAD.set_for_testing(True), @@ -100,7 +94,6 @@ def test_install_only_hive_queries(self): for f in finish: f() - def test_install_auto_load_disabled(self): f = EXAMPLES.AUTO_LOAD.set_for_testing(False) try: diff --git a/desktop/libs/notebook/src/notebook/routing.py b/desktop/libs/notebook/src/notebook/routing.py index ff2e97c0558..b5681213716 100644 --- a/desktop/libs/notebook/src/notebook/routing.py +++ b/desktop/libs/notebook/src/notebook/routing.py @@ -17,13 +17,9 @@ import sys -from desktop.conf import has_channels - -if sys.version_info[0] > 2: - from django.urls import re_path -else: - from django.conf.urls import url as re_path +from django.urls import re_path +from desktop.conf import has_channels if has_channels(): from notebook import consumer diff --git a/desktop/libs/notebook/src/notebook/sql_utils.py b/desktop/libs/notebook/src/notebook/sql_utils.py index 6a5fdf21e01..d121a9924dc 100644 --- a/desktop/libs/notebook/src/notebook/sql_utils.py +++ b/desktop/libs/notebook/src/notebook/sql_utils.py @@ -15,19 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from future import standard_library -standard_library.install_aliases() -import hashlib import os import re -import sys - -from desktop.lib.i18n import smart_str - -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from StringIO import StringIO as string_io +import hashlib +from io import StringIO as string_io # Note: Might be replaceable by sqlparse.split @@ -50,6 +41,7 @@ def get_statements(hql_query, dialect=None): }) return statements + def get_current_statement(snippet): # Multiquery, if not first statement or arrived to the last query should_close = False @@ -59,7 +51,7 @@ def get_current_statement(snippet): statements = get_statements(snippet['statement'], snippet['dialect'] if 'dialect' in snippet else None) - statement_id = min(statement_id, len(statements) - 1) # In case of removal of statements + statement_id = min(statement_id, len(statements) - 1) # In case of removal of statements previous_statement_hash = compute_statement_hash(statements[statement_id]['statement']) non_edited_statement = previous_statement_hash == handle.get('previous_statement_hash') or not handle.get('previous_statement_hash') @@ -86,10 +78,8 @@ def get_current_statement(snippet): def compute_statement_hash(statement): - if sys.version_info[0] > 2: - return hashlib.sha224(statement.encode()).hexdigest() - else: - return hashlib.sha224(smart_str(statement)).hexdigest() + return hashlib.sha224(statement.encode()).hexdigest() + def split_statements(hql, dialect=None): """ @@ -154,11 +144,13 @@ def split_statements(hql, dialect=None): if current and current != ';': current = current.strip() - statements.append(((start_row, start_col), (end_row, end_col+1), current)) + statements.append(((start_row, start_col), (end_row, end_col + 1), current)) return statements -_SEMICOLON_WHITESPACE = re.compile(";\s*$") + +_SEMICOLON_WHITESPACE = re.compile(r";\s*$") + def strip_trailing_semicolon(query): """As a convenience, we remove trailing semicolons from queries.""" diff --git a/desktop/libs/notebook/src/notebook/tasks.py b/desktop/libs/notebook/src/notebook/tasks.py index da54badb695..96fb34f2802 100644 --- a/desktop/libs/notebook/src/notebook/tasks.py +++ b/desktop/libs/notebook/src/notebook/tasks.py @@ -25,6 +25,7 @@ import logging import datetime from builtins import next, object +from io import StringIO as string_io from celery import states from celery.utils.log import get_task_logger @@ -46,12 +47,6 @@ from notebook.sql_utils import get_current_statement from useradmin.models import User -if sys.version_info[0] > 2: - from io import StringIO as string_io -else: - from StringIO import StringIO as string_io - - LOG_TASK = get_task_logger(__name__) LOG = logging.getLogger() STATE_MAP = { @@ -133,7 +128,7 @@ def download_to_file(notebook, snippet, file_format='csv', max_rows=-1, **kwargs if TASK_SERVER.RESULT_CACHE.get(): with storage.open(result_key, 'rb') as store: with codecs.getreader('utf-8')(store) as text_file: - delimiter = ',' if sys.version_info[0] > 2 else ','.encode('utf-8') + delimiter = ',' csv_reader = csv.reader(text_file, delimiter=delimiter) caches[CACHES_CELERY_QUERY_RESULT_KEY].set(result_key, [row for row in csv_reader], 60 * 5) LOG.info('Caching results %s.' 
% result_key) @@ -403,7 +398,7 @@ def _get_data(task_id): csv_reader = csv_reader[1:] if csv_reader else [] else: f = storage.open(result_key, 'rb') - delimiter = ',' if sys.version_info[0] > 2 else ','.encode('utf-8') + delimiter = ',' csv_reader = csv.reader(f, delimiter=delimiter) headers = next(csv_reader, []) diff --git a/desktop/libs/notebook/src/notebook/tasks_tests.py b/desktop/libs/notebook/src/notebook/tasks_tests.py index 4651f2e5c66..cf2d5d5f4e6 100644 --- a/desktop/libs/notebook/src/notebook/tasks_tests.py +++ b/desktop/libs/notebook/src/notebook/tasks_tests.py @@ -16,28 +16,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging -import pytest import sys +import logging +from unittest.mock import MagicMock, Mock, patch +import pytest from celery import states from desktop.lib.django_test_util import make_logged_in_client -from useradmin.models import User - from notebook.connectors.sql_alchemy import SqlAlchemyApi -from notebook.tasks import run_sync_query, download_to_file, close_statement, get_log - -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - +from notebook.tasks import close_statement, download_to_file, get_log, run_sync_query +from useradmin.models import User LOG = logging.getLogger() - @pytest.mark.django_db class TestRunAsyncQueryTask(): @@ -45,7 +38,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") - def test_run_query_only(self): with patch('notebook.tasks._get_request') as _get_request: with patch('notebook.tasks.get_api') as get_api: @@ -73,7 +65,6 @@ def notebook_dict(key): assert meta['row_counter'] == 2, meta - def test_close_statement(self): with patch('notebook.tasks._get_request') as _get_request: with patch('notebook.tasks.download_to_file') as download_to_file: @@ -96,7 +87,6 @@ def notebook_dict(key): assert response == {'status': 0} - def test_get_log(self): with patch('notebook.tasks._get_request') as _get_request: with patch('notebook.tasks.download_to_file') as download_to_file: @@ -118,7 +108,6 @@ def notebook_dict(key): assert response == '' - @pytest.mark.django_db class TestRunSyncQueryTask(): @@ -126,7 +115,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False) self.user = User.objects.get(username="test") - def test_run_query(self): snippet = {'type': 'mysql', 'statement_raw': 'SHOW TABLES', 'variables': []} diff --git a/desktop/libs/notebook/src/notebook/templates/editor_components.mako b/desktop/libs/notebook/src/notebook/templates/editor_components.mako index 24ad6624757..836f6180e8b 100644 --- a/desktop/libs/notebook/src/notebook/templates/editor_components.mako +++ b/desktop/libs/notebook/src/notebook/templates/editor_components.mako @@ -21,7 +21,6 @@ from webpack_loader.templatetags.webpack_loader import render_bundle from desktop import conf from desktop.auth.backend import is_admin -from desktop.lib.i18n import smart_unicode from desktop.views import _ko, antixss from desktop.webpack_utils import get_hue_bundles diff --git a/desktop/libs/notebook/src/notebook/urls.py b/desktop/libs/notebook/src/notebook/urls.py index 2c38a1644e7..928e33e6081 100644 --- a/desktop/libs/notebook/src/notebook/urls.py +++ b/desktop/libs/notebook/src/notebook/urls.py @@ -15,8 +15,6 @@ # See the 
License for the specific language governing permissions and # limitations under the License. -import sys - from django.urls import re_path from notebook import api as notebook_api, views as notebook_views diff --git a/desktop/libs/notebook/src/notebook/views.py b/desktop/libs/notebook/src/notebook/views.py index c2a20e9689b..e22a02afbd1 100644 --- a/desktop/libs/notebook/src/notebook/views.py +++ b/desktop/libs/notebook/src/notebook/views.py @@ -15,12 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import json import logging -import sys -from django.urls import reverse from django.shortcuts import redirect +from django.urls import reverse +from django.utils.translation import gettext as _ from django.views.decorators.http import require_POST from beeswax.data_export import DOWNLOAD_COOKIE_AGE @@ -29,26 +30,21 @@ from desktop.conf import ENABLE_DOWNLOAD, ENABLE_HUE_5, USE_NEW_EDITOR from desktop.lib import export_csvxls from desktop.lib.connectors.models import Connector -from desktop.lib.django_util import render, JsonResponse +from desktop.lib.django_util import JsonResponse, render from desktop.lib.exceptions_renderable import PopupException -from desktop.models import Document2, Document, FilesystemException, _get_gist_document +from desktop.models import Document, Document2, FilesystemException, _get_gist_document from desktop.views import serve_403_error -from metadata.conf import has_optimizer, has_catalog, has_workload_analytics - -from notebook.conf import get_ordered_interpreters, SHOW_NOTEBOOKS, EXAMPLES +from metadata.conf import has_catalog, has_optimizer, has_workload_analytics +from notebook.conf import EXAMPLES, SHOW_NOTEBOOKS, get_ordered_interpreters from notebook.connectors.base import Notebook, _get_snippet_name, get_interpreter from notebook.connectors.spark_shell import SparkApi -from notebook.decorators import check_editor_access_permission, check_document_access_permission, check_document_modify_permission +from notebook.decorators import check_document_access_permission, check_document_modify_permission, check_editor_access_permission from notebook.management.commands.notebook_setup import Command -from notebook.models import make_notebook, _get_editor_type, get_api, _get_dialect_example - -if sys.version_info[0] > 2: - from django.utils.translation import gettext as _ -else: - from django.utils.translation import ugettext as _ +from notebook.models import _get_dialect_example, _get_editor_type, get_api, make_notebook LOG = logging.getLogger() + @check_document_access_permission def notebook(request, is_embeddable=False): if not SHOW_NOTEBOOKS.get() or not request.user.has_hue_permission(action="access", app='notebook'): @@ -60,7 +56,7 @@ def notebook(request, is_embeddable=False): try: from spark.conf import LIVY_SERVER_SESSION_KIND is_yarn_mode = LIVY_SERVER_SESSION_KIND.get() - except: + except Exception: LOG.exception('Spark is not enabled') return render('notebook.mako', request, { @@ -225,9 +221,9 @@ def execute_and_watch(request): sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True) - from indexer.api3 import _index # Will ve moved to the lib - from indexer.file_format import HiveFormat + from indexer.api3 import _index # Will ve moved to the lib from indexer.fields import Field + from indexer.file_format import HiveFormat file_format = { 'name': 'col', diff --git a/desktop/libs/notebook/src/notebook/views_tests.py 
b/desktop/libs/notebook/src/notebook/views_tests.py index 66a0648612b..9e0227a01c6 100644 --- a/desktop/libs/notebook/src/notebook/views_tests.py +++ b/desktop/libs/notebook/src/notebook/views_tests.py @@ -16,23 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import json -import pytest -import sys +import logging +from unittest.mock import MagicMock, Mock, patch +import pytest from django.urls import reverse -from desktop.lib.django_test_util import make_logged_in_client from desktop.lib.connectors.models import Connector +from desktop.lib.django_test_util import make_logged_in_client from useradmin.models import User -if sys.version_info[0] > 2: - from unittest.mock import patch, Mock, MagicMock -else: - from mock import patch, Mock, MagicMock - - LOG = logging.getLogger() @@ -43,7 +37,6 @@ def setup_method(self): self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=True, is_admin=True) self.user = User.objects.get(username="test") - def test_install_via_insert_mysql(self): with patch('notebook.views.Connector.objects') as ConnectorObjects: with patch('notebook.views.get_interpreter') as get_interpreter: @@ -80,7 +73,6 @@ def test_install_via_insert_mysql(self): make_notebook.assert_called() - def test_install_via_load_hive(self): with patch('notebook.views.Connector.objects') as ConnectorObjects: with patch('notebook.views.get_interpreter') as get_interpreter: @@ -132,7 +124,6 @@ def test_install_via_load_hive(self): fs.do_as_user.assert_called() - def test_install_via_insert_hive(self): with patch('notebook.views.Connector.objects') as ConnectorObjects: with patch('notebook.views.get_interpreter') as get_interpreter: diff --git a/pyproject.toml b/pyproject.toml index 4947ea90d21..332923fbeda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,8 +39,21 @@ ignore = [ "E114", "E117", "W191", + "E731" ] +[tool.ruff.lint.per-file-ignores] +"desktop/libs/notebook/src/notebook/connectors/altus.py" = ["E501"] +"desktop/libs/notebook/src/notebook/connectors/altus_adb.py" = ["E501"] +"desktop/libs/metadata/src/metadata/catalog/dummy_client.py" = ["E501"] +"desktop/libs/liboozie/src/liboozie/submission2.py" = ["E501"] +"desktop/libs/libanalyze/src/libanalyze/rules.py" = ["E501"] +"apps/hbase/src/hbase/api.py" = ["E501"] +"desktop/libs/librdbms/src/librdbms/server/mysql_lib.py" = ["E402"] +"desktop/core/src/desktop/urls.py" = ["E402"] +"desktop/core/src/desktop/lib/thrift_util_test.py" = ["E402"] +"desktop/core/src/desktop/auth/backend.py" = ["E402"] + [tool.ruff.format] docstring-code-format = true docstring-code-line-length = 140 diff --git a/tools/app_reg/pth.py b/tools/app_reg/pth.py index ae421099ebd..5579d2659f7 100644 --- a/tools/app_reg/pth.py +++ b/tools/app_reg/pth.py @@ -19,21 +19,12 @@ Tools to manipulate the .pth file in the virtualenv. 
""" +import os import glob import logging -import os -import sys import common -py2or3 = "2" -if sys.version_info[0] > 2: - from builtins import object - open_file = open - py2or3 = "3" -else: - open_file = file - LOG = logging.getLogger(__name__) PTH_FILE = 'hue.pth' @@ -63,7 +54,7 @@ def _relpath(self, path): def _read(self): if os.path.exists(self._path): - self._entries = set(open_file(self._path).read().split('\n')) + self._entries = set(open(self._path).read().split('\n')) def add(self, app): """ @@ -139,11 +130,7 @@ def save(self): with open(self._path, 'w') as _file: # We want the Hue libraries to come before system libraries in # case there is a name collision. - if py2or3 == "2": - _file.write("import sys; sys.__plen = len(sys.path)\n") _file.write('\n'.join(sorted(self._entries))) - if py2or3 == "2": - _file.write("\nimport sys; new=sys.path[sys.__plen:]; del sys.path[sys.__plen:]; sys.path[0:0]=new\n") LOG.info('=== Saved %s' % self._path) def sync(self, apps): diff --git a/tools/app_reg/registry.py b/tools/app_reg/registry.py index e72dae0e5f2..47d96efbac8 100644 --- a/tools/app_reg/registry.py +++ b/tools/app_reg/registry.py @@ -19,19 +19,15 @@ Registry for the applications """ -import glob -import logging import os import sys +import glob import json +import logging import common from common import cmp -if sys.version_info[0] > 2: - from builtins import object - - LOG = logging.getLogger(__name__) @@ -43,16 +39,14 @@ def __init__(self): """Open the existing registry""" self._reg_path = os.path.join(common.HUE_APP_REG_DIR, 'app.reg') self._initialized = False - self._apps = { } # Map of name -> HueApp + self._apps = {} # Map of name -> HueApp self._open() def _open(self): """Open the registry file. May raise OSError""" if os.path.exists(self._reg_path): - if sys.version_info[0] > 2: - reg_file = open(self._reg_path) - else: - reg_file = file(self._reg_path) + reg_file = open(self._reg_path) + app_list = json.load(reg_file) reg_file.close() @@ -65,10 +59,8 @@ def _open(self): def _write(self, path): """Write out the registry to the given path""" - if sys.version_info[0] > 2: - outfile = open(path, 'w') - else: - outfile = file(path, 'w') + outfile = open(path, 'w') + json.dump(list(self._apps.values()), outfile, cls=AppJsonEncoder, indent=2) outfile.close() @@ -178,14 +170,13 @@ def get_conffiles(self): """get_conffiles() -> A list of config (.ini) files""" return glob.glob(os.path.join(self.abs_path, 'conf', '*.ini')) - def install_conf(self): """ install_conf() -> True/False Symlink the app's conf/*.ini files into the conf directory. 
""" - installed = [ ] + installed = [] for target in self.get_conffiles(): link_name = os.path.join(common.HUE_CONF_DIR, os.path.basename(target)) @@ -220,7 +211,6 @@ def install_conf(self): return False return True - def uninstall_conf(self): """uninstall_conf() -> True/False""" app_conf_dir = os.path.abspath(os.path.join(self.abs_path, 'conf')) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py b/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py index 85e9acad293..b0cef4ad627 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/backend_test_curl.py @@ -35,10 +35,7 @@ from hue_curl import Curl -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ DEFAULT_LOG_DIR = 'logs' log_dir = os.getenv("DESKTOP_LOG_DIR", DEFAULT_LOG_DIR) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py b/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py index 65015669677..f3e800a337c 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/c6_test_command.py @@ -29,10 +29,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py b/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py index 595019418a3..638e789d70d 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/change_owner_of_docs.py @@ -29,10 +29,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py b/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py index 7d7db90c01a..49115f9b6d2 100755 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/db_query_test.py @@ -29,10 +29,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py b/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py index d3f961b453b..3c692d592d5 100644 --- 
a/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/delete_user.py @@ -28,10 +28,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py b/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py index 0e428a494d6..8e500edf4c0 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/estimate_concurrent_users.py @@ -35,10 +35,7 @@ from hadoop import conf as hdfs_conf from hadoop import cluster -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ DEFAULT_LOG_DIR = 'logs' log_dir = os.getenv("DESKTOP_LOG_DIR", DEFAULT_LOG_DIR) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py b/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py index 8bc5b285eb2..027d32cceed 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/hue_desktop_document_cleanup.py @@ -36,10 +36,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ logging.basicConfig() LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py b/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py index 4939a66ea69..04c4a96bde9 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/list_groups.py @@ -28,10 +28,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py index b6203222b85..769db2061da 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_doc2_without_content_object.py @@ -30,10 +30,7 @@ import logging import logging.handlers -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, 
ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py index 3c89d205e04..f62f67a38ae 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/remove_orphaned_docs.py @@ -40,10 +40,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py b/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py index cecf51659de..4aad0a8bd1d 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/rename_duplicate_users.py @@ -28,10 +28,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py b/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py index ed9f4a7d237..e33045f78df 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/run_hive_impala_query.py @@ -27,10 +27,7 @@ import desktop.conf -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ logging.basicConfig() LOG = logging.getLogger(__name__) diff --git a/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py b/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py index f7a4eaab0ca..d8d2954a3b9 100644 --- a/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py +++ b/tools/ops/script_runner/lib/custom_commands/management/commands/share_all_workflows.py @@ -27,10 +27,7 @@ import logging import logging.handlers -if sys.version_info[0] > 2: - from django.utils.translation import gettext_lazy as _t, gettext as _ -else: - from django.utils.translation import ugettext_lazy as _t, ugettext as _ +from django.utils.translation import gettext_lazy as _t, gettext as _ LOG = logging.getLogger(__name__)
-${smart_unicode(file_name) or ""}${smart_unicode(line_number) or ""}${smart_unicode(function_name) or ""}
+${smart_str(file_name) or ""}${smart_str(line_number) or ""}${smart_str(function_name) or ""}