Skip to content

Commit

Permalink
[core][cleanup] Refactor and remove old Py2 code checks and methods (#…
Browse files Browse the repository at this point in the history
…3683)

## What changes were proposed in this pull request?

- Remove all Py2 check conditionals.
- Remove some unused imports.
- Streamline the file-opening code sections and remove Py2 support from them.
- Remove the `smart_unicode` method and replace its usages with `smart_str` wherever required. This also required adding an `encoding` parameter to the `smart_str` method so that it stays compatible with the call sites switched over from `smart_unicode`.


- Because so many files are modified by the actions above, we are also fixing the Ruff violations in those files, **which made this change big.**
- **The idea is to merge this as a single commit with as few regressions as possible. If something major comes up, we can simply revert to the previous state; minor regressions can be fixed on top of this change.**

## How was this patch tested?

- Manual, basic Hue testing.
- Running existing full unit test suite.
- E2E cluster setup
  • Loading branch information
Harshg999 authored Oct 21, 2024
1 parent 2e10b71 commit ccecafa
Show file tree
Hide file tree
Showing 452 changed files with 8,174 additions and 9,402 deletions.
1 change: 0 additions & 1 deletion apps/about/src/about/templates/admin_wizard.mako
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ from metadata.conf import OPTIMIZER, has_optimizer
from desktop.auth.backend import is_admin
from desktop.conf import has_connectors
from desktop.lib.i18n import smart_unicode
from desktop.views import commonheader, commonfooter
if sys.version_info[0] > 2:
Expand Down
7 changes: 1 addition & 6 deletions apps/about/src/about/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from django.urls import re_path

from about import views as about_views

if sys.version_info[0] > 2:
from django.urls import re_path
else:
from django.conf.urls import url as re_path

urlpatterns = [
re_path(r'^$', about_views.admin_wizard, name='index'),
re_path(r'^admin_wizard$', about_views.admin_wizard, name='admin_wizard'),
Expand Down
9 changes: 1 addition & 8 deletions apps/about/src/about/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from builtins import str
import logging
import sys
from django.utils.translation import gettext as _

from desktop import appmanager
from desktop.auth.backend import is_hue_admin
Expand All @@ -26,11 +24,6 @@
from desktop.models import Settings, hue_version
from desktop.views import collect_usage

if sys.version_info[0] > 2:
from django.utils.translation import gettext as _
else:
from django.utils.translation import ugettext as _


def admin_wizard(request):
if is_hue_admin(request.user):
Expand Down
1 change: 0 additions & 1 deletion apps/beeswax/src/beeswax/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# limitations under the License.

import re
import sys
import json
import logging
from builtins import zip
Expand Down
29 changes: 8 additions & 21 deletions apps/beeswax/src/beeswax/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import pytest
import sys
import logging
from unittest.mock import Mock, patch

import pytest
from django.test import TestCase
from requests.exceptions import ReadTimeout

from beeswax.api import _autocomplete, get_functions
from desktop.lib.django_test_util import make_logged_in_client
from desktop.lib.test_utils import add_to_group, grant_access
from useradmin.models import User

from beeswax.api import _autocomplete, get_functions


if sys.version_info[0] > 2:
from unittest.mock import patch, Mock
else:
from mock import patch, Mock


LOG = logging.getLogger()


Expand All @@ -47,9 +39,8 @@ def setup_method(self):
self.client = make_logged_in_client(username="test", groupname="default", recreate=True, is_superuser=False)
self.user = User.objects.get(username="test")


def test_autocomplete_time_out(self):
get_tables_meta=Mock(
get_tables_meta = Mock(
side_effect=ReadTimeout("HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)")
)
db = Mock(
Expand All @@ -65,7 +56,6 @@ def test_autocomplete_time_out(self):
'error': "HTTPSConnectionPool(host='gethue.com', port=10001): Read timed out. (read timeout=120)"
})


def test_get_functions(self):
db = Mock(
get_functions=Mock(
Expand All @@ -83,7 +73,6 @@ def test_get_functions(self):
resp ==
[{'name': 'f1'}, {'name': 'f2'}])


def test_get_functions(self):
with patch('beeswax.api._get_functions') as _get_functions:
db = Mock()
Expand All @@ -97,12 +86,11 @@ def test_get_functions(self):
resp['functions'] ==
[{'name': 'f1'}, {'name': 'f2'}, {'name': 'f3'}])


def test_get_function(self):
db = Mock()
db.client = Mock(query_server = {'dialect': 'hive'})
db.client = Mock(query_server={'dialect': 'hive'})
db.get_function = Mock(
return_value = [
return_value=[
['floor_month(param) - Returns the timestamp at a month granularity'],
['param needs to be a timestamp value'],
['Example:'],
Expand All @@ -123,8 +111,7 @@ def test_get_function(self):
'> SELECT floor_month(CAST(\'yyyy-MM-dd HH:mm:ss\' AS TIMESTAMP)) FROM src;\nyyyy-MM-01 00:00:00'
})


db.client = Mock(query_server = {'dialect': 'impala'})
db.client = Mock(query_server={'dialect': 'impala'})
data = _autocomplete(db, operation='function')

assert data['function'] == {}
4 changes: 0 additions & 4 deletions apps/beeswax/src/beeswax/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division

import sys
import math
import logging
import os.path
from builtins import str

from django.utils.translation import gettext as _, gettext_lazy as _t

Expand Down
87 changes: 42 additions & 45 deletions apps/beeswax/src/beeswax/create_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,54 +15,47 @@
# See the License for the specific language governing permissions and
# limitations under the License.


from __future__ import division
from builtins import str
from builtins import range
from builtins import object
import re
import csv
import gzip
import json
import logging
import math
import re
import sys
import logging

from django.urls import reverse
from django.http import QueryDict
from django.urls import reverse
from django.utils.translation import gettext as _

from aws.s3.s3fs import S3FileSystemException
from beeswax.common import TERMINATORS
from beeswax.design import hql_query
from beeswax.forms import (
TERMINATOR_CHOICES,
ColumnTypeFormSet,
CreateByImportDelimForm,
CreateByImportFileForm,
CreateTableForm,
PartitionTypeFormSet,
)
from beeswax.server import dbms
from beeswax.server.dbms import QueryServerException
from beeswax.views import execute_directly
from desktop.context_processors import get_app_name
from desktop.lib import django_mako, i18n
from desktop.lib.django_forms import MultiForm
from desktop.lib.django_util import render
from desktop.lib.exceptions_renderable import PopupException
from desktop.lib.django_forms import MultiForm
from desktop.models import _get_apps
from hadoop.fs import hadoopfs

from beeswax.common import TERMINATORS
from beeswax.design import hql_query
from beeswax.forms import CreateTableForm, ColumnTypeFormSet,\
PartitionTypeFormSet, CreateByImportFileForm, CreateByImportDelimForm,\
TERMINATOR_CHOICES
from beeswax.server import dbms
from beeswax.server.dbms import QueryServerException
from beeswax.views import execute_directly

if sys.version_info[0] > 2:
from django.utils.translation import gettext as _
else:
from django.utils.translation import ugettext as _


LOG = logging.getLogger()


def create_table(request, database='default'):
"""Create a table by specifying its attributes manually"""
db = dbms.get(request.user)
dbs = db.get_databases()
databases = [{'name':db, 'url':reverse('beeswax:create_table', kwargs={'database': db})} for db in dbs]
databases = [{'name': db, 'url': reverse('beeswax:create_table', kwargs={'database': db})} for db in dbs]

form = MultiForm(
table=CreateTableForm,
Expand All @@ -77,8 +70,8 @@ def create_table(request, database='default'):

if request.POST.get('create'):
if form.is_valid():
columns = [ f.cleaned_data for f in form.columns.forms ]
partition_columns = [ f.cleaned_data for f in form.partitions.forms ]
columns = [f.cleaned_data for f in form.columns.forms]
partition_columns = [f.cleaned_data for f in form.partitions.forms]
proposed_query = django_mako.render_to_string("create_table_statement.mako", {
'databases': databases,
'database': database,
Expand Down Expand Up @@ -109,15 +102,16 @@ def create_table(request, database='default'):

IMPORT_PEEK_SIZE = 5 * 1024**2
IMPORT_PEEK_NLINES = 10
DELIMITERS = [ hive_val for hive_val, desc, ascii in TERMINATORS ]
DELIMITER_READABLE = {'\\001' : _('ctrl-As'),
'\\002' : _('ctrl-Bs'),
'\\003' : _('ctrl-Cs'),
'\\t' : _('tabs'),
',' : _('commas'),
' ' : _('spaces')}
DELIMITERS = [hive_val for hive_val, desc, ascii in TERMINATORS]
DELIMITER_READABLE = {'\\001': _('ctrl-As'),
'\\002': _('ctrl-Bs'),
'\\003': _('ctrl-Cs'),
'\\t': _('tabs'),
',': _('commas'),
' ': _('spaces')}
FILE_READERS = []


def import_wizard(request, database='default'):
"""
Help users define table and based on a file they want to import to Hive.
Expand All @@ -133,7 +127,7 @@ def import_wizard(request, database='default'):

db = dbms.get(request.user)
dbs = db.get_databases()
databases = [{'name':db, 'url':reverse('beeswax:import_wizard', kwargs={'database': db})} for db in dbs]
databases = [{'name': db, 'url': reverse('beeswax:import_wizard', kwargs={'database': db})} for db in dbs]

if request.method == 'POST':
#
Expand Down Expand Up @@ -164,7 +158,7 @@ def import_wizard(request, database='default'):
cancel_s3_column_def = request.POST.get('cancel_create') # Step 3 -> 2

# Exactly one of these should be True
if len([_f for _f in (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create, cancel_s2_user_delim, cancel_s3_column_def) if _f]) != 1:
if len([_f for _f in (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create, cancel_s2_user_delim, cancel_s3_column_def) if _f]) != 1: # noqa: E501
raise PopupException(_('Invalid form submission'))

if not do_s2_auto_delim:
Expand Down Expand Up @@ -198,7 +192,8 @@ def import_wizard(request, database='default'):
raise PopupException(_('Path location "%s" is invalid: %s') % (path, e))

delim_is_auto = True
fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS)
fields_list, n_cols, s2_delim_form = _delim_preview(
request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS)

if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
# Delimit based on input
Expand Down Expand Up @@ -236,7 +231,7 @@ def import_wizard(request, database='default'):
try:
fields_list_for_json = list(fields_list)
if fields_list_for_json:
fields_list_for_json[0] = [re.sub('[^\w]', '', a) for a in fields_list_for_json[0]] # Cleaning headers
fields_list_for_json[0] = [re.sub(r'[^\w]', '', a) for a in fields_list_for_json[0]] # Cleaning headers
apps_list = _get_apps(request.user, '')
return render('import_wizard_define_columns.mako', request, {
'apps': apps_list,
Expand All @@ -251,7 +246,8 @@ def import_wizard(request, database='default'):
'databases': databases
})
except Exception as e:
raise PopupException(_("The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."), detail=e)
raise PopupException(_(
"The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."), detail=e)

#
# Final: Execute
Expand All @@ -271,7 +267,7 @@ def import_wizard(request, database='default'):
'path': path,
'skip_header': request.GET.get('removeHeader', 'off').lower() == 'on'
},
'columns': [ f.cleaned_data for f in s3_col_formset.forms ],
'columns': [f.cleaned_data for f in s3_col_formset.forms],
'partition_columns': [],
'database': database,
'databases': databases
Expand Down Expand Up @@ -337,7 +333,7 @@ def _delim_preview(fs, file_form, encoding, file_types, delimiters):
LOG.exception(msg)
raise PopupException(msg)

n_cols = max([ len(row) for row in fields_list ])
n_cols = max([len(row) for row in fields_list])
# ``delimiter`` is a MultiValueField. delimiter_0 and delimiter_1 are the sub-fields.
delimiter_0 = delim
delimiter_1 = ''
Expand Down Expand Up @@ -409,13 +405,12 @@ def score_delim(fields_list):
avg_n_fields = math.floor(sum(len_list) / n_lines)
sq_of_exp = avg_n_fields * avg_n_fields

len_list_sq = [l * l for l in len_list]
len_list_sq = [len * len for len in len_list]
exp_of_sq = math.floor(sum(len_list_sq) / n_lines)
var = exp_of_sq - sq_of_exp
# Favour more fields
return (1000.0 / (var + 1)) + avg_n_fields


max_score = -1
res = (None, None)

Expand All @@ -424,7 +419,7 @@ def score_delim(fields_list):
delimiter = delim.decode('string_escape')
try:
fields_list = _get_rows(lines, delimiter)
except:
except Exception:
LOG.exception('failed to get rows')
fields_list = [line.split(delimiter) for line in lines if line]

Expand Down Expand Up @@ -472,6 +467,7 @@ def readlines(fileobj, encoding):
except UnicodeError:
return None


FILE_READERS.append(GzipFileReader)


Expand All @@ -488,6 +484,7 @@ def readlines(fileobj, encoding):
except UnicodeError:
return None


FILE_READERS.append(TextFileReader)


Expand Down
Loading

0 comments on commit ccecafa

Please sign in to comment.