Skip to content

Commit

Permalink
IMPALA-11973: Add absolute_import, division to all eligible Python files
Browse files Browse the repository at this point in the history
This takes steps to make Python 2 behave like Python 3 as
a way to flush out issues with running on Python 3. Specifically,
it handles two main differences:
 1. Python 3 requires absolute imports within packages. This
    can be emulated via "from __future__ import absolute_import"
 2. Python 3 changed the '/' operator to "true" division, which
    no longer floors the result to an integer when both operands
    are integers. This can be emulated via
    "from __future__ import division"

This changes all Python files to add imports for absolute_import
and division. For completeness, this also includes print_function in the
import.

I scrutinized each old-division location and converted those that
need an integer result (e.g. for indices, counts of records, etc.)
to use the integer division '//' operator. Some code was also using
implicit relative imports and needed to be adjusted to handle absolute_import.
This fixes all Pylint warnings about no-absolute-import and old-division,
and these warnings are now banned.

Testing:
 - Ran core tests

Change-Id: Idb0fcbd11f3e8791f5951c4944be44fb580e576b
Reviewed-on: http://gerrit.cloudera.org:8080/19588
Reviewed-by: Joe McDonnell <[email protected]>
Tested-by: Joe McDonnell <[email protected]>
  • Loading branch information
joemcdonnell committed Mar 9, 2023
1 parent 566df80 commit 82bd087
Show file tree
Hide file tree
Showing 354 changed files with 409 additions and 114 deletions.
2 changes: 2 additions & 0 deletions bin/banned_py3k_warnings.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
no-absolute-import
old-division
1 change: 1 addition & 0 deletions bin/dump_breakpad_symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
# $IMPALA_TOOLCHAIN_PACKAGES_HOME/breakpad-*/bin/minidump_stackwalk \
# /tmp/impala-minidumps/impalad/03c0ee26-bfd1-cf3e-43fa49ca-1a6aae25.dmp /tmp/syms

from __future__ import absolute_import, division, print_function
import errno
import logging
import glob
Expand Down
1 change: 1 addition & 0 deletions bin/generate_minidump_collection_testdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# create the files in the interval [now - duration, now]. Minidumps are simulated by
# making the files easily compressible by having some repeated data.

from __future__ import absolute_import, division, print_function
import errno
import os
import random
Expand Down
2 changes: 1 addition & 1 deletion bin/get_code_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

# This tool walks the build directory (release by default) and will print the text, data,
# and bss section sizes of the archives.
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import fnmatch
import os
import re
Expand Down
2 changes: 1 addition & 1 deletion bin/inline_pom.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#
# Usage: inline_pom.py <pom.xml>...

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import re
import sys
from tempfile import mkstemp
Expand Down
2 changes: 1 addition & 1 deletion bin/load-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# This script is used to load the proper datasets for the specified workloads. It loads
# all data via Hive except for parquet data which needs to be loaded via Impala.
# Most ddl commands are executed by Impala.
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import collections
import getpass
import logging
Expand Down
2 changes: 1 addition & 1 deletion bin/parse-thrift-profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
# 2018-04-13T15:06:34.144000 e44af7f93edb8cd6:1b1f801600000000 TRuntimeProfileTree(nodes=[TRuntimeProf...


from __future__ import print_function
from __future__ import absolute_import, division, print_function
from impala_py_lib import profiles
import sys

Expand Down
2 changes: 1 addition & 1 deletion bin/run-workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
# - Stores the execution details in JSON format.
#

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import getpass
import json
import logging
Expand Down
2 changes: 1 addition & 1 deletion bin/single_node_perf_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
# --start_minicluster start a new Hadoop minicluster
# --ninja use ninja, rather than Make, as the build tool

from __future__ import print_function
from __future__ import absolute_import, division, print_function
from optparse import OptionParser
from tempfile import mkdtemp

Expand Down
3 changes: 2 additions & 1 deletion bin/start-impala-cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# Starts up an Impala cluster (ImpalaD + State Store) with the specified number of
# ImpalaD instances. Each ImpalaD runs on a different port allowing this to be run
# on a single machine.
from __future__ import absolute_import, division, print_function
import getpass
import itertools
import json
Expand Down Expand Up @@ -473,7 +474,7 @@ def compute_impalad_mem_limit(cluster_size):
# memory choice here to max out at 12GB. This should be sufficient for tests.
#
# Beware that ASAN builds use more memory than regular builds.
physical_mem_gb = psutil.virtual_memory().total / 1024 / 1024 / 1024
physical_mem_gb = psutil.virtual_memory().total // 1024 // 1024 // 1024
available_mem = int(os.getenv("IMPALA_CLUSTER_MAX_MEM_GB", str(physical_mem_gb)))
mem_limit = int(0.7 * available_mem * 1024 * 1024 * 1024 / cluster_size)
return min(12 * 1024 * 1024 * 1024, mem_limit)
Expand Down
1 change: 1 addition & 0 deletions docker/setup_build_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# Most artifacts are symlinked so need to be dereferenced (e.g. with tar -h) before
# being used as a build context.

from __future__ import absolute_import, division, print_function
import argparse
import glob
import os
Expand Down
2 changes: 1 addition & 1 deletion infra/python/bootstrap_virtualenv.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
# This module can be run with python >= 2.7. It makes no guarantees about usage on
# python < 2.7.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import glob
import logging
import optparse
Expand Down
2 changes: 1 addition & 1 deletion lib/python/impala_py_lib/gdb/impala-gdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# A collection of useful Python GDB modules and commands for
# debugging Impala core dumps.
#
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import gdb
from collections import defaultdict

Expand Down
1 change: 1 addition & 0 deletions lib/python/impala_py_lib/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
import fnmatch
import logging
import os
Expand Down
2 changes: 1 addition & 1 deletion lib/python/impala_py_lib/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

# This file contains library functions to decode and access Impala query profiles.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import base64
import datetime
import zlib
Expand Down
3 changes: 2 additions & 1 deletion testdata/bin/check-hbase-nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"""Given a series of hosts and Zookeeper nodes, make sure that each node is accessible.
"""

from __future__ import absolute_import, division, print_function
import argparse
import hdfs
import logging
Expand Down Expand Up @@ -191,4 +192,4 @@ def is_hdfs_running(host, admin_user):
LOGGER.error(msg)
sys.exit(errors)
else:
sys.exit(1)
sys.exit(1)
2 changes: 1 addition & 1 deletion testdata/bin/generate-schema-statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
# This should be used sparingly, because these commands are executed
# serially.
#
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import collections
import csv
import glob
Expand Down
2 changes: 1 addition & 1 deletion testdata/bin/generate-test-vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
# The pairwise generation is done using the Python 'AllPairs' module. This module can be
# downloaded from http://pypi.python.org/pypi/AllPairs/2.0.1
#
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import collections
import csv
import math
Expand Down
2 changes: 1 addition & 1 deletion testdata/bin/load-tpc-kudu.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# Kudu tables are created in the specified 'target-db' using the existing HDFS tables
# from 'source-db'.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import logging
import os
import sqlparse
Expand Down
1 change: 1 addition & 0 deletions testdata/bin/load_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
'''This script creates a nested version of TPC-H. Non-nested TPC-H must already be
loaded.
'''
from __future__ import absolute_import, division, print_function
import logging
import os

Expand Down
1 change: 1 addition & 0 deletions testdata/bin/random_avro_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
from random import choice, randint, random, shuffle
from os.path import join as join_path
from optparse import OptionParser
Expand Down
2 changes: 1 addition & 1 deletion testdata/bin/rewrite-iceberg-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import glob
import json
import os
Expand Down
2 changes: 1 addition & 1 deletion testdata/bin/wait-for-hiveserver2.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# TODO: Consider combining this with wait-for-metastore.py. A TCLIService client
# can perhaps also talk to the metastore.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import os
import time
import getpass
Expand Down
2 changes: 1 addition & 1 deletion testdata/bin/wait-for-metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# to execute the get_database("default") Thrift RPC until the call succeeds,
# or a timeout is reached.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
import os
import time
from optparse import OptionParser
Expand Down
2 changes: 1 addition & 1 deletion testdata/common/cgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

# Utility code for creating cgroups for the Impala development environment.
# May be used as a library or as a command-line utility for manual testing.
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import os
import sys
import errno
Expand Down
4 changes: 2 additions & 2 deletions testdata/common/text_delims_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# command line, will generate data files in the specified directory and
# print a SQL load statement to incorporate into dataload SQL script generation.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
from shutil import rmtree
from optparse import OptionParser
from contextlib import contextmanager
Expand All @@ -35,7 +35,7 @@
parser.add_option("--file_len", dest="file_len", type="int")

def generate_testescape_files(table_location, only_newline, file_len):
data = ''.join(["1234567890" for _ in xrange(1 + file_len / 10)])
data = ''.join(["1234567890" for _ in xrange(1 + file_len // 10)])

suffix_list = ["\\", ",", "a"]
if only_newline:
Expand Down
4 changes: 2 additions & 2 deletions testdata/common/widetable.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# generate a CSV data file and prints a SQL load statement to incorporate
# into dataload SQL script generation.

from __future__ import print_function
from __future__ import absolute_import, division, print_function
from datetime import datetime, timedelta
import itertools
import optparse
Expand Down Expand Up @@ -51,7 +51,7 @@ def get_columns(num_cols):
iter = itertools.cycle(templates)
# Produces [bool_col1, tinyint_col1, ..., bool_col2, tinyint_col2, ...]
# The final list has 'num_cols' elements.
return [iter.next() % (i / len(templates) + 1) for i in xrange(num_cols)]
return [iter.next() % (i // len(templates) + 1) for i in xrange(num_cols)]

# Data generators for different types. Each generator yields an infinite number of
# value strings suitable for writing to a CSV file.
Expand Down
1 change: 1 addition & 0 deletions tests/authorization/test_authorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#
# Client tests for SQL statement authorization

from __future__ import absolute_import, division, print_function
import os
import pytest
import tempfile
Expand Down
1 change: 1 addition & 0 deletions tests/authorization/test_authorized_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
import pytest
import os
import grp
Expand Down
1 change: 1 addition & 0 deletions tests/authorization/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#
# Client tests for SQL statement authorization

from __future__ import absolute_import, division, print_function
import pytest
import os
import tempfile
Expand Down
1 change: 1 addition & 0 deletions tests/authorization/test_ranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#
# Client tests for SQL statement authorization

from __future__ import absolute_import, division, print_function
import os
import grp
import json
Expand Down
3 changes: 2 additions & 1 deletion tests/beeswax/impala_beeswax.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# client.connect()
# result = client.execute(query_string)
# where result is an object of the class ImpalaBeeswaxResult.
from __future__ import absolute_import, division, print_function
import logging
import time
import shlex
Expand Down Expand Up @@ -282,7 +283,7 @@ def __build_summary_table(self, summary, idx, is_fragment_root, indent_level,
setattr(max_stats, attr, max(getattr(max_stats, attr), val))

if len(node.exec_stats) > 0:
avg_time = agg_stats.latency_ns / len(node.exec_stats)
avg_time = agg_stats.latency_ns // len(node.exec_stats)
else:
avg_time = 0

Expand Down
1 change: 1 addition & 0 deletions tests/benchmark/plugins/clear_buffer_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
from tests.util.cluster_controller import ClusterController
from tests.benchmark.plugins import Plugin

Expand Down
1 change: 1 addition & 0 deletions tests/benchmark/plugins/vtune_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
from os import environ
from tests.util.cluster_controller import ClusterController
from tests.benchmark.plugins import Plugin
Expand Down
3 changes: 1 addition & 2 deletions tests/benchmark/report_benchmark_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@
# be an int (2). The following line changes this behavior so that float will be returned
# if necessary (2.5).

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import difflib
import json
import logging
Expand Down
1 change: 1 addition & 0 deletions tests/catalog_service/test_catalog_service_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#
# Tests to validate the Catalog Service client APIs.

from __future__ import absolute_import, division, print_function
import logging
import pytest

Expand Down
1 change: 1 addition & 0 deletions tests/catalog_service/test_large_num_partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# Tests to validate the Catalog Service works properly when partitions
# need to be fetched in multiple batches.

from __future__ import absolute_import, division, print_function
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import create_single_exec_option_dimension

Expand Down
1 change: 1 addition & 0 deletions tests/common/base_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

# The base class that should be used for tests.
from __future__ import absolute_import, division, print_function
import logging

from tests.common.test_vector import ImpalaTestMatrix
Expand Down
1 change: 1 addition & 0 deletions tests/common/custom_cluster_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# Superclass for all tests that need a custom cluster.
# TODO: Configure cluster size and other parameters.

from __future__ import absolute_import, division, print_function
import logging
import os
import os.path
Expand Down
1 change: 1 addition & 0 deletions tests/common/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
import json
import logging
import os
Expand Down
1 change: 1 addition & 0 deletions tests/common/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# and other functions used for checking for strings in files and
# directories.

from __future__ import absolute_import, division, print_function
import os
import re
import tempfile
Expand Down
1 change: 1 addition & 0 deletions tests/common/iceberg_test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
import datetime

from tests.common.impala_test_suite import ImpalaTestSuite
Expand Down
Loading

0 comments on commit 82bd087

Please sign in to comment.