Skip to content

Commit

Permalink
[DAE-121] Adding custom get_partition_values method (#50)
Browse files Browse the repository at this point in the history
  • Loading branch information
Felipe Miquelim authored Feb 24, 2021
1 parent 0d52e52 commit 36c945e
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 1 deletion.
5 changes: 4 additions & 1 deletion docs/source/getstarted.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ Click on the following links to open the [examples](https://github.com/quintoand

**[#8 Bulk drop partition values from a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/bulk_drop_partitions.py)**

**[#9 Get partition values from a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/get_partition_values_from_table.py)**

## Available methods

You can see all the Hive Metastore server available methods by looking at the
Expand All @@ -50,4 +52,5 @@ the [`HiveMetastoreClient`](https://github.com/quintoandar/hive-metastore-client
- [`create_external_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_external_table)
- [`get_partition_keys_objects`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_keys_objects)
- [`get_partition_keys_names`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_keys_names)
- [`bulk_drop_partitions`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.bulk_drop_partitions)
- [`bulk_drop_partitions`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.bulk_drop_partitions)
- [`get_partition_values_from_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_values_from_table)
13 changes: 13 additions & 0 deletions examples/get_partition_values_from_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from hive_metastore_client import HiveMetastoreClient

# Host and port of the Hive Metastore to connect to.
HIVE_HOST = "<ADD_HIVE_HOST_HERE>"
HIVE_PORT = 9083

# Database and table whose partition values will be fetched.
DATABASE_NAME = "database_name"
TABLE_NAME = "table_name"

with HiveMetastoreClient(HIVE_HOST, HIVE_PORT) as hive_client:
    # The client returns a list of lists: one inner list of values for
    # each partition entry reported for the table.
    return_value = hive_client.get_partition_values_from_table(
        db_name=DATABASE_NAME, table_name=TABLE_NAME
    )
29 changes: 29 additions & 0 deletions hive_metastore_client/hive_metastore_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Database,
AlreadyExistsException,
Table,
PartitionValuesRequest,
NoSuchObjectException,
)

Expand Down Expand Up @@ -296,3 +297,31 @@ def bulk_drop_partitions(
f"{partitions_not_dropped}, msg=Some partition values were not "
"dropped because they do not exist."
)

def get_partition_values_from_table(
    self, db_name: str, table_name: str
) -> List[List[str]]:
    """
    Gets the partition values from a table.

    It automatically fetches the table's partition keys (through
    `get_partition_keys_objects`) and sends them to the metastore in a
    `PartitionValuesRequest`.

    An empty list is returned when the response carries no partition
    values, i.e. when the table has no partitions.

    NOTE(review): this method does not catch any exception itself. Whether
    a missing table results in an empty list or in an error depends on
    `get_partition_keys_objects` and `get_partition_values` - confirm.

    :param db_name: database name where the table is at
    :param table_name: table name which the partitions belong to
    :return: a list with one entry per partition value row returned by the
        metastore, each entry being that row's list of values
    """
    partition_keys = self.get_partition_keys_objects(
        db_name=db_name, table_name=table_name
    )
    request = PartitionValuesRequest(
        dbName=db_name, tblName=table_name, partitionKeys=partition_keys,
    )
    response = self.get_partition_values(request)

    # `partitionValues` may be left unset (None) on the response object;
    # treat that the same as "no partitions" instead of failing on iteration.
    partition_rows = response.partitionValues or []

    return [partition.row for partition in partition_rows]
46 changes: 46 additions & 0 deletions tests/unit/hive_metastore_client/test_hive_metastore_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
FieldSchema,
NoSuchObjectException,
AlreadyExistsException,
PartitionValuesRequest,
)


Expand Down Expand Up @@ -472,3 +473,48 @@ def test_bulk_drop_partitions_with_errors(
)

assert mock_drop_partition.call_count == len(partition_list)

@mock.patch.object(HiveMetastoreClient, "get_partition_values", return_value=[])
@mock.patch.object(
    HiveMetastoreClient, "get_partition_keys_objects", return_value=[]
)
def test_get_partition_values_from_table_with_partitioned_table(
    self,
    mocked_get_partition_keys_objects,
    mocked_get_partition_values,
    hive_metastore_client,
):
    """
    Checks that the rows of the mocked metastore response are returned as a
    list of lists, and that both collaborators are called exactly once with
    the expected arguments.
    """
    # arrange
    database_name, table_name = "database_name", "table_name"
    expected_return = [["partition_a"], ["partition_b"]]

    # The response only needs a `partitionValues` attribute holding objects
    # that expose a `row` list; each row is copied so the mocks do not share
    # list objects with the expectation.
    mocked_get_partition_values.return_value = Mock(
        partitionValues=[Mock(row=list(row)) for row in expected_return]
    )

    # The patched `get_partition_keys_objects` returns [], so the request is
    # expected to carry an empty list of partition keys.
    expected_partition_values_request = PartitionValuesRequest(
        dbName=database_name, tblName=table_name, partitionKeys=[],
    )

    # act
    returned_value = hive_metastore_client.get_partition_values_from_table(
        database_name, table_name
    )

    # assert
    assert returned_value == expected_return
    mocked_get_partition_keys_objects.assert_called_once_with(
        db_name=database_name, table_name=table_name
    )
    mocked_get_partition_values.assert_called_once_with(
        expected_partition_values_request
    )

0 comments on commit 36c945e

Please sign in to comment.