Unverified commit 5f7224a8, authored by Tao Feng, committed by GitHub

Support indexing hive view (#244)

* Support indexing hive view

* clean code
parent 125e2ca1
......@@ -29,7 +29,8 @@ class HiveTableMetadataExtractor(Extractor):
SELECT source.* FROM
(SELECT t.TBL_ID, d.NAME as `schema`, t.TBL_NAME name, t.TBL_TYPE, tp.PARAM_VALUE as description,
p.PKEY_NAME as col_name, p.INTEGER_IDX as col_sort_order,
p.PKEY_TYPE as col_type, p.PKEY_COMMENT as col_description, 1 as "is_partition_col"
p.PKEY_TYPE as col_type, p.PKEY_COMMENT as col_description, 1 as "is_partition_col",
IF(t.TBL_TYPE = 'VIRTUAL_VIEW', 1, 0) "is_view"
FROM TBLS t
JOIN DBS d ON t.DB_ID = d.DB_ID
JOIN PARTITION_KEYS p ON t.TBL_ID = p.TBL_ID
......@@ -38,7 +39,8 @@ class HiveTableMetadataExtractor(Extractor):
UNION
SELECT t.TBL_ID, d.NAME as `schema`, t.TBL_NAME name, t.TBL_TYPE, tp.PARAM_VALUE as description,
c.COLUMN_NAME as col_name, c.INTEGER_IDX as col_sort_order,
c.TYPE_NAME as col_type, c.COMMENT as col_description, 0 as "is_partition_col"
c.TYPE_NAME as col_type, c.COMMENT as col_description, 0 as "is_partition_col",
IF(t.TBL_TYPE = 'VIRTUAL_VIEW', 1, 0) "is_view"
FROM TBLS t
JOIN DBS d ON t.DB_ID = d.DB_ID
JOIN SDS s ON t.SD_ID = s.SD_ID
......@@ -99,12 +101,13 @@ class HiveTableMetadataExtractor(Extractor):
last_row = row
columns.append(ColumnMetadata(row['col_name'], row['col_description'],
row['col_type'], row['col_sort_order']))
is_view = last_row['is_view'] == 1
yield TableMetadata('hive', self._cluster,
last_row['schema'],
last_row['name'],
last_row['description'],
columns)
columns,
is_view=is_view)
def _get_raw_extract_iter(self):
# type: () -> Iterator[Dict[str, Any]]
......
......@@ -2,7 +2,7 @@ import os
from setuptools import setup, find_packages
__version__ = '2.5.4'
__version__ = '2.5.5'
requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')
with open(requirements_path) as requirements_file:
......
......@@ -42,7 +42,8 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
connection.execute = sql_execute
table = {'schema': 'test_schema',
'name': 'test_table',
'description': 'a table for testing'}
'description': 'a table for testing',
'is_view': 0}
sql_execute.return_value = [
self._union(
......@@ -86,7 +87,8 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
ColumnMetadata('is_active', None, 'boolean', 2),
ColumnMetadata('source', 'description of source', 'varchar', 3),
ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
ColumnMetadata('ds', None, 'varchar', 5)])
ColumnMetadata('ds', None, 'varchar', 5)],
is_view=False)
self.assertEqual(expected.__repr__(), actual.__repr__())
self.assertIsNone(extractor.extract())
......@@ -99,15 +101,18 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
connection.execute = sql_execute
table = {'schema': 'test_schema1',
'name': 'test_table1',
'description': 'test table 1'}
'description': 'test table 1',
'is_view': 0}
table1 = {'schema': 'test_schema1',
'name': 'test_table2',
'description': 'test table 2'}
'description': 'test table 2',
'is_view': 0}
table2 = {'schema': 'test_schema2',
'name': 'test_table3',
'description': 'test table 3'}
'description': 'test table 3',
'is_view': 0}
sql_execute.return_value = [
self._union(
......@@ -171,18 +176,21 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
ColumnMetadata('is_active', None, 'boolean', 2),
ColumnMetadata('source', 'description of source', 'varchar', 3),
ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
ColumnMetadata('ds', None, 'varchar', 5)])
ColumnMetadata('ds', None, 'varchar', 5)],
is_view=False)
self.assertEqual(expected.__repr__(), extractor.extract().__repr__())
expected = TableMetadata('hive', 'gold', 'test_schema1', 'test_table2', 'test table 2',
[ColumnMetadata('col_name', 'description of col_name', 'varchar', 0),
ColumnMetadata('col_name2', 'description of col_name2', 'varchar', 1)])
ColumnMetadata('col_name2', 'description of col_name2', 'varchar', 1)],
is_view=False)
self.assertEqual(expected.__repr__(), extractor.extract().__repr__())
expected = TableMetadata('hive', 'gold', 'test_schema2', 'test_table3', 'test table 3',
[ColumnMetadata('col_id3', 'description of col_id3', 'varchar', 0),
ColumnMetadata('col_name3', 'description of col_name3',
'varchar', 1)])
'varchar', 1)],
is_view=False)
self.assertEqual(expected.__repr__(), extractor.extract().__repr__())
self.assertIsNone(extractor.extract())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment