Commit 5f7224a8 (unverified), authored by Tao Feng, committed by GitHub

Support indexing hive view (#244)

* Support indexing hive view

* clean code
parent 125e2ca1
...@@ -29,7 +29,8 @@ class HiveTableMetadataExtractor(Extractor): ...@@ -29,7 +29,8 @@ class HiveTableMetadataExtractor(Extractor):
SELECT source.* FROM SELECT source.* FROM
(SELECT t.TBL_ID, d.NAME as `schema`, t.TBL_NAME name, t.TBL_TYPE, tp.PARAM_VALUE as description, (SELECT t.TBL_ID, d.NAME as `schema`, t.TBL_NAME name, t.TBL_TYPE, tp.PARAM_VALUE as description,
p.PKEY_NAME as col_name, p.INTEGER_IDX as col_sort_order, p.PKEY_NAME as col_name, p.INTEGER_IDX as col_sort_order,
p.PKEY_TYPE as col_type, p.PKEY_COMMENT as col_description, 1 as "is_partition_col" p.PKEY_TYPE as col_type, p.PKEY_COMMENT as col_description, 1 as "is_partition_col",
IF(t.TBL_TYPE = 'VIRTUAL_VIEW', 1, 0) "is_view"
FROM TBLS t FROM TBLS t
JOIN DBS d ON t.DB_ID = d.DB_ID JOIN DBS d ON t.DB_ID = d.DB_ID
JOIN PARTITION_KEYS p ON t.TBL_ID = p.TBL_ID JOIN PARTITION_KEYS p ON t.TBL_ID = p.TBL_ID
...@@ -38,7 +39,8 @@ class HiveTableMetadataExtractor(Extractor): ...@@ -38,7 +39,8 @@ class HiveTableMetadataExtractor(Extractor):
UNION UNION
SELECT t.TBL_ID, d.NAME as `schema`, t.TBL_NAME name, t.TBL_TYPE, tp.PARAM_VALUE as description, SELECT t.TBL_ID, d.NAME as `schema`, t.TBL_NAME name, t.TBL_TYPE, tp.PARAM_VALUE as description,
c.COLUMN_NAME as col_name, c.INTEGER_IDX as col_sort_order, c.COLUMN_NAME as col_name, c.INTEGER_IDX as col_sort_order,
c.TYPE_NAME as col_type, c.COMMENT as col_description, 0 as "is_partition_col" c.TYPE_NAME as col_type, c.COMMENT as col_description, 0 as "is_partition_col",
IF(t.TBL_TYPE = 'VIRTUAL_VIEW', 1, 0) "is_view"
FROM TBLS t FROM TBLS t
JOIN DBS d ON t.DB_ID = d.DB_ID JOIN DBS d ON t.DB_ID = d.DB_ID
JOIN SDS s ON t.SD_ID = s.SD_ID JOIN SDS s ON t.SD_ID = s.SD_ID
...@@ -99,12 +101,13 @@ class HiveTableMetadataExtractor(Extractor): ...@@ -99,12 +101,13 @@ class HiveTableMetadataExtractor(Extractor):
last_row = row last_row = row
columns.append(ColumnMetadata(row['col_name'], row['col_description'], columns.append(ColumnMetadata(row['col_name'], row['col_description'],
row['col_type'], row['col_sort_order'])) row['col_type'], row['col_sort_order']))
is_view = last_row['is_view'] == 1
yield TableMetadata('hive', self._cluster, yield TableMetadata('hive', self._cluster,
last_row['schema'], last_row['schema'],
last_row['name'], last_row['name'],
last_row['description'], last_row['description'],
columns) columns,
is_view=is_view)
def _get_raw_extract_iter(self): def _get_raw_extract_iter(self):
# type: () -> Iterator[Dict[str, Any]] # type: () -> Iterator[Dict[str, Any]]
......
...@@ -2,7 +2,7 @@ import os ...@@ -2,7 +2,7 @@ import os
from setuptools import setup, find_packages from setuptools import setup, find_packages
__version__ = '2.5.4' __version__ = '2.5.5'
requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt') requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')
with open(requirements_path) as requirements_file: with open(requirements_path) as requirements_file:
......
...@@ -42,7 +42,8 @@ class TestHiveTableMetadataExtractor(unittest.TestCase): ...@@ -42,7 +42,8 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
connection.execute = sql_execute connection.execute = sql_execute
table = {'schema': 'test_schema', table = {'schema': 'test_schema',
'name': 'test_table', 'name': 'test_table',
'description': 'a table for testing'} 'description': 'a table for testing',
'is_view': 0}
sql_execute.return_value = [ sql_execute.return_value = [
self._union( self._union(
...@@ -86,7 +87,8 @@ class TestHiveTableMetadataExtractor(unittest.TestCase): ...@@ -86,7 +87,8 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
ColumnMetadata('is_active', None, 'boolean', 2), ColumnMetadata('is_active', None, 'boolean', 2),
ColumnMetadata('source', 'description of source', 'varchar', 3), ColumnMetadata('source', 'description of source', 'varchar', 3),
ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4), ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
ColumnMetadata('ds', None, 'varchar', 5)]) ColumnMetadata('ds', None, 'varchar', 5)],
is_view=False)
self.assertEqual(expected.__repr__(), actual.__repr__()) self.assertEqual(expected.__repr__(), actual.__repr__())
self.assertIsNone(extractor.extract()) self.assertIsNone(extractor.extract())
...@@ -99,15 +101,18 @@ class TestHiveTableMetadataExtractor(unittest.TestCase): ...@@ -99,15 +101,18 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
connection.execute = sql_execute connection.execute = sql_execute
table = {'schema': 'test_schema1', table = {'schema': 'test_schema1',
'name': 'test_table1', 'name': 'test_table1',
'description': 'test table 1'} 'description': 'test table 1',
'is_view': 0}
table1 = {'schema': 'test_schema1', table1 = {'schema': 'test_schema1',
'name': 'test_table2', 'name': 'test_table2',
'description': 'test table 2'} 'description': 'test table 2',
'is_view': 0}
table2 = {'schema': 'test_schema2', table2 = {'schema': 'test_schema2',
'name': 'test_table3', 'name': 'test_table3',
'description': 'test table 3'} 'description': 'test table 3',
'is_view': 0}
sql_execute.return_value = [ sql_execute.return_value = [
self._union( self._union(
...@@ -171,18 +176,21 @@ class TestHiveTableMetadataExtractor(unittest.TestCase): ...@@ -171,18 +176,21 @@ class TestHiveTableMetadataExtractor(unittest.TestCase):
ColumnMetadata('is_active', None, 'boolean', 2), ColumnMetadata('is_active', None, 'boolean', 2),
ColumnMetadata('source', 'description of source', 'varchar', 3), ColumnMetadata('source', 'description of source', 'varchar', 3),
ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4), ColumnMetadata('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
ColumnMetadata('ds', None, 'varchar', 5)]) ColumnMetadata('ds', None, 'varchar', 5)],
is_view=False)
self.assertEqual(expected.__repr__(), extractor.extract().__repr__()) self.assertEqual(expected.__repr__(), extractor.extract().__repr__())
expected = TableMetadata('hive', 'gold', 'test_schema1', 'test_table2', 'test table 2', expected = TableMetadata('hive', 'gold', 'test_schema1', 'test_table2', 'test table 2',
[ColumnMetadata('col_name', 'description of col_name', 'varchar', 0), [ColumnMetadata('col_name', 'description of col_name', 'varchar', 0),
ColumnMetadata('col_name2', 'description of col_name2', 'varchar', 1)]) ColumnMetadata('col_name2', 'description of col_name2', 'varchar', 1)],
is_view=False)
self.assertEqual(expected.__repr__(), extractor.extract().__repr__()) self.assertEqual(expected.__repr__(), extractor.extract().__repr__())
expected = TableMetadata('hive', 'gold', 'test_schema2', 'test_table3', 'test table 3', expected = TableMetadata('hive', 'gold', 'test_schema2', 'test_table3', 'test table 3',
[ColumnMetadata('col_id3', 'description of col_id3', 'varchar', 0), [ColumnMetadata('col_id3', 'description of col_id3', 'varchar', 0),
ColumnMetadata('col_name3', 'description of col_name3', ColumnMetadata('col_name3', 'description of col_name3',
'varchar', 1)]) 'varchar', 1)],
is_view=False)
self.assertEqual(expected.__repr__(), extractor.extract().__repr__()) self.assertEqual(expected.__repr__(), extractor.extract().__repr__())
self.assertIsNone(extractor.extract()) self.assertIsNone(extractor.extract())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment