Commit 8b9456f1 (unverified), authored by Tao Feng, committed by GitHub

Refactor TableESDocument to match the schema in the search service (#92)

parent 1f15f5a3
...@@ -27,13 +27,13 @@ class Neo4jSearchDataExtractor(Extractor): ...@@ -27,13 +27,13 @@ class Neo4jSearchDataExtractor(Extractor):
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag)
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp) OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema_name, RETURN db.name as database, cluster.name AS cluster, schema.name AS schema_name,
table.name AS table_name, table.key AS table_key, table_description.description AS table_description, table.name AS name, table.key AS key, table_description.description AS description,
time_stamp.last_updated_timestamp AS table_last_updated_epoch, time_stamp.last_updated_timestamp AS last_updated_epoch,
EXTRACT(c in COLLECT(DISTINCT cols)| c.name) AS column_names, EXTRACT(c in COLLECT(DISTINCT cols)| c.name) AS column_names,
EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions, EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions,
REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage, REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage,
COUNT(DISTINCT user.email) as unique_usage, COUNT(DISTINCT user.email) as unique_usage,
COLLECT(DISTINCT tags.key) as tag_names COLLECT(DISTINCT tags.key) as tags
ORDER BY table.name; ORDER BY table.name;
""" """
) )
......
...@@ -11,27 +11,28 @@ class TableESDocument(ElasticsearchDocument): ...@@ -11,27 +11,28 @@ class TableESDocument(ElasticsearchDocument):
database, # type: str database, # type: str
cluster, # type: str cluster, # type: str
schema_name, # type: str schema_name, # type: str
table_name, # type: str name, # type: str
table_key, # type: str key, # type: str
table_description, # type: str description, # type: str
table_last_updated_epoch, # type: Optional[int] last_updated_epoch, # type: Optional[int]
column_names, # type: List[str] column_names, # type: List[str]
column_descriptions, # type: List[str] column_descriptions, # type: List[str]
total_usage, # type: int total_usage, # type: int
unique_usage, # type: int unique_usage, # type: int
tag_names, # type: List[str] tags, # type: List[str]
): ):
# type: (...) -> None # type: (...) -> None
self.database = database self.database = database
self.cluster = cluster self.cluster = cluster
self.schema_name = schema_name self.schema_name = schema_name
self.table_name = table_name self.name = name
self.table_key = table_key self.key = key
self.table_description = table_description self.description = description
self.table_last_updated_epoch = int(table_last_updated_epoch) if table_last_updated_epoch else None # todo: use last_updated_timestamp to match the record in metadata
self.last_updated_epoch = int(last_updated_epoch) if last_updated_epoch else None
self.column_names = column_names self.column_names = column_names
self.column_descriptions = column_descriptions self.column_descriptions = column_descriptions
self.total_usage = total_usage self.total_usage = total_usage
self.unique_usage = unique_usage self.unique_usage = unique_usage
# todo: will include tag_type once we have better understanding from UI flow. # todo: will include tag_type once we have better understanding from UI flow.
self.tag_names = tag_names self.tags = tags
from setuptools import setup, find_packages from setuptools import setup, find_packages
__version__ = '1.3.1' __version__ = '1.3.2'
setup( setup(
......
...@@ -103,15 +103,15 @@ class TestNeo4jExtractor(unittest.TestCase): ...@@ -103,15 +103,15 @@ class TestNeo4jExtractor(unittest.TestCase):
result_dict = dict(database='test_database', result_dict = dict(database='test_database',
cluster='test_cluster', cluster='test_cluster',
schema_name='test_schema', schema_name='test_schema',
table_name='test_table_name', name='test_table_name',
table_key='test_table_key', key='test_table_key',
table_description='test_table_description', description='test_table_description',
table_last_updated_epoch=123456789, last_updated_epoch=123456789,
column_names=['test_col1', 'test_col2', 'test_col3'], column_names=['test_col1', 'test_col2', 'test_col3'],
column_descriptions=['test_description1', 'test_description2', ''], column_descriptions=['test_description1', 'test_description2', ''],
total_usage=100, total_usage=100,
unique_usage=5, unique_usage=5,
tag_names=['hive']) tags=['hive'])
extractor.results = [result_dict] extractor.results = [result_dict]
result_obj = extractor.extract() result_obj = extractor.extract()
......
...@@ -64,15 +64,15 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -64,15 +64,15 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data = dict(database='test_database', data = dict(database='test_database',
cluster='test_cluster', cluster='test_cluster',
schema_name='test_schema', schema_name='test_schema',
table_name='test_table', name='test_table',
table_key='test_table_key', key='test_table_key',
table_last_updated_epoch=123456789, last_updated_epoch=123456789,
table_description='test_description', description='test_description',
column_names=['test_col1', 'test_col2'], column_names=['test_col1', 'test_col2'],
column_descriptions=['test_comment1', 'test_comment2'], column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10, total_usage=10,
unique_usage=5, unique_usage=5,
tag_names=['test_tag1', 'test_tag2']) tags=['test_tag1', 'test_tag2'])
with self.assertRaises(Exception) as context: with self.assertRaises(Exception) as context:
loader.load(data) # type: ignore loader.load(data) # type: ignore
...@@ -92,25 +92,25 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -92,25 +92,25 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data = TableESDocument(database='test_database', data = TableESDocument(database='test_database',
cluster='test_cluster', cluster='test_cluster',
schema_name='test_schema', schema_name='test_schema',
table_name='test_table', name='test_table',
table_key='test_table_key', key='test_table_key',
table_last_updated_epoch=123456789, last_updated_epoch=123456789,
table_description='test_description', description='test_description',
column_names=['test_col1', 'test_col2'], column_names=['test_col1', 'test_col2'],
column_descriptions=['test_comment1', 'test_comment2'], column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10, total_usage=10,
unique_usage=5, unique_usage=5,
tag_names=['test_tag1', 'test_tag2']) tags=['test_tag1', 'test_tag2'])
loader.load(data) loader.load(data)
loader.close() loader.close()
expected = [ expected = [
('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], ' ('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
'"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", ' '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
'"column_names": ["test_col1", "test_col2"], "table_name": "test_table", ' '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"table_last_updated_epoch": 123456789,' '"last_updated_epoch": 123456789,'
'"table_description": "test_description", "unique_usage": 5, "total_usage": 10, ' '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tag_names": ["test_tag1", "test_tag2"]}') '"tags": ["test_tag1", "test_tag2"]}')
] ]
self._check_results_helper(expected=expected) self._check_results_helper(expected=expected)
...@@ -128,27 +128,27 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -128,27 +128,27 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data = [TableESDocument(database='test_database', data = [TableESDocument(database='test_database',
cluster='test_cluster', cluster='test_cluster',
schema_name='test_schema', schema_name='test_schema',
table_name='test_table', name='test_table',
table_key='test_table_key', key='test_table_key',
table_last_updated_epoch=123456789, last_updated_epoch=123456789,
table_description='test_description', description='test_description',
column_names=['test_col1', 'test_col2'], column_names=['test_col1', 'test_col2'],
column_descriptions=['test_comment1', 'test_comment2'], column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10, total_usage=10,
unique_usage=5, unique_usage=5,
tag_names=['test_tag1', 'test_tag2'])] * 5 tags=['test_tag1', 'test_tag2'])] * 5
for d in data: for d in data:
loader.load(d) loader.load(d)
loader.close() loader.close()
expected = [ expected = [
('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], ' ('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
'"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", ' '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
'"column_names": ["test_col1", "test_col2"], "table_name": "test_table", ' '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"table_last_updated_epoch": 123456789,' '"last_updated_epoch": 123456789,'
'"table_description": "test_description", "unique_usage": 5, "total_usage": 10, ' '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tag_names": ["test_tag1", "test_tag2"]}') '"tags": ["test_tag1", "test_tag2"]}')
] * 5 ] * 5
self._check_results_helper(expected=expected) self._check_results_helper(expected=expected)
...@@ -14,28 +14,28 @@ class TestTableElasticsearchDocument(unittest.TestCase): ...@@ -14,28 +14,28 @@ class TestTableElasticsearchDocument(unittest.TestCase):
test_obj = TableESDocument(database='test_database', test_obj = TableESDocument(database='test_database',
cluster='test_cluster', cluster='test_cluster',
schema_name='test_schema', schema_name='test_schema',
table_name='test_table', name='test_table',
table_key='test_table_key', key='test_table_key',
table_last_updated_epoch=123456789, last_updated_epoch=123456789,
table_description='test_table_description', description='test_table_description',
column_names=['test_col1', 'test_col2'], column_names=['test_col1', 'test_col2'],
column_descriptions=['test_description1', 'test_description2'], column_descriptions=['test_description1', 'test_description2'],
total_usage=100, total_usage=100,
unique_usage=10, unique_usage=10,
tag_names=['test']) tags=['test'])
expected_document_dict = {"database": "test_database", expected_document_dict = {"database": "test_database",
"cluster": "test_cluster", "cluster": "test_cluster",
"schema_name": "test_schema", "schema_name": "test_schema",
"table_name": "test_table", "name": "test_table",
"table_key": "test_table_key", "key": "test_table_key",
"table_last_updated_epoch": 123456789, "last_updated_epoch": 123456789,
"table_description": "test_table_description", "description": "test_table_description",
"column_names": ["test_col1", "test_col2"], "column_names": ["test_col1", "test_col2"],
"column_descriptions": ["test_description1", "test_description2"], "column_descriptions": ["test_description1", "test_description2"],
"total_usage": 100, "total_usage": 100,
"unique_usage": 10, "unique_usage": 10,
"tag_names": ["test"] "tags": ["test"]
} }
result = test_obj.to_json() result = test_obj.to_json()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment