Unverified commit 8b9456f1, authored by Tao Feng and committed by GitHub

Refactor tableESDocument to match schema in search service (#92)

parent 1f15f5a3
......@@ -27,13 +27,13 @@ class Neo4jSearchDataExtractor(Extractor):
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag)
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema_name,
table.name AS table_name, table.key AS table_key, table_description.description AS table_description,
time_stamp.last_updated_timestamp AS table_last_updated_epoch,
table.name AS name, table.key AS key, table_description.description AS description,
time_stamp.last_updated_timestamp AS last_updated_epoch,
EXTRACT(c in COLLECT(DISTINCT cols)| c.name) AS column_names,
EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions,
REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage,
COUNT(DISTINCT user.email) as unique_usage,
COLLECT(DISTINCT tags.key) as tag_names
COLLECT(DISTINCT tags.key) as tags
ORDER BY table.name;
"""
)
......
......@@ -11,27 +11,28 @@ class TableESDocument(ElasticsearchDocument):
database, # type: str
cluster, # type: str
schema_name, # type: str
table_name, # type: str
table_key, # type: str
table_description, # type: str
table_last_updated_epoch, # type: Optional[int]
name, # type: str
key, # type: str
description, # type: str
last_updated_epoch, # type: Optional[int]
column_names, # type: List[str]
column_descriptions, # type: List[str]
total_usage, # type: int
unique_usage, # type: int
tag_names, # type: List[str]
tags, # type: List[str]
):
# type: (...) -> None
self.database = database
self.cluster = cluster
self.schema_name = schema_name
self.table_name = table_name
self.table_key = table_key
self.table_description = table_description
self.table_last_updated_epoch = int(table_last_updated_epoch) if table_last_updated_epoch else None
self.name = name
self.key = key
self.description = description
# todo: use last_updated_timestamp to match the record in metadata
self.last_updated_epoch = int(last_updated_epoch) if last_updated_epoch else None
self.column_names = column_names
self.column_descriptions = column_descriptions
self.total_usage = total_usage
self.unique_usage = unique_usage
# todo: will include tag_type once we have better understanding from UI flow.
self.tag_names = tag_names
self.tags = tags
from setuptools import setup, find_packages
__version__ = '1.3.1'
__version__ = '1.3.2'
setup(
......
......@@ -103,15 +103,15 @@ class TestNeo4jExtractor(unittest.TestCase):
result_dict = dict(database='test_database',
cluster='test_cluster',
schema_name='test_schema',
table_name='test_table_name',
table_key='test_table_key',
table_description='test_table_description',
table_last_updated_epoch=123456789,
name='test_table_name',
key='test_table_key',
description='test_table_description',
last_updated_epoch=123456789,
column_names=['test_col1', 'test_col2', 'test_col3'],
column_descriptions=['test_description1', 'test_description2', ''],
total_usage=100,
unique_usage=5,
tag_names=['hive'])
tags=['hive'])
extractor.results = [result_dict]
result_obj = extractor.extract()
......
......@@ -64,15 +64,15 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data = dict(database='test_database',
cluster='test_cluster',
schema_name='test_schema',
table_name='test_table',
table_key='test_table_key',
table_last_updated_epoch=123456789,
table_description='test_description',
name='test_table',
key='test_table_key',
last_updated_epoch=123456789,
description='test_description',
column_names=['test_col1', 'test_col2'],
column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10,
unique_usage=5,
tag_names=['test_tag1', 'test_tag2'])
tags=['test_tag1', 'test_tag2'])
with self.assertRaises(Exception) as context:
loader.load(data) # type: ignore
......@@ -92,25 +92,25 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data = TableESDocument(database='test_database',
cluster='test_cluster',
schema_name='test_schema',
table_name='test_table',
table_key='test_table_key',
table_last_updated_epoch=123456789,
table_description='test_description',
name='test_table',
key='test_table_key',
last_updated_epoch=123456789,
description='test_description',
column_names=['test_col1', 'test_col2'],
column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10,
unique_usage=5,
tag_names=['test_tag1', 'test_tag2'])
tags=['test_tag1', 'test_tag2'])
loader.load(data)
loader.close()
expected = [
('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
'"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
'"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
'"table_last_updated_epoch": 123456789,'
'"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tag_names": ["test_tag1", "test_tag2"]}')
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_epoch": 123456789,'
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"]}')
]
self._check_results_helper(expected=expected)
......@@ -128,27 +128,27 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data = [TableESDocument(database='test_database',
cluster='test_cluster',
schema_name='test_schema',
table_name='test_table',
table_key='test_table_key',
table_last_updated_epoch=123456789,
table_description='test_description',
name='test_table',
key='test_table_key',
last_updated_epoch=123456789,
description='test_description',
column_names=['test_col1', 'test_col2'],
column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10,
unique_usage=5,
tag_names=['test_tag1', 'test_tag2'])] * 5
tags=['test_tag1', 'test_tag2'])] * 5
for d in data:
loader.load(d)
loader.close()
expected = [
('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
'"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
'"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
'"table_last_updated_epoch": 123456789,'
'"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tag_names": ["test_tag1", "test_tag2"]}')
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_epoch": 123456789,'
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"]}')
] * 5
self._check_results_helper(expected=expected)
......@@ -14,28 +14,28 @@ class TestTableElasticsearchDocument(unittest.TestCase):
test_obj = TableESDocument(database='test_database',
cluster='test_cluster',
schema_name='test_schema',
table_name='test_table',
table_key='test_table_key',
table_last_updated_epoch=123456789,
table_description='test_table_description',
name='test_table',
key='test_table_key',
last_updated_epoch=123456789,
description='test_table_description',
column_names=['test_col1', 'test_col2'],
column_descriptions=['test_description1', 'test_description2'],
total_usage=100,
unique_usage=10,
tag_names=['test'])
tags=['test'])
expected_document_dict = {"database": "test_database",
"cluster": "test_cluster",
"schema_name": "test_schema",
"table_name": "test_table",
"table_key": "test_table_key",
"table_last_updated_epoch": 123456789,
"table_description": "test_table_description",
"name": "test_table",
"key": "test_table_key",
"last_updated_epoch": 123456789,
"description": "test_table_description",
"column_names": ["test_col1", "test_col2"],
"column_descriptions": ["test_description1", "test_description2"],
"total_usage": 100,
"unique_usage": 10,
"tag_names": ["test"]
"tags": ["test"]
}
result = test_obj.to_json()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment