Unverified commit a4d049fe, authored by Jin Hyuk Chang, committed by GitHub

Added Description on Schema for ES document on table (#245)

* Added Description on Schema

* Update
parent 5f7224a8
...@@ -23,19 +23,23 @@ class Neo4jSearchDataExtractor(Extractor): ...@@ -23,19 +23,23 @@ class Neo4jSearchDataExtractor(Extractor):
<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table) <-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
{publish_tag_filter} {publish_tag_filter}
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description) OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (schema)-[:DESCRIPTION]->(schema_description:Description)
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default' OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
WITH db, cluster, schema, table, table_description, COLLECT(DISTINCT tags.key) as tags WITH db, cluster, schema, schema_description, table, table_description, COLLECT(DISTINCT tags.key) as tags
OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge' OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge'
WITH db, cluster, schema, table, table_description, tags, COLLECT(DISTINCT badges.key) as badges WITH db, cluster, schema, schema_description, table, table_description, tags, COLLECT(DISTINCT badges.key) AS
badges
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User) OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
WITH db, cluster, schema, table, table_description, tags, badges, SUM(read.read_count) AS total_usage, WITH db, cluster, schema, schema_description, table, table_description, tags, badges, SUM(read.read_count) AS
total_usage,
COUNT(DISTINCT user.email) as unique_usage COUNT(DISTINCT user.email) as unique_usage
OPTIONAL MATCH (table)-[:COLUMN]->(col:Column) OPTIONAL MATCH (table)-[:COLUMN]->(col:Column)
OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description) OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description)
WITH db, cluster, schema, table, table_description, tags, badges, total_usage, unique_usage, WITH db, cluster, schema, schema_description, table, table_description, tags, badges, total_usage, unique_usage,
COLLECT(col.name) AS column_names, COLLECT(col_description.description) AS column_descriptions COLLECT(col.name) AS column_names, COLLECT(col_description.description) AS column_descriptions
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp) OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema, RETURN db.name as database, cluster.name AS cluster, schema.name AS schema,
schema_description.description AS schema_description,
table.name AS name, table.key AS key, table_description.description AS description, table.name AS name, table.key AS key, table_description.description AS description,
time_stamp.last_updated_timestamp AS last_updated_timestamp, time_stamp.last_updated_timestamp AS last_updated_timestamp,
column_names, column_names,
......
...@@ -22,6 +22,7 @@ class TableESDocument(ElasticsearchDocument): ...@@ -22,6 +22,7 @@ class TableESDocument(ElasticsearchDocument):
tags, # type: List[str], tags, # type: List[str],
badges=None, # type: Optional[List[str]] badges=None, # type: Optional[List[str]]
display_name=None, # type: Optional[str] display_name=None, # type: Optional[str]
schema_description=None, # type: Optional[str]
): ):
# type: (...) -> None # type: (...) -> None
self.database = database self.database = database
...@@ -40,3 +41,4 @@ class TableESDocument(ElasticsearchDocument): ...@@ -40,3 +41,4 @@ class TableESDocument(ElasticsearchDocument):
# todo: will include tag_type once we have better understanding from UI flow. # todo: will include tag_type once we have better understanding from UI flow.
self.tags = tags self.tags = tags
self.badges = badges self.badges = badges
self.schema_description = schema_description
schema_key,schema,description
hive://gold.test_schema,test_schema,"test schema description"
\ No newline at end of file
...@@ -264,6 +264,8 @@ if __name__ == "__main__": ...@@ -264,6 +264,8 @@ if __name__ == "__main__":
'databuilder.models.table_metadata.TagMetadata') 'databuilder.models.table_metadata.TagMetadata')
run_csv_job('example/sample_data/sample_table_last_updated.csv', 'test_table_last_updated_metadata', run_csv_job('example/sample_data/sample_table_last_updated.csv', 'test_table_last_updated_metadata',
'databuilder.models.table_last_updated.TableLastUpdated') 'databuilder.models.table_last_updated.TableLastUpdated')
run_csv_job('example/sample_data/sample_schema_description.csv', 'test_schema_description',
'databuilder.models.schema.schema.SchemaModel')
create_last_updated_job().launch() create_last_updated_job().launch()
......
...@@ -113,7 +113,8 @@ class TestNeo4jExtractor(unittest.TestCase): ...@@ -113,7 +113,8 @@ class TestNeo4jExtractor(unittest.TestCase):
total_usage=100, total_usage=100,
unique_usage=5, unique_usage=5,
tags=['hive'], tags=['hive'],
badges=['badge1']) badges=['badge1'],
schema_description='schema_description')
extractor.results = [result_dict] extractor.results = [result_dict]
result_obj = extractor.extract() result_obj = extractor.extract()
......
...@@ -101,7 +101,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -101,7 +101,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
total_usage=10, total_usage=10,
unique_usage=5, unique_usage=5,
tags=['test_tag1', 'test_tag2'], tags=['test_tag1', 'test_tag2'],
badges=['badge1']) badges=['badge1'],
schema_description='schema description')
loader.load(data) loader.load(data)
loader.close() loader.close()
...@@ -111,7 +112,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -111,7 +112,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", ' '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", ' '"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, ' '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]}') '"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"], "schema_description": "schema description"}')
] ]
self._check_results_helper(expected=expected) self._check_results_helper(expected=expected)
...@@ -138,7 +139,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -138,7 +139,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
total_usage=10, total_usage=10,
unique_usage=5, unique_usage=5,
tags=['test_tag1', 'test_tag2'], tags=['test_tag1', 'test_tag2'],
badges=['badge1'])] * 5 badges=['badge1'],
schema_description='schema_description')] * 5
for d in data: for d in data:
loader.load(d) loader.load(d)
...@@ -150,7 +152,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -150,7 +152,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", ' '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", ' '"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, ' '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]}') '"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"], "schema_description": "schema_description"}')
] * 5 ] * 5
self._check_results_helper(expected=expected) self._check_results_helper(expected=expected)
...@@ -23,7 +23,8 @@ class TestTableElasticsearchDocument(unittest.TestCase): ...@@ -23,7 +23,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
total_usage=100, total_usage=100,
unique_usage=10, unique_usage=10,
tags=['test'], tags=['test'],
badges=['badge1']) badges=['badge1'],
schema_description='schema description')
expected_document_dict = {"database": "test_database", expected_document_dict = {"database": "test_database",
"cluster": "test_cluster", "cluster": "test_cluster",
...@@ -38,7 +39,8 @@ class TestTableElasticsearchDocument(unittest.TestCase): ...@@ -38,7 +39,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
"total_usage": 100, "total_usage": 100,
"unique_usage": 10, "unique_usage": 10,
"tags": ["test"], "tags": ["test"],
"badges": ["badge1"] "badges": ["badge1"],
'schema_description': 'schema description'
} }
result = test_obj.to_json() result = test_obj.to_json()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment