Unverified commit 8f18fafd, authored by samshuster and committed by GitHub

issue-297/Adding programmatic_descriptions to table search export (#198)

* Adding programmatic_descriptions to table search export

* fixing tests from merge

* Rebasing from upstream master

* fixing merge

* optimizing the neo4j query

* adding programmatic_descriptions to the elasticsearch_constants.py
parent 2ac583cf
...@@ -24,18 +24,23 @@ class Neo4jSearchDataExtractor(Extractor): ...@@ -24,18 +24,23 @@ class Neo4jSearchDataExtractor(Extractor):
{publish_tag_filter} {publish_tag_filter}
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description) OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (schema)-[:DESCRIPTION]->(schema_description:Description) OPTIONAL MATCH (schema)-[:DESCRIPTION]->(schema_description:Description)
OPTIONAL MATCH (table)-[:DESCRIPTION]->(prog_descs:Programmatic_Description)
WITH db, cluster, schema, schema_description, table, table_description,
COLLECT(prog_descs.description) as programmatic_descriptions
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default' OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
WITH db, cluster, schema, schema_description, table, table_description, COLLECT(DISTINCT tags.key) as tags WITH db, cluster, schema, schema_description, table, table_description, programmatic_descriptions,
COLLECT(DISTINCT tags.key) as tags
OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge' OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge'
WITH db, cluster, schema, schema_description, table, table_description, tags, COLLECT(DISTINCT badges.key) AS WITH db, cluster, schema, schema_description, table, table_description, programmatic_descriptions, tags,
badges COLLECT(DISTINCT badges.key) as badges
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User) OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
WITH db, cluster, schema, schema_description, table, table_description, tags, badges, SUM(read.read_count) AS WITH db, cluster, schema, schema_description, table, table_description, programmatic_descriptions, tags, badges,
total_usage, SUM(read.read_count) AS total_usage,
COUNT(DISTINCT user.email) as unique_usage COUNT(DISTINCT user.email) as unique_usage
OPTIONAL MATCH (table)-[:COLUMN]->(col:Column) OPTIONAL MATCH (table)-[:COLUMN]->(col:Column)
OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description) OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description)
WITH db, cluster, schema, schema_description, table, table_description, tags, badges, total_usage, unique_usage, WITH db, cluster, schema, schema_description, table, table_description, tags, badges, total_usage, unique_usage,
programmatic_descriptions,
COLLECT(col.name) AS column_names, COLLECT(col_description.description) AS column_descriptions COLLECT(col.name) AS column_names, COLLECT(col_description.description) AS column_descriptions
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp) OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema, RETURN db.name as database, cluster.name AS cluster, schema.name AS schema,
...@@ -47,7 +52,8 @@ class Neo4jSearchDataExtractor(Extractor): ...@@ -47,7 +52,8 @@ class Neo4jSearchDataExtractor(Extractor):
total_usage, total_usage,
unique_usage, unique_usage,
tags, tags,
badges badges,
programmatic_descriptions
ORDER BY table.name; ORDER BY table.name;
""" """
) )
......
...@@ -23,6 +23,7 @@ class TableESDocument(ElasticsearchDocument): ...@@ -23,6 +23,7 @@ class TableESDocument(ElasticsearchDocument):
badges=None, # type: Optional[List[str]] badges=None, # type: Optional[List[str]]
display_name=None, # type: Optional[str] display_name=None, # type: Optional[str]
schema_description=None, # type: Optional[str] schema_description=None, # type: Optional[str]
programmatic_descriptions=[], # type: List[str]
): ):
# type: (...) -> None # type: (...) -> None
self.database = database self.database = database
...@@ -42,3 +43,4 @@ class TableESDocument(ElasticsearchDocument): ...@@ -42,3 +43,4 @@ class TableESDocument(ElasticsearchDocument):
self.tags = tags self.tags = tags
self.badges = badges self.badges = badges
self.schema_description = schema_description self.schema_description = schema_description
self.programmatic_descriptions = programmatic_descriptions
...@@ -81,6 +81,10 @@ TABLE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent( ...@@ -81,6 +81,10 @@ TABLE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
}, },
"unique_usage": { "unique_usage": {
"type": "long" "type": "long"
},
"programmatic_descriptions": {
"type": "text",
"analyzer": "simple"
} }
} }
} }
......
...@@ -114,7 +114,8 @@ class TestNeo4jExtractor(unittest.TestCase): ...@@ -114,7 +114,8 @@ class TestNeo4jExtractor(unittest.TestCase):
unique_usage=5, unique_usage=5,
tags=['hive'], tags=['hive'],
badges=['badge1'], badges=['badge1'],
schema_description='schema_description') schema_description='schema_description',
programmatic_descriptions=['TEST'])
extractor.results = [result_dict] extractor.results = [result_dict]
result_obj = extractor.extract() result_obj = extractor.extract()
......
...@@ -72,7 +72,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -72,7 +72,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
column_descriptions=['test_comment1', 'test_comment2'], column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10, total_usage=10,
unique_usage=5, unique_usage=5,
tags=['test_tag1', 'test_tag2']) tags=['test_tag1', 'test_tag2'],
programmatic_descriptions=['test'])
with self.assertRaises(Exception) as context: with self.assertRaises(Exception) as context:
loader.load(data) # type: ignore loader.load(data) # type: ignore
...@@ -102,7 +103,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -102,7 +103,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
unique_usage=5, unique_usage=5,
tags=['test_tag1', 'test_tag2'], tags=['test_tag1', 'test_tag2'],
badges=['badge1'], badges=['badge1'],
schema_description='schema description') schema_description='schema description',
programmatic_descriptions=['test'])
loader.load(data) loader.load(data)
loader.close() loader.close()
...@@ -112,7 +114,9 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -112,7 +114,9 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", ' '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", ' '"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, ' '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"], "schema_description": "schema description"}') '"tags": ["test_tag1", "test_tag2"], "schema_description": "schema description", '
'"programmatic_descriptions": ["test"], '
'"badges": ["badge1"]}')
] ]
self._check_results_helper(expected=expected) self._check_results_helper(expected=expected)
...@@ -140,7 +144,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -140,7 +144,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
unique_usage=5, unique_usage=5,
tags=['test_tag1', 'test_tag2'], tags=['test_tag1', 'test_tag2'],
badges=['badge1'], badges=['badge1'],
schema_description='schema_description')] * 5 schema_description='schema_description',
programmatic_descriptions=['test'])] * 5
for d in data: for d in data:
loader.load(d) loader.load(d)
...@@ -152,7 +157,9 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase): ...@@ -152,7 +157,9 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", ' '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", ' '"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, ' '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"], "schema_description": "schema_description"}') '"tags": ["test_tag1", "test_tag2"], "schema_description": "schema_description", '
'"programmatic_descriptions":["test"], '
'"badges": ["badge1"]}')
] * 5 ] * 5
self._check_results_helper(expected=expected) self._check_results_helper(expected=expected)
...@@ -23,6 +23,7 @@ class TestTableElasticsearchDocument(unittest.TestCase): ...@@ -23,6 +23,7 @@ class TestTableElasticsearchDocument(unittest.TestCase):
total_usage=100, total_usage=100,
unique_usage=10, unique_usage=10,
tags=['test'], tags=['test'],
programmatic_descriptions=['test'],
badges=['badge1'], badges=['badge1'],
schema_description='schema description') schema_description='schema description')
...@@ -39,6 +40,7 @@ class TestTableElasticsearchDocument(unittest.TestCase): ...@@ -39,6 +40,7 @@ class TestTableElasticsearchDocument(unittest.TestCase):
"total_usage": 100, "total_usage": 100,
"unique_usage": 10, "unique_usage": 10,
"tags": ["test"], "tags": ["test"],
"programmatic_descriptions": ['test'],
"badges": ["badge1"], "badges": ["badge1"],
'schema_description': 'schema description' 'schema_description': 'schema description'
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment