Unverified commit 33fd3bef, authored by christina stead, committed by GitHub

Add badges to Neo4jExtractor and elastic search (#204)

* Add badges to Neo4jSearchExtractor

* update publisher to have badges

* update elastic search document

* fix typo

* update name

* filter tags by type

* typo

* do not filter tags because then i can't get badges on staging :|

* update tests

* fix tests

* use amundsen_common for elastic search index

* revert commit using amundsencommon

* add comment

* make backwards compatible

* remove badges from tags
parent 7b1d55a3
......@@ -24,7 +24,8 @@ class Neo4jSearchDataExtractor(Extractor):
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
OPTIONAL MATCH (table)-[:COLUMN]->(cols:Column)
OPTIONAL MATCH (cols)-[:DESCRIPTION]->(col_description:Description)
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag)
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge'
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema,
table.name AS name, table.key AS key, table_description.description AS description,
......@@ -33,7 +34,8 @@ class Neo4jSearchDataExtractor(Extractor):
EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions,
REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage,
COUNT(DISTINCT user.email) as unique_usage,
COLLECT(DISTINCT tags.key) as tags
COLLECT(DISTINCT tags.key) as tags,
COLLECT(DISTINCT badges.key) as badges
ORDER BY table.name;
"""
)
......
......@@ -19,7 +19,8 @@ class TableESDocument(ElasticsearchDocument):
column_descriptions, # type: List[str]
total_usage, # type: int
unique_usage, # type: int
tags, # type: List[str]
tags, # type: List[str],
badges=None, # type: Optional[List[str]]
display_name=None, # type: Optional[str]
):
# type: (...) -> None
......@@ -38,3 +39,4 @@ class TableESDocument(ElasticsearchDocument):
self.unique_usage = unique_usage
# todo: will include tag_type once we have better understanding from UI flow.
self.tags = tags
self.badges = badges
......@@ -36,6 +36,7 @@ class ElasticsearchPublisher(Publisher):
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html
# Standard Analyzer is used for all text fields that don't explicitly specify an analyzer
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
# TODO use amundsencommon for this when this project is updated to py3
DEFAULT_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
"""
{
......@@ -87,6 +88,9 @@ class ElasticsearchPublisher(Publisher):
"tags": {
"type": "keyword"
},
"badges": {
"type": "keyword"
},
"cluster": {
"type": "text"
},
......
......@@ -56,8 +56,6 @@ statsd==3.2.1
retrying==1.3.3
unicodecsv==0.14.1,<1.0
httplib2~=0.9.2
unidecode
......@@ -112,7 +112,8 @@ class TestNeo4jExtractor(unittest.TestCase):
column_descriptions=['test_description1', 'test_description2', ''],
total_usage=100,
unique_usage=5,
tags=['hive'])
tags=['hive'],
badges=['badge1'])
extractor.results = [result_dict]
result_obj = extractor.extract()
......
......@@ -100,7 +100,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10,
unique_usage=5,
tags=['test_tag1', 'test_tag2'])
tags=['test_tag1', 'test_tag2'],
badges=['badge1'])
loader.load(data)
loader.close()
......@@ -110,7 +111,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"]}')
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]}')
]
self._check_results_helper(expected=expected)
......@@ -136,7 +137,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
column_descriptions=['test_comment1', 'test_comment2'],
total_usage=10,
unique_usage=5,
tags=['test_tag1', 'test_tag2'])] * 5
tags=['test_tag1', 'test_tag2'],
badges=['badge1'])] * 5
for d in data:
loader.load(d)
......@@ -148,7 +150,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"]}')
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]}')
] * 5
self._check_results_helper(expected=expected)
......@@ -22,7 +22,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
column_descriptions=['test_description1', 'test_description2'],
total_usage=100,
unique_usage=10,
tags=['test'])
tags=['test'],
badges=['badge1'])
expected_document_dict = {"database": "test_database",
"cluster": "test_cluster",
......@@ -36,7 +37,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
"column_descriptions": ["test_description1", "test_description2"],
"total_usage": 100,
"unique_usage": 10,
"tags": ["test"]
"tags": ["test"],
"badges": ["badge1"]
}
result = test_obj.to_json()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment