Unverified commit 824c9e48, authored by Tao Feng, committed by GitHub

Add default cypher query for user/dashboard entity to search extractor (#220)

parent cf7a28ce
...@@ -15,10 +15,12 @@ class Neo4jSearchDataExtractor(Extractor): ...@@ -15,10 +15,12 @@ class Neo4jSearchDataExtractor(Extractor):
Use Neo4jExtractor extractor class Use Neo4jExtractor extractor class
""" """
CYPHER_QUERY_CONFIG_KEY = 'cypher_query' CYPHER_QUERY_CONFIG_KEY = 'cypher_query'
ENTITY_TYPE = 'entity_type'
DEFAULT_NEO4J_CYPHER_QUERY = textwrap.dedent( DEFAULT_NEO4J_TABLE_CYPHER_QUERY = textwrap.dedent(
""" """
MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table) MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)
<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
{publish_tag_filter} {publish_tag_filter}
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description) OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default' OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
...@@ -46,19 +48,68 @@ class Neo4jSearchDataExtractor(Extractor): ...@@ -46,19 +48,68 @@ class Neo4jSearchDataExtractor(Extractor):
""" """
) )
# Default Cypher query used when the configured entity type is 'user'.
# For User nodes whose full_name is not null it returns profile fields, the
# manager's email, the summed read_count over the user's distinct READ
# relationships, and distinct counts of OWNER_OF / FOLLOWED_BY targets,
# ordered by the user's email.
# NOTE(review): {publish_tag_filter} is filled in through str.format. The
# filter text built elsewhere in this file is
# "WHERE table.published_tag = '...'", and this query binds no `table`
# variable -- confirm the behaviour when a publish tag is configured.
DEFAULT_NEO4J_USER_CYPHER_QUERY = textwrap.dedent(
"""
MATCH (user:User)
OPTIONAL MATCH (user)-[read:READ]->(a)
OPTIONAL MATCH (user)-[own:OWNER_OF]->(b)
OPTIONAL MATCH (user)-[follow:FOLLOWED_BY]->(c)
OPTIONAL MATCH (user)-[manage_by:MANAGE_BY]->(manager)
{publish_tag_filter}
with user, a, b, c, read, own, follow, manager
where user.full_name is not null
return user.email as email, user.first_name as first_name, user.last_name as last_name,
user.full_name as full_name, user.github_username as github_username, user.team_name as team_name,
user.employee_type as employee_type, manager.email as manager_email,
user.slack_id as slack_id, user.is_active as is_active,
REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read,
count(distinct b) as total_own,
count(distinct c) AS total_follow
order by user.email
"""
)
# TODO: 1. replace the hard-coded total_usage value below once dashboard usage data is available;
#       2. add more fields once they exist in the graph;
#       3. change the hard-coded 'mode' product to a generic value once more dashboard products are supported.
# Default Cypher query used when the configured entity type is 'dashboard'.
# For Dashboard nodes whose name is not null it returns the dashboard group
# name, the dashboard name, both descriptions (empty string when missing), a
# constant product of 'mode' and a constant total_usage of 1, ordered by the
# dashboard group name.
# The group description is read from the Description node matched as
# `dbg_descr`, mirroring how the dashboard description is read from
# `db_descr`; previously `dbg_descr` was matched and carried through the
# WITH clause but never used, and the value was read from the group node.
# NOTE(review): {publish_tag_filter} is filled in through str.format. The
# filter text built elsewhere in this file references a `table` variable that
# this query does not bind -- confirm the behaviour when a publish tag is
# configured.
DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY = textwrap.dedent(
"""
MATCH (db:Dashboard)
OPTIONAL MATCH (db)-[:DASHBOARD_OF]->(dbg:Dashboardgroup)
OPTIONAL MATCH (db)-[:DESCRIPTION]->(db_descr:Description)
OPTIONAL MATCH (dbg)-[:DESCRIPTION]->(dbg_descr:Description)
{publish_tag_filter}
with db, dbg, db_descr, dbg_descr
where db.name is not null
return dbg.name as dashboard_group, db.name as dashboard_name,
coalesce(db_descr.description, '') as description,
coalesce(dbg_descr.description, '') as dashboard_group_description,
'mode' as product,
1 AS total_usage
order by dbg.name
"""
)
# todo: we will add more once we add more entities
# Lookup from the lower-cased entity type ('table', 'user', 'dashboard') to
# the default Cypher query used when no explicit query is configured.
DEFAULT_QUERY_BY_ENTITY = dict(
    table=DEFAULT_NEO4J_TABLE_CYPHER_QUERY,
    user=DEFAULT_NEO4J_USER_CYPHER_QUERY,
    dashboard=DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY,
)
def init(self, conf): def init(self, conf):
# type: (ConfigTree) -> None # type: (ConfigTree) -> None
""" """
Initialize Neo4jExtractor object from configuration and use that for extraction Initialize Neo4jExtractor object from configuration and use that for extraction
""" """
self.conf = conf self.conf = conf
# extract cypher query from conf, if specified, else use default query # extract cypher query from conf, if specified, else use default query
if Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY in conf: if Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY in conf:
self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY) self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY)
else: else:
entity_type = conf.get_string(Neo4jSearchDataExtractor.ENTITY_TYPE, default='table').lower()
default_query = Neo4jSearchDataExtractor.DEFAULT_QUERY_BY_ENTITY[entity_type]
self.cypher_query = self._add_publish_tag_filter(conf.get_string(JOB_PUBLISH_TAG, ''), self.cypher_query = self._add_publish_tag_filter(conf.get_string(JOB_PUBLISH_TAG, ''),
Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY) cypher_query=default_query)
self.neo4j_extractor = Neo4jExtractor() self.neo4j_extractor = Neo4jExtractor()
# write the cypher query in configs in Neo4jExtractor scope # write the cypher query in configs in Neo4jExtractor scope
...@@ -98,5 +149,4 @@ class Neo4jSearchDataExtractor(Extractor): ...@@ -98,5 +149,4 @@ class Neo4jSearchDataExtractor(Extractor):
publish_tag_filter = '' publish_tag_filter = ''
else: else:
publish_tag_filter = """WHERE table.published_tag = '{}'""".format(publish_tag) publish_tag_filter = """WHERE table.published_tag = '{}'""".format(publish_tag)
return cypher_query.format(publish_tag_filter=publish_tag_filter) return cypher_query.format(publish_tag_filter=publish_tag_filter)
import unittest import unittest
from mock import patch
from pyhocon import ConfigFactory
from databuilder import Scoped
from databuilder.extractor.neo4j_extractor import Neo4jExtractor
from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor
...@@ -18,6 +23,25 @@ class TestNeo4jExtractor(unittest.TestCase): ...@@ -18,6 +23,25 @@ class TestNeo4jExtractor(unittest.TestCase):
self.assertEqual(actual, """MATCH (table:Table) RETURN table""") self.assertEqual(actual, """MATCH (table:Table) RETURN table""")
def test_default_search_query(self):
    # type: (Any) -> None
    """
    When entity_type is 'dashboard' and no cypher query is supplied in the
    config, the extractor must use the default dashboard query with an
    empty publish tag filter.
    """
    settings = {
        'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.GRAPH_URL_CONFIG_KEY):
            'test-endpoint',
        'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_USER):
            'test-user',
        'extractor.search_data.extractor.neo4j.{}'.format(Neo4jExtractor.NEO4J_AUTH_PW):
            'test-passwd',
        'extractor.search_data.{}'.format(Neo4jSearchDataExtractor.ENTITY_TYPE):
            'dashboard',
    }
    job_conf = ConfigFactory.from_dict(settings)
    expected_query = Neo4jSearchDataExtractor.DEFAULT_NEO4J_DASHBOARD_CYPHER_QUERY.format(
        publish_tag_filter='')

    # Stub out the driver factory so no real Neo4j connection is attempted.
    with patch.object(Neo4jExtractor, '_get_driver'):
        extractor = Neo4jSearchDataExtractor()
        scoped_conf = Scoped.get_scoped_conf(conf=job_conf,
                                             scope=extractor.get_scope())
        extractor.init(scoped_conf)
        self.assertEqual(extractor.cypher_query, expected_query)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment