Unverified commit 13d6018a, authored by Jin Hyuk Chang and committed by GitHub

Optimize Neo4j Cypher query on Neo4jSearchDataExtractor (#213)

* Optimize Neo4j Cypher query on Neo4jSearchDataExtractor

* flake8
parent 7c3ad118
......@@ -21,21 +21,27 @@ class Neo4jSearchDataExtractor(Extractor):
 MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
 {publish_tag_filter}
 OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
-OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
-OPTIONAL MATCH (table)-[:COLUMN]->(cols:Column)
-OPTIONAL MATCH (cols)-[:DESCRIPTION]->(col_description:Description)
 OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
+WITH db, cluster, schema, table, table_description, COLLECT(DISTINCT tags.key) as tags
 OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge'
+WITH db, cluster, schema, table, table_description, tags, COLLECT(DISTINCT badges.key) as badges
+OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
+WITH db, cluster, schema, table, table_description, tags, badges, SUM(read.read_count) AS total_usage,
+COUNT(DISTINCT user.email) as unique_usage
+OPTIONAL MATCH (table)-[:COLUMN]->(col:Column)
+OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description)
+WITH db, cluster, schema, table, table_description, tags, badges, total_usage, unique_usage,
+COLLECT(col.name) AS column_names, COLLECT(col_description.description) AS column_descriptions
 OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
 RETURN db.name as database, cluster.name AS cluster, schema.name AS schema,
 table.name AS name, table.key AS key, table_description.description AS description,
 time_stamp.last_updated_timestamp AS last_updated_timestamp,
-EXTRACT(c in COLLECT(DISTINCT cols)| c.name) AS column_names,
-EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions,
-REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage,
-COUNT(DISTINCT user.email) as unique_usage,
-COLLECT(DISTINCT tags.key) as tags,
-COLLECT(DISTINCT badges.key) as badges
+column_names,
+column_descriptions,
+total_usage,
+unique_usage,
+tags,
+badges
 ORDER BY table.name;
 """
 )
......
......@@ -2,7 +2,7 @@ import os
 from setuptools import setup, find_packages
-__version__ = '2.2.0'
+__version__ = '2.3.1'
 requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment