Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
AmendsenProject
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shaik Janipasha
AmendsenProject
Commits
a4d049fe
Unverified
Commit
a4d049fe
authored
Apr 24, 2020
by
Jin Hyuk Chang
Committed by
GitHub
Apr 24, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added Description on Schema for ES document on table (#245)
* Added Description on Schema * Update
parent
5f7224a8
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
26 additions
and
11 deletions
+26
-11
neo4j_search_data_extractor.py
databuilder/extractor/neo4j_search_data_extractor.py
+8
-4
table_elasticsearch_document.py
databuilder/models/table_elasticsearch_document.py
+2
-0
sample_schema_description.csv
example/sample_data/sample_schema_description.csv
+2
-0
sample_data_loader.py
example/scripts/sample_data_loader.py
+2
-0
test_neo4j_extractor.py
tests/unit/extractor/test_neo4j_extractor.py
+2
-1
test_file_system_elasticsearch_json_loader.py
...unit/loader/test_file_system_elasticsearch_json_loader.py
+6
-4
test_table_elasticsearch_document.py
tests/unit/models/test_table_elasticsearch_document.py
+4
-2
No files found.
databuilder/extractor/neo4j_search_data_extractor.py
View file @
a4d049fe
...
@@ -23,19 +23,23 @@ class Neo4jSearchDataExtractor(Extractor):
...
@@ -23,19 +23,23 @@ class Neo4jSearchDataExtractor(Extractor):
<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
{publish_tag_filter}
{publish_tag_filter}
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
OPTIONAL MATCH (schema)-[:DESCRIPTION]->(schema_description:Description)
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag) WHERE tags.tag_type='default'
WITH db, cluster, schema, table, table_description, COLLECT(DISTINCT tags.key) as tags
WITH db, cluster, schema,
schema_description,
table, table_description, COLLECT(DISTINCT tags.key) as tags
OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge'
OPTIONAL MATCH (table)-[:TAGGED_BY]->(badges:Tag) WHERE badges.tag_type='badge'
WITH db, cluster, schema, table, table_description, tags, COLLECT(DISTINCT badges.key) as badges
WITH db, cluster, schema, schema_description, table, table_description, tags, COLLECT(DISTINCT badges.key) AS
badges
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
WITH db, cluster, schema, table, table_description, tags, badges, SUM(read.read_count) AS total_usage,
WITH db, cluster, schema, schema_description, table, table_description, tags, badges, SUM(read.read_count) AS
total_usage,
COUNT(DISTINCT user.email) as unique_usage
COUNT(DISTINCT user.email) as unique_usage
OPTIONAL MATCH (table)-[:COLUMN]->(col:Column)
OPTIONAL MATCH (table)-[:COLUMN]->(col:Column)
OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description)
OPTIONAL MATCH (col)-[:DESCRIPTION]->(col_description:Description)
WITH db, cluster, schema, table, table_description, tags, badges, total_usage, unique_usage,
WITH db, cluster, schema,
schema_description,
table, table_description, tags, badges, total_usage, unique_usage,
COLLECT(col.name) AS column_names, COLLECT(col_description.description) AS column_descriptions
COLLECT(col.name) AS column_names, COLLECT(col_description.description) AS column_descriptions
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema,
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema,
schema_description.description AS schema_description,
table.name AS name, table.key AS key, table_description.description AS description,
table.name AS name, table.key AS key, table_description.description AS description,
time_stamp.last_updated_timestamp AS last_updated_timestamp,
time_stamp.last_updated_timestamp AS last_updated_timestamp,
column_names,
column_names,
...
...
databuilder/models/table_elasticsearch_document.py
View file @
a4d049fe
...
@@ -22,6 +22,7 @@ class TableESDocument(ElasticsearchDocument):
...
@@ -22,6 +22,7 @@ class TableESDocument(ElasticsearchDocument):
tags
,
# type: List[str],
tags
,
# type: List[str],
badges
=
None
,
# type: Optional[List[str]]
badges
=
None
,
# type: Optional[List[str]]
display_name
=
None
,
# type: Optional[str]
display_name
=
None
,
# type: Optional[str]
schema_description
=
None
,
# type: Optional[str]
):
):
# type: (...) -> None
# type: (...) -> None
self
.
database
=
database
self
.
database
=
database
...
@@ -40,3 +41,4 @@ class TableESDocument(ElasticsearchDocument):
...
@@ -40,3 +41,4 @@ class TableESDocument(ElasticsearchDocument):
# todo: will include tag_type once we have better understanding from UI flow.
# todo: will include tag_type once we have better understanding from UI flow.
self
.
tags
=
tags
self
.
tags
=
tags
self
.
badges
=
badges
self
.
badges
=
badges
self
.
schema_description
=
schema_description
example/sample_data/sample_schema_description.csv
0 → 100644
View file @
a4d049fe
schema_key,schema,description
hive://gold.test_schema,test_schema,"test schema description"
\ No newline at end of file
example/scripts/sample_data_loader.py
View file @
a4d049fe
...
@@ -264,6 +264,8 @@ if __name__ == "__main__":
...
@@ -264,6 +264,8 @@ if __name__ == "__main__":
'databuilder.models.table_metadata.TagMetadata'
)
'databuilder.models.table_metadata.TagMetadata'
)
run_csv_job
(
'example/sample_data/sample_table_last_updated.csv'
,
'test_table_last_updated_metadata'
,
run_csv_job
(
'example/sample_data/sample_table_last_updated.csv'
,
'test_table_last_updated_metadata'
,
'databuilder.models.table_last_updated.TableLastUpdated'
)
'databuilder.models.table_last_updated.TableLastUpdated'
)
run_csv_job
(
'example/sample_data/sample_schema_description.csv'
,
'test_schema_description'
,
'databuilder.models.schema.schema.SchemaModel'
)
create_last_updated_job
()
.
launch
()
create_last_updated_job
()
.
launch
()
...
...
tests/unit/extractor/test_neo4j_extractor.py
View file @
a4d049fe
...
@@ -113,7 +113,8 @@ class TestNeo4jExtractor(unittest.TestCase):
...
@@ -113,7 +113,8 @@ class TestNeo4jExtractor(unittest.TestCase):
total_usage
=
100
,
total_usage
=
100
,
unique_usage
=
5
,
unique_usage
=
5
,
tags
=
[
'hive'
],
tags
=
[
'hive'
],
badges
=
[
'badge1'
])
badges
=
[
'badge1'
],
schema_description
=
'schema_description'
)
extractor
.
results
=
[
result_dict
]
extractor
.
results
=
[
result_dict
]
result_obj
=
extractor
.
extract
()
result_obj
=
extractor
.
extract
()
...
...
tests/unit/loader/test_file_system_elasticsearch_json_loader.py
View file @
a4d049fe
...
@@ -101,7 +101,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
...
@@ -101,7 +101,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
total_usage
=
10
,
total_usage
=
10
,
unique_usage
=
5
,
unique_usage
=
5
,
tags
=
[
'test_tag1'
,
'test_tag2'
],
tags
=
[
'test_tag1'
,
'test_tag2'
],
badges
=
[
'badge1'
])
badges
=
[
'badge1'
],
schema_description
=
'schema description'
)
loader
.
load
(
data
)
loader
.
load
(
data
)
loader
.
close
()
loader
.
close
()
...
@@ -111,7 +112,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
...
@@ -111,7 +112,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]}'
)
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]
, "schema_description": "schema description"
}'
)
]
]
self
.
_check_results_helper
(
expected
=
expected
)
self
.
_check_results_helper
(
expected
=
expected
)
...
@@ -138,7 +139,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
...
@@ -138,7 +139,8 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
total_usage
=
10
,
total_usage
=
10
,
unique_usage
=
5
,
unique_usage
=
5
,
tags
=
[
'test_tag1'
,
'test_tag2'
],
tags
=
[
'test_tag1'
,
'test_tag2'
],
badges
=
[
'badge1'
])]
*
5
badges
=
[
'badge1'
],
schema_description
=
'schema_description'
)]
*
5
for
d
in
data
:
for
d
in
data
:
loader
.
load
(
d
)
loader
.
load
(
d
)
...
@@ -150,7 +152,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
...
@@ -150,7 +152,7 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]}'
)
'"tags": ["test_tag1", "test_tag2"], "badges": ["badge1"]
, "schema_description": "schema_description"
}'
)
]
*
5
]
*
5
self
.
_check_results_helper
(
expected
=
expected
)
self
.
_check_results_helper
(
expected
=
expected
)
tests/unit/models/test_table_elasticsearch_document.py
View file @
a4d049fe
...
@@ -23,7 +23,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
...
@@ -23,7 +23,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
total_usage
=
100
,
total_usage
=
100
,
unique_usage
=
10
,
unique_usage
=
10
,
tags
=
[
'test'
],
tags
=
[
'test'
],
badges
=
[
'badge1'
])
badges
=
[
'badge1'
],
schema_description
=
'schema description'
)
expected_document_dict
=
{
"database"
:
"test_database"
,
expected_document_dict
=
{
"database"
:
"test_database"
,
"cluster"
:
"test_cluster"
,
"cluster"
:
"test_cluster"
,
...
@@ -38,7 +39,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
...
@@ -38,7 +39,8 @@ class TestTableElasticsearchDocument(unittest.TestCase):
"total_usage"
:
100
,
"total_usage"
:
100
,
"unique_usage"
:
10
,
"unique_usage"
:
10
,
"tags"
:
[
"test"
],
"tags"
:
[
"test"
],
"badges"
:
[
"badge1"
]
"badges"
:
[
"badge1"
],
'schema_description'
:
'schema description'
}
}
result
=
test_obj
.
to_json
()
result
=
test_obj
.
to_json
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment