Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
AmendsenProject
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shaik Janipasha
AmendsenProject
Commits
8b9456f1
Unverified
Commit
8b9456f1
authored
Jun 19, 2019
by
Tao Feng
Committed by
GitHub
Jun 19, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Refactor tableESDocument to match schema in search service (#92)
parent
1f15f5a3
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
55 additions
and
54 deletions
+55
-54
neo4j_search_data_extractor.py
databuilder/extractor/neo4j_search_data_extractor.py
+3
-3
table_elasticsearch_document.py
databuilder/models/table_elasticsearch_document.py
+11
-10
setup.py
setup.py
+1
-1
test_neo4j_extractor.py
tests/unit/extractor/test_neo4j_extractor.py
+5
-5
test_file_system_elasticsearch_json_loader.py
...unit/loader/test_file_system_elasticsearch_json_loader.py
+25
-25
test_table_elasticsearch_document.py
tests/unit/models/test_table_elasticsearch_document.py
+10
-10
No files found.
databuilder/extractor/neo4j_search_data_extractor.py
View file @
8b9456f1
...
...
@@ -27,13 +27,13 @@ class Neo4jSearchDataExtractor(Extractor):
OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag)
OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
RETURN db.name as database, cluster.name AS cluster, schema.name AS schema_name,
- table.name AS table_name, table.key AS table_key, table_description.description AS table_description,
- time_stamp.last_updated_timestamp AS table_last_updated_epoch,
+ table.name AS name, table.key AS key, table_description.description AS description,
+ time_stamp.last_updated_timestamp AS last_updated_epoch,
EXTRACT(c in COLLECT(DISTINCT cols)| c.name) AS column_names,
EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions,
REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage,
COUNT(DISTINCT user.email) as unique_usage,
- COLLECT(DISTINCT tags.key) as tag_names
+ COLLECT(DISTINCT tags.key) as tags
ORDER BY table.name;
"""
)
...
...
databuilder/models/table_elasticsearch_document.py
View file @
8b9456f1
...
...
@@ -11,27 +11,28 @@ class TableESDocument(ElasticsearchDocument):
database,  # type: str
cluster,  # type: str
schema_name,  # type: str
- table_name,  # type: str
- table_key,  # type: str
- table_description,  # type: str
- table_last_updated_epoch,  # type: Optional[int]
+ name,  # type: str
+ key,  # type: str
+ description,  # type: str
+ last_updated_epoch,  # type: Optional[int]
column_names,  # type: List[str]
column_descriptions,  # type: List[str]
total_usage,  # type: int
unique_usage,  # type: int
- tag_names,  # type: List[str]
+ tags,  # type: List[str]
):
# type: (...) -> None
self.database = database
self.cluster = cluster
self.schema_name = schema_name
- self.table_name = table_name
- self.table_key = table_key
- self.table_description = table_description
- self.table_last_updated_epoch = int(table_last_updated_epoch) if table_last_updated_epoch else None
+ self.name = name
+ self.key = key
+ self.description = description
+ # todo: use last_updated_timestamp to match the record in metadata
+ self.last_updated_epoch = int(last_updated_epoch) if last_updated_epoch else None
self.column_names = column_names
self.column_descriptions = column_descriptions
self.total_usage = total_usage
self.unique_usage = unique_usage
# todo: will include tag_type once we have better understanding from UI flow.
- self.tag_names = tag_names
+ self.tags = tags
setup.py
View file @
8b9456f1
from setuptools import setup, find_packages
- __version__ = '1.3.1'
+ __version__ = '1.3.2'
setup(
...
...
tests/unit/extractor/test_neo4j_extractor.py
View file @
8b9456f1
...
...
@@ -103,15 +103,15 @@ class TestNeo4jExtractor(unittest.TestCase):
result_dict
=
dict
(
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_
name
=
'test_table_name'
,
table_
key
=
'test_table_key'
,
table_
description
=
'test_table_description'
,
table_
last_updated_epoch
=
123456789
,
name
=
'test_table_name'
,
key
=
'test_table_key'
,
description
=
'test_table_description'
,
last_updated_epoch
=
123456789
,
column_names
=
[
'test_col1'
,
'test_col2'
,
'test_col3'
],
column_descriptions
=
[
'test_description1'
,
'test_description2'
,
''
],
total_usage
=
100
,
unique_usage
=
5
,
tag
_name
s
=
[
'hive'
])
tags
=
[
'hive'
])
extractor
.
results
=
[
result_dict
]
result_obj
=
extractor
.
extract
()
...
...
tests/unit/loader/test_file_system_elasticsearch_json_loader.py
View file @
8b9456f1
...
...
@@ -64,15 +64,15 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data
=
dict
(
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_
name
=
'test_table'
,
table_
key
=
'test_table_key'
,
table_
last_updated_epoch
=
123456789
,
table_
description
=
'test_description'
,
name
=
'test_table'
,
key
=
'test_table_key'
,
last_updated_epoch
=
123456789
,
description
=
'test_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_comment1'
,
'test_comment2'
],
total_usage
=
10
,
unique_usage
=
5
,
tag
_name
s
=
[
'test_tag1'
,
'test_tag2'
])
tags
=
[
'test_tag1'
,
'test_tag2'
])
with
self
.
assertRaises
(
Exception
)
as
context
:
loader
.
load
(
data
)
# type: ignore
...
...
@@ -92,25 +92,25 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data
=
TableESDocument
(
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_
name
=
'test_table'
,
table_
key
=
'test_table_key'
,
table_
last_updated_epoch
=
123456789
,
table_
description
=
'test_description'
,
name
=
'test_table'
,
key
=
'test_table_key'
,
last_updated_epoch
=
123456789
,
description
=
'test_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_comment1'
,
'test_comment2'
],
total_usage
=
10
,
unique_usage
=
5
,
tag
_name
s
=
[
'test_tag1'
,
'test_tag2'
])
tags
=
[
'test_tag1'
,
'test_tag2'
])
loader
.
load
(
data
)
loader
.
close
()
expected
=
[
(
'{"
table_
key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
(
'{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
'"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
'"column_names": ["test_col1", "test_col2"], "
table_
name": "test_table", '
'"
table_
last_updated_epoch": 123456789,'
'"
table_
description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tag
_name
s": ["test_tag1", "test_tag2"]}'
)
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_epoch": 123456789,'
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"]}'
)
]
self
.
_check_results_helper
(
expected
=
expected
)
...
...
@@ -128,27 +128,27 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
data
=
[
TableESDocument
(
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_
name
=
'test_table'
,
table_
key
=
'test_table_key'
,
table_
last_updated_epoch
=
123456789
,
table_
description
=
'test_description'
,
name
=
'test_table'
,
key
=
'test_table_key'
,
last_updated_epoch
=
123456789
,
description
=
'test_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_comment1'
,
'test_comment2'
],
total_usage
=
10
,
unique_usage
=
5
,
tag
_name
s
=
[
'test_tag1'
,
'test_tag2'
])]
*
5
tags
=
[
'test_tag1'
,
'test_tag2'
])]
*
5
for
d
in
data
:
loader
.
load
(
d
)
loader
.
close
()
expected
=
[
(
'{"
table_
key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
(
'{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
'"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
'"column_names": ["test_col1", "test_col2"], "
table_
name": "test_table", '
'"
table_
last_updated_epoch": 123456789,'
'"
table_
description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tag
_name
s": ["test_tag1", "test_tag2"]}'
)
'"column_names": ["test_col1", "test_col2"], "name": "test_table", '
'"last_updated_epoch": 123456789,'
'"description": "test_description", "unique_usage": 5, "total_usage": 10, '
'"tags": ["test_tag1", "test_tag2"]}'
)
]
*
5
self
.
_check_results_helper
(
expected
=
expected
)
tests/unit/models/test_table_elasticsearch_document.py
View file @
8b9456f1
...
...
@@ -14,28 +14,28 @@ class TestTableElasticsearchDocument(unittest.TestCase):
test_obj
=
TableESDocument
(
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_
name
=
'test_table'
,
table_
key
=
'test_table_key'
,
table_
last_updated_epoch
=
123456789
,
table_
description
=
'test_table_description'
,
name
=
'test_table'
,
key
=
'test_table_key'
,
last_updated_epoch
=
123456789
,
description
=
'test_table_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_description1'
,
'test_description2'
],
total_usage
=
100
,
unique_usage
=
10
,
tag
_name
s
=
[
'test'
])
tags
=
[
'test'
])
expected_document_dict
=
{
"database"
:
"test_database"
,
"cluster"
:
"test_cluster"
,
"schema_name"
:
"test_schema"
,
"
table_
name"
:
"test_table"
,
"
table_
key"
:
"test_table_key"
,
"
table_
last_updated_epoch"
:
123456789
,
"
table_
description"
:
"test_table_description"
,
"name"
:
"test_table"
,
"key"
:
"test_table_key"
,
"last_updated_epoch"
:
123456789
,
"description"
:
"test_table_description"
,
"column_names"
:
[
"test_col1"
,
"test_col2"
],
"column_descriptions"
:
[
"test_description1"
,
"test_description2"
],
"total_usage"
:
100
,
"unique_usage"
:
10
,
"tag
_name
s"
:
[
"test"
]
"tags"
:
[
"test"
]
}
result
=
test_obj
.
to_json
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment