Shaik Janipasha / AmendsenProject · Commits · 8b9456f1

Unverified commit 8b9456f1, authored Jun 19, 2019 by Tao Feng; committed by GitHub on Jun 19, 2019
Refactor tableESDocument to match schema in search service (#92)
parent 1f15f5a3
Showing 6 changed files with 55 additions and 54 deletions
databuilder/extractor/neo4j_search_data_extractor.py             +3  -3
databuilder/models/table_elasticsearch_document.py               +11 -10
setup.py                                                          +1  -1
tests/unit/extractor/test_neo4j_extractor.py                      +5  -5
tests/unit/loader/test_file_system_elasticsearch_json_loader.py  +25 -25
tests/unit/models/test_table_elasticsearch_document.py           +10 -10
databuilder/extractor/neo4j_search_data_extractor.py

@@ -27,13 +27,13 @@ class Neo4jSearchDataExtractor(Extractor):
         OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag)
         OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
         RETURN db.name as database, cluster.name AS cluster, schema.name AS schema_name,
-        table.name AS table_name, table.key AS table_key, table_description.description AS table_description,
-        time_stamp.last_updated_timestamp AS table_last_updated_epoch,
+        table.name AS name, table.key AS key, table_description.description AS description,
+        time_stamp.last_updated_timestamp AS last_updated_epoch,
         EXTRACT(c in COLLECT(DISTINCT cols)| c.name) AS column_names,
         EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions,
         REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage,
         COUNT(DISTINCT user.email) as unique_usage,
-        COLLECT(DISTINCT tags.key) as tag_names
+        COLLECT(DISTINCT tags.key) as tags
         ORDER BY table.name;
         """
    )
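The renamed aliases are not cosmetic: they line up one-for-one with the keyword arguments of the refactored TableESDocument in the next file. The snippet below is only a sketch of that correspondence, with invented sample values; how Neo4jSearchDataExtractor actually hands rows to the model is not shown in this diff.

# Sketch only, not code from this commit. A result row shaped like the updated
# query output can be expanded straight into the refactored model, because the
# Cypher aliases now match the constructor's keyword names.
from databuilder.models.table_elasticsearch_document import TableESDocument

row = dict(database='hive', cluster='gold', schema_name='core',
           name='orders', key='hive://gold.core/orders', description='Order facts',
           last_updated_epoch=1560902400,
           column_names=['order_id'], column_descriptions=['primary key'],
           total_usage=100, unique_usage=10, tags=['core'])

doc = TableESDocument(**row)  # every alias maps onto a keyword argument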
databuilder/models/table_elasticsearch_document.py

@@ -11,27 +11,28 @@ class TableESDocument(ElasticsearchDocument):
                  database,  # type: str
                  cluster,  # type: str
                  schema_name,  # type: str
-                 table_name,  # type: str
-                 table_key,  # type: str
-                 table_description,  # type: str
-                 table_last_updated_epoch,  # type: Optional[int]
+                 name,  # type: str
+                 key,  # type: str
+                 description,  # type: str
+                 last_updated_epoch,  # type: Optional[int]
                  column_names,  # type: List[str]
                  column_descriptions,  # type: List[str]
                  total_usage,  # type: int
                  unique_usage,  # type: int
-                 tag_names,  # type: List[str]
+                 tags,  # type: List[str]
                  ):
         # type: (...) -> None
         self.database = database
         self.cluster = cluster
         self.schema_name = schema_name
-        self.table_name = table_name
-        self.table_key = table_key
-        self.table_description = table_description
-        self.table_last_updated_epoch = int(table_last_updated_epoch) if table_last_updated_epoch else None
+        self.name = name
+        self.key = key
+        self.description = description
+        # todo: use last_updated_timestamp to match the record in metadata
+        self.last_updated_epoch = int(last_updated_epoch) if last_updated_epoch else None
         self.column_names = column_names
         self.column_descriptions = column_descriptions
         self.total_usage = total_usage
         self.unique_usage = unique_usage
         # todo: will include tag_type once we have better understanding from UI flow.
-        self.tag_names = tag_names
+        self.tags = tags
setup.py

 from setuptools import setup, find_packages

-__version__ = '1.3.1'
+__version__ = '1.3.2'

 setup(
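The version bump from 1.3.1 to 1.3.2 means the renamed fields ship with the next release, so callers built against the old keyword names should pin accordingly. A version guard like the one below can flag that; the distribution name 'amundsen-databuilder' is an assumption here, since the name field of setup.py is not shown in this diff.

# Hedged sketch, not part of this commit: fail fast if an older databuilder
# (still using table_name/table_key/tag_names) is installed. The distribution
# name 'amundsen-databuilder' is an assumption.
import pkg_resources

installed = pkg_resources.get_distribution('amundsen-databuilder').version
if pkg_resources.parse_version(installed) < pkg_resources.parse_version('1.3.2'):
    raise RuntimeError('upgrade to 1.3.2+ before switching to the renamed TableESDocument fields')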
tests/unit/extractor/test_neo4j_extractor.py

@@ -103,15 +103,15 @@ class TestNeo4jExtractor(unittest.TestCase):
         result_dict = dict(database='test_database',
                            cluster='test_cluster',
                            schema_name='test_schema',
-                           table_name='test_table_name',
-                           table_key='test_table_key',
-                           table_description='test_table_description',
-                           table_last_updated_epoch=123456789,
+                           name='test_table_name',
+                           key='test_table_key',
+                           description='test_table_description',
+                           last_updated_epoch=123456789,
                            column_names=['test_col1', 'test_col2', 'test_col3'],
                            column_descriptions=['test_description1', 'test_description2', ''],
                            total_usage=100,
                            unique_usage=5,
-                           tag_names=['hive'])
+                           tags=['hive'])

         extractor.results = [result_dict]
         result_obj = extractor.extract()
tests/unit/loader/test_file_system_elasticsearch_json_loader.py

@@ -64,15 +64,15 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
         data = dict(database='test_database',
                     cluster='test_cluster',
                     schema_name='test_schema',
-                    table_name='test_table',
-                    table_key='test_table_key',
-                    table_last_updated_epoch=123456789,
-                    table_description='test_description',
+                    name='test_table',
+                    key='test_table_key',
+                    last_updated_epoch=123456789,
+                    description='test_description',
                     column_names=['test_col1', 'test_col2'],
                     column_descriptions=['test_comment1', 'test_comment2'],
                     total_usage=10,
                     unique_usage=5,
-                    tag_names=['test_tag1', 'test_tag2'])
+                    tags=['test_tag1', 'test_tag2'])

         with self.assertRaises(Exception) as context:
             loader.load(data)  # type: ignore

@@ -92,25 +92,25 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
         data = TableESDocument(database='test_database',
                                cluster='test_cluster',
                                schema_name='test_schema',
-                               table_name='test_table',
-                               table_key='test_table_key',
-                               table_last_updated_epoch=123456789,
-                               table_description='test_description',
+                               name='test_table',
+                               key='test_table_key',
+                               last_updated_epoch=123456789,
+                               description='test_description',
                                column_names=['test_col1', 'test_col2'],
                                column_descriptions=['test_comment1', 'test_comment2'],
                                total_usage=10,
                                unique_usage=5,
-                               tag_names=['test_tag1', 'test_tag2'])
+                               tags=['test_tag1', 'test_tag2'])

         loader.load(data)
         loader.close()

         expected = [
-            ('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
+            ('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
              '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
-             '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
-             '"table_last_updated_epoch": 123456789,'
-             '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
-             '"tag_names": ["test_tag1", "test_tag2"]}')
+             '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
+             '"last_updated_epoch": 123456789,'
+             '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
+             '"tags": ["test_tag1", "test_tag2"]}')
         ]

         self._check_results_helper(expected=expected)

@@ -128,27 +128,27 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
         data = [TableESDocument(database='test_database',
                                 cluster='test_cluster',
                                 schema_name='test_schema',
-                                table_name='test_table',
-                                table_key='test_table_key',
-                                table_last_updated_epoch=123456789,
-                                table_description='test_description',
+                                name='test_table',
+                                key='test_table_key',
+                                last_updated_epoch=123456789,
+                                description='test_description',
                                 column_names=['test_col1', 'test_col2'],
                                 column_descriptions=['test_comment1', 'test_comment2'],
                                 total_usage=10,
                                 unique_usage=5,
-                                tag_names=['test_tag1', 'test_tag2'])] * 5
+                                tags=['test_tag1', 'test_tag2'])] * 5

         for d in data:
             loader.load(d)
         loader.close()

         expected = [
-            ('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
+            ('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
              '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
-             '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
-             '"table_last_updated_epoch": 123456789,'
-             '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
-             '"tag_names": ["test_tag1", "test_tag2"]}')
+             '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
+             '"last_updated_epoch": 123456789,'
+             '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
+             '"tags": ["test_tag1", "test_tag2"]}')
         ] * 5

         self._check_results_helper(expected=expected)
tests/unit/models/test_table_elasticsearch_document.py

@@ -14,28 +14,28 @@ class TestTableElasticsearchDocument(unittest.TestCase):
         test_obj = TableESDocument(database='test_database',
                                    cluster='test_cluster',
                                    schema_name='test_schema',
-                                   table_name='test_table',
-                                   table_key='test_table_key',
-                                   table_last_updated_epoch=123456789,
-                                   table_description='test_table_description',
+                                   name='test_table',
+                                   key='test_table_key',
+                                   last_updated_epoch=123456789,
+                                   description='test_table_description',
                                    column_names=['test_col1', 'test_col2'],
                                    column_descriptions=['test_description1', 'test_description2'],
                                    total_usage=100,
                                    unique_usage=10,
-                                   tag_names=['test'])
+                                   tags=['test'])

         expected_document_dict = {"database": "test_database",
                                   "cluster": "test_cluster",
                                   "schema_name": "test_schema",
-                                  "table_name": "test_table",
-                                  "table_key": "test_table_key",
-                                  "table_last_updated_epoch": 123456789,
-                                  "table_description": "test_table_description",
+                                  "name": "test_table",
+                                  "key": "test_table_key",
+                                  "last_updated_epoch": 123456789,
+                                  "description": "test_table_description",
                                   "column_names": ["test_col1", "test_col2"],
                                   "column_descriptions": ["test_description1", "test_description2"],
                                   "total_usage": 100,
                                   "unique_usage": 10,
-                                  "tag_names": ["test"]
+                                  "tags": ["test"]
                                   }

         result = test_obj.to_json()
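Taken together, the diffs rename five fields: table_name, table_key, table_description, table_last_updated_epoch, and tag_names. The helper below is not part of the commit; it simply collects that old-to-new key mapping in one place, which may be handy for adapting stored documents or caller-side dicts still using the old schema.

# Not from this commit: the field renames collected from the diffs above.
FIELD_RENAMES = {
    'table_name': 'name',
    'table_key': 'key',
    'table_description': 'description',
    'table_last_updated_epoch': 'last_updated_epoch',
    'tag_names': 'tags',
}


def migrate_table_document(old_doc):
    # type: (dict) -> dict
    """Return a copy of old_doc with pre-1.3.2 keys renamed to the new schema."""
    return {FIELD_RENAMES.get(key, key): value for key, value in old_doc.items()}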