Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
AmendsenProject
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shaik Janipasha
AmendsenProject
Commits
898d067a
Unverified
Commit
898d067a
authored
Mar 21, 2019
by
Tao Feng
Committed by
GitHub
Mar 21, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move ESDocument to TableESDocument (#17)
parent
4af09d85
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
143 additions
and
120 deletions
+143
-120
elasticsearch_document.py
databuilder/models/elasticsearch_document.py
+6
-45
table_elasticsearch_document.py
databuilder/models/table_elasticsearch_document.py
+61
-0
elasticsearch_document_transformer.py
...builder/transformer/elasticsearch_document_transformer.py
+15
-15
setup.py
setup.py
+1
-1
test_file_system_elasticsearch_json_loader.py
...unit/loader/test_file_system_elasticsearch_json_loader.py
+29
-29
test_table_elasticsearch_document.py
tests/unit/models/test_table_elasticsearch_document.py
+15
-15
test_elasticsearch_document_transfer.py
.../unit/transformer/test_elasticsearch_document_transfer.py
+16
-15
No files found.
databuilder/models/elasticsearch_document.py
View file @
898d067a
import
json
from
typing
import
List
,
Optional
# noqa: F401
from
abc
import
ABCMeta
,
abstractmethod
class
ElasticsearchDocument
:
"""
Schema for the Search index document
Base class for ElasticsearchDocument
Each different resource ESDoc will be a subclass
"""
def
__init__
(
self
,
elasticsearch_index
,
# type: str
elasticsearch_type
,
# type: str
database
,
# type: str
cluster
,
# type: str
schema_name
,
# type: str
table_name
,
# type: str
table_key
,
# type: str
table_description
,
# type: str
table_last_updated_epoch
,
# type: Optional[int]
column_names
,
# type: List[str]
column_descriptions
,
# type: List[str]
total_usage
,
# type: int
unique_usage
,
# type: int
tag_names
,
# type: List[str]
):
# type: (...) -> None
self
.
elasticsearch_index
=
elasticsearch_index
self
.
elasticsearch_type
=
elasticsearch_type
self
.
database
=
database
self
.
cluster
=
cluster
self
.
schema_name
=
schema_name
self
.
table_name
=
table_name
self
.
table_key
=
table_key
self
.
table_description
=
table_description
self
.
table_last_updated_epoch
=
table_last_updated_epoch
self
.
column_names
=
column_names
self
.
column_descriptions
=
column_descriptions
self
.
total_usage
=
total_usage
self
.
unique_usage
=
unique_usage
# todo: will include tag_type once we have better understanding from UI flow.
self
.
tag_names
=
tag_names
__metaclass__
=
ABCMeta
@
abstractmethod
def
to_json
(
self
):
# type: () -> str
"""
...
...
@@ -47,13 +17,4 @@ class ElasticsearchDocument:
https://www.elastic.co/guide/en/elasticsearch/reference/6.2/docs-bulk.html
:return:
"""
index_row
=
dict
(
index
=
dict
(
_index
=
self
.
elasticsearch_index
,
_type
=
self
.
elasticsearch_type
))
data
=
json
.
dumps
(
index_row
)
+
"
\n
"
# convert rest of the object
obj_dict
=
{
k
:
v
for
k
,
v
in
sorted
(
self
.
__dict__
.
items
())
if
k
not
in
[
'elasticsearch_index'
,
'elasticsearch_type'
]}
data
+=
json
.
dumps
(
obj_dict
)
+
"
\n
"
return
data
pass
databuilder/models/table_elasticsearch_document.py
0 → 100644
View file @
898d067a
import
json
from
typing
import
List
,
Optional
# noqa: F401
from
databuilder.models.elasticsearch_document
import
ElasticsearchDocument
class TableESDocument(ElasticsearchDocument):
    """
    Search index document holding the metadata of a single table.

    Every constructor argument is stored unchanged on the instance under
    the same name; ``to_json`` later serializes those attributes.
    """

    def __init__(self,
                 elasticsearch_index,  # type: str
                 elasticsearch_type,  # type: str
                 database,  # type: str
                 cluster,  # type: str
                 schema_name,  # type: str
                 table_name,  # type: str
                 table_key,  # type: str
                 table_description,  # type: str
                 table_last_updated_epoch,  # type: Optional[int]
                 column_names,  # type: List[str]
                 column_descriptions,  # type: List[str]
                 total_usage,  # type: int
                 unique_usage,  # type: int
                 tag_names,  # type: List[str]
                 ):
        # type: (...) -> None
        # Routing information: consumed by to_json() for the bulk action
        # line and left out of the document body.
        self.elasticsearch_index = elasticsearch_index
        self.elasticsearch_type = elasticsearch_type

        # Table identity and description.
        self.database = database
        self.cluster = cluster
        self.schema_name = schema_name
        self.table_name = table_name
        self.table_key = table_key
        self.table_description = table_description
        self.table_last_updated_epoch = table_last_updated_epoch

        # Column level metadata.
        self.column_names = column_names
        self.column_descriptions = column_descriptions

        # Usage statistics.
        self.total_usage = total_usage
        self.unique_usage = unique_usage

        # todo: will include tag_type once we have better understanding from UI flow.
        self.tag_names = tag_names

    def to_json(self):
        # type: () -> str
        """
        Serialize this document for an elasticsearch bulk upload.

        The result is two newline-terminated JSON lines: first an ``index``
        action carrying ``_index`` and ``_type``, then an object with every
        other instance attribute, inserted in sorted attribute-name order.
        Bulk load JSON format is defined here:
        https://www.elastic.co/guide/en/elasticsearch/reference/6.2/docs-bulk.html
        :return: the two-line bulk payload as a string
        """
        action_meta = dict(_index=self.elasticsearch_index,
                           _type=self.elasticsearch_type)
        action_line = json.dumps(dict(index=action_meta))

        # The routing attributes already appear in the action line, so they
        # are excluded from the document body.
        routing_attrs = ('elasticsearch_index', 'elasticsearch_type')
        document = {}
        for attr_name, attr_value in sorted(self.__dict__.items()):
            if attr_name in routing_attrs:
                continue
            document[attr_name] = attr_value
        document_line = json.dumps(document)

        return action_line + "\n" + document_line + "\n"
databuilder/transformer/elasticsearch_document_transformer.py
View file @
898d067a
...
...
@@ -2,7 +2,7 @@ from pyhocon import ConfigTree # noqa: F401
from
typing
import
Optional
# noqa: F401
from
databuilder.transformer.base_transformer
import
Transformer
from
databuilder.models.
elasticsearch_document
import
Elasticsearch
Document
from
databuilder.models.
table_elasticsearch_document
import
TableES
Document
from
databuilder.models.neo4j_data
import
Neo4jDataResult
...
...
@@ -28,20 +28,20 @@ class ElasticsearchDocumentTransformer(Transformer):
if
not
isinstance
(
record
,
Neo4jDataResult
):
raise
Exception
(
"ElasticsearchDocumentTransformer expects record of type 'Neo4jDataResult'!"
)
elasticsearch_obj
=
Elasticsearch
Document
(
elasticsearch_index
=
self
.
elasticsearch_index
,
elasticsearch_type
=
self
.
elasticsearch_type
,
database
=
record
.
database
,
cluster
=
record
.
cluster
,
schema_name
=
record
.
schema_name
,
table_name
=
record
.
table_name
,
table_key
=
record
.
table_key
,
table_description
=
record
.
table_description
,
table_last_updated_epoch
=
record
.
table_last_updated_epoch
,
column_names
=
record
.
column_names
,
column_descriptions
=
record
.
column_descriptions
,
total_usage
=
record
.
total_usage
,
unique_usage
=
record
.
unique_usage
,
tag_names
=
record
.
tag_names
)
elasticsearch_obj
=
TableES
Document
(
elasticsearch_index
=
self
.
elasticsearch_index
,
elasticsearch_type
=
self
.
elasticsearch_type
,
database
=
record
.
database
,
cluster
=
record
.
cluster
,
schema_name
=
record
.
schema_name
,
table_name
=
record
.
table_name
,
table_key
=
record
.
table_key
,
table_description
=
record
.
table_description
,
table_last_updated_epoch
=
record
.
table_last_updated_epoch
,
column_names
=
record
.
column_names
,
column_descriptions
=
record
.
column_descriptions
,
total_usage
=
record
.
total_usage
,
unique_usage
=
record
.
unique_usage
,
tag_names
=
record
.
tag_names
)
return
elasticsearch_obj
def
get_scope
(
self
):
...
...
setup.py
View file @
898d067a
from
setuptools
import
setup
,
find_packages
__version__
=
'1.0.
7
'
__version__
=
'1.0.
8
'
setup
(
...
...
tests/unit/loader/test_file_system_elasticsearch_json_loader.py
View file @
898d067a
...
...
@@ -8,7 +8,7 @@ from typing import Any, List # noqa: F401
from
databuilder
import
Scoped
from
databuilder.loader.file_system_elasticsearch_json_loader
import
FSElasticsearchJSONLoader
from
databuilder.models.
elasticsearch_document
import
Elasticsearch
Document
from
databuilder.models.
table_elasticsearch_document
import
TableES
Document
class
TestFSElasticsearchJSONLoader
(
unittest
.
TestCase
):
...
...
@@ -91,20 +91,20 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
loader
.
init
(
conf
=
Scoped
.
get_scoped_conf
(
conf
=
self
.
conf
,
scope
=
loader
.
get_scope
()))
data
=
Elasticsearch
Document
(
elasticsearch_index
=
'test_es_index'
,
elasticsearch_type
=
'test_es_type'
,
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_name
=
'test_table'
,
table_key
=
'test_table_key'
,
table_last_updated_epoch
=
123456789
,
table_description
=
'test_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_comment1'
,
'test_comment2'
],
total_usage
=
10
,
unique_usage
=
5
,
tag_names
=
[
'test_tag1'
,
'test_tag2'
])
data
=
TableES
Document
(
elasticsearch_index
=
'test_es_index'
,
elasticsearch_type
=
'test_es_type'
,
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_name
=
'test_table'
,
table_key
=
'test_table_key'
,
table_last_updated_epoch
=
123456789
,
table_description
=
'test_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_comment1'
,
'test_comment2'
],
total_usage
=
10
,
unique_usage
=
5
,
tag_names
=
[
'test_tag1'
,
'test_tag2'
])
loader
.
load
(
data
)
loader
.
close
()
...
...
@@ -130,20 +130,20 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
loader
.
init
(
conf
=
Scoped
.
get_scoped_conf
(
conf
=
self
.
conf
,
scope
=
loader
.
get_scope
()))
data
=
[
Elasticsearch
Document
(
elasticsearch_index
=
'test_es_index'
,
elasticsearch_type
=
'test_es_type'
,
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_name
=
'test_table'
,
table_key
=
'test_table_key'
,
table_last_updated_epoch
=
123456789
,
table_description
=
'test_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_comment1'
,
'test_comment2'
],
total_usage
=
10
,
unique_usage
=
5
,
tag_names
=
[
'test_tag1'
,
'test_tag2'
])]
*
5
data
=
[
TableES
Document
(
elasticsearch_index
=
'test_es_index'
,
elasticsearch_type
=
'test_es_type'
,
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_name
=
'test_table'
,
table_key
=
'test_table_key'
,
table_last_updated_epoch
=
123456789
,
table_description
=
'test_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_comment1'
,
'test_comment2'
],
total_usage
=
10
,
unique_usage
=
5
,
tag_names
=
[
'test_tag1'
,
'test_tag2'
])]
*
5
for
d
in
data
:
loader
.
load
(
d
)
...
...
tests/unit/models/test_elasticsearch_document.py
→
tests/unit/models/test_
table_
elasticsearch_document.py
View file @
898d067a
import
json
import
unittest
from
databuilder.models.
elasticsearch_document
import
Elasticsearch
Document
from
databuilder.models.
table_elasticsearch_document
import
TableES
Document
class
TestElasticsearchDocument
(
unittest
.
TestCase
):
...
...
@@ -11,20 +11,20 @@ class TestElasticsearchDocument(unittest.TestCase):
"""
Test string generated from to_json method
"""
test_obj
=
Elasticsearch
Document
(
elasticsearch_index
=
'test_index'
,
elasticsearch_type
=
'test_type'
,
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_name
=
'test_table'
,
table_key
=
'test_table_key'
,
table_last_updated_epoch
=
123456789
,
table_description
=
'test_table_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_description1'
,
'test_description2'
],
total_usage
=
100
,
unique_usage
=
10
,
tag_names
=
[
'test'
])
test_obj
=
TableES
Document
(
elasticsearch_index
=
'test_index'
,
elasticsearch_type
=
'test_type'
,
database
=
'test_database'
,
cluster
=
'test_cluster'
,
schema_name
=
'test_schema'
,
table_name
=
'test_table'
,
table_key
=
'test_table_key'
,
table_last_updated_epoch
=
123456789
,
table_description
=
'test_table_description'
,
column_names
=
[
'test_col1'
,
'test_col2'
],
column_descriptions
=
[
'test_description1'
,
'test_description2'
],
total_usage
=
100
,
unique_usage
=
10
,
tag_names
=
[
'test'
])
expected_index_dict
=
{
"index"
:
{
"_type"
:
"test_type"
,
"_index"
:
"test_index"
}}
expected_document_dict
=
{
"database"
:
"test_database"
,
...
...
tests/unit/transformer/test_elasticsearch_document_transfer.py
View file @
898d067a
...
...
@@ -5,6 +5,7 @@ from pyhocon import ConfigFactory # noqa: F401
from
databuilder
import
Scoped
from
databuilder.transformer.elasticsearch_document_transformer
import
ElasticsearchDocumentTransformer
from
databuilder.models.elasticsearch_document
import
ElasticsearchDocument
from
databuilder.models.table_elasticsearch_document
import
TableESDocument
from
databuilder.models.neo4j_data
import
Neo4jDataResult
...
...
@@ -71,21 +72,21 @@ class TestElasticsearchDocumentTransformer(unittest.TestCase):
result
=
transformer
.
transform
(
data
)
expected
=
Elasticsearch
Document
(
elasticsearch_index
=
'test_es_index'
,
elasticsearch_type
=
'test_es_type'
,
database
=
"test_database"
,
cluster
=
"test_cluster"
,
schema_name
=
"test_schema_name"
,
table_name
=
"test_table_name"
,
table_key
=
"test_table_key"
,
table_last_updated_epoch
=
123456789
,
table_description
=
"test_table_description"
,
column_names
=
[
"test_col1"
,
"test_col2"
],
column_descriptions
=
[
"test_col_description1"
,
"test_col_description2"
],
total_usage
=
10
,
unique_usage
=
5
,
tag_names
=
[
"test_tag1"
,
"test_tag2"
])
expected
=
TableES
Document
(
elasticsearch_index
=
'test_es_index'
,
elasticsearch_type
=
'test_es_type'
,
database
=
"test_database"
,
cluster
=
"test_cluster"
,
schema_name
=
"test_schema_name"
,
table_name
=
"test_table_name"
,
table_key
=
"test_table_key"
,
table_last_updated_epoch
=
123456789
,
table_description
=
"test_table_description"
,
column_names
=
[
"test_col1"
,
"test_col2"
],
column_descriptions
=
[
"test_col_description1"
,
"test_col_description2"
],
total_usage
=
10
,
unique_usage
=
5
,
tag_names
=
[
"test_tag1"
,
"test_tag2"
])
self
.
assertIsInstance
(
result
,
ElasticsearchDocument
)
self
.
assertDictEqual
(
vars
(
result
),
vars
(
expected
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment