Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
AmendsenProject
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shaik Janipasha
AmendsenProject
Commits
975a59a2
Unverified
Commit
975a59a2
authored
Apr 20, 2020
by
Jin Hyuk Chang
Committed by
GitHub
Apr 20, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add description to schema (#242)
* Add description to schema * Update * Update
parent
cb6dc2ed
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
241 additions
and
16 deletions
+241
-16
generic_extractor.py
databuilder/extractor/generic_extractor.py
+1
-1
__init__.py
databuilder/models/schema/__init__.py
+0
-0
schema.py
databuilder/models/schema/schema.py
+58
-0
schema_constant.py
databuilder/models/schema/schema_constant.py
+8
-0
table_metadata.py
databuilder/models/table_metadata.py
+5
-4
base_transformer.py
databuilder/transformer/base_transformer.py
+10
-5
setup.py
setup.py
+1
-1
test_generic_extractor.py
tests/unit/extractor/test_generic_extractor.py
+22
-5
__init__.py
tests/unit/models/schema/__init__.py
+0
-0
test_schema.py
tests/unit/models/schema/test_schema.py
+82
-0
test_chained_transformer.py
tests/unit/transformer/test_chained_transformer.py
+54
-0
No files found.
databuilder/extractor/generic_extractor.py
View file @
975a59a2
...
...
@@ -32,7 +32,7 @@ class GenericExtractor(Extractor):
self
.
_iter
=
iter
(
results
)
else
:
raise
RuntimeError
(
'model class needs to be provided!'
)
self
.
_iter
=
iter
(
self
.
values
)
def
extract
(
self
):
# type: () -> Any
...
...
databuilder/models/schema/__init__.py
0 → 100644
View file @
975a59a2
databuilder/models/schema/schema.py
0 → 100644
View file @
975a59a2
from
typing
import
Dict
,
Any
,
Union
,
Iterator
# noqa: F401
from
databuilder.models.neo4j_csv_serde
import
(
Neo4jCsvSerializable
,
NODE_LABEL
,
NODE_KEY
)
from
databuilder.models.schema.schema_constant
import
SCHEMA_NODE_LABEL
,
SCHEMA_NAME_ATTR
from
databuilder.models.table_metadata
import
DescriptionMetadata
class SchemaModel(Neo4jCsvSerializable):
    """
    Neo4j-serializable model for a schema and its optional description.

    Emits one schema node and, when a non-empty description is supplied, one
    description node plus the relation that links the schema to it.
    """

    def __init__(self,
                 schema_key,
                 schema,
                 description=None,
                 description_source=None,
                 **kwargs
                 ):
        # type: (str, str, Union[str, None], Union[str, None], **Any) -> None
        """
        :param schema_key: key of the schema node; also the prefix of the
            description node key
        :param schema: schema name, stored under SCHEMA_NAME_ATTR on the node
        :param description: description text; a falsy value (None or '')
            means no description node or relation is produced
        :param description_source: forwarded as ``source`` to
            DescriptionMetadata.create_description_metadata
        :param kwargs: accepted and ignored
        """
        self._schema_key = schema_key
        self._schema = schema
        self._description = DescriptionMetadata.create_description_metadata(text=description,
                                                                            source=description_source) \
            if description else None
        # Both are generator objects: nothing is produced until the first
        # create_next_node / create_next_relation call.
        self._node_iterator = self._create_node_iterator()
        self._relation_iterator = self._create_relation_iterator()

    def create_next_node(self):
        # type: () -> Union[Dict[str, Any], None]
        """
        Return the next node dict, or None once all nodes have been returned.
        """
        try:
            return next(self._node_iterator)
        except StopIteration:
            return None

    def _create_node_iterator(self):
        # type: () -> Iterator[Dict[str, Any]]
        """
        Yield the schema node first, then the description node if one exists.
        """
        yield {
            NODE_LABEL: SCHEMA_NODE_LABEL,
            NODE_KEY: self._schema_key,
            SCHEMA_NAME_ATTR: self._schema,
        }

        if self._description:
            yield self._description.get_node_dict(self._get_description_node_key())

    def create_next_relation(self):
        # type: () -> Union[Dict[str, Any], None]
        """
        Return the next relation dict, or None once all relations have been
        returned.
        """
        try:
            return next(self._relation_iterator)
        except StopIteration:
            return None

    def _get_description_node_key(self):
        # type: () -> str
        """
        Build the description node key as '<schema_key>/<description id>'.

        Only called when a description exists; self._description must not be
        None here.
        """
        return '{}/{}'.format(self._schema_key, self._description.get_description_id())

    def _create_relation_iterator(self):
        # type: () -> Iterator[Dict[str, Any]]
        """
        Yield the schema -> description relation; yields nothing when there is
        no description.
        """
        if self._description:
            yield self._description.get_relation(start_node=SCHEMA_NODE_LABEL,
                                                 start_key=self._schema_key,
                                                 end_key=self._get_description_node_key())
databuilder/models/schema/schema_constant.py
0 → 100644
View file @
975a59a2
# Label of schema nodes; also used as the start label of schema relations.
SCHEMA_NODE_LABEL = 'Schema'
# Node attribute under which the schema name is stored.
SCHEMA_NAME_ATTR = 'name'

# Relation type pointing at a schema, and its reverse.
SCHEMA_RELATION_TYPE = 'SCHEMA'
SCHEMA_REVERSE_RELATION_TYPE = 'SCHEMA_OF'

# Template for schema node keys; placeholders: db, cluster, schema.
DATABASE_SCHEMA_KEY_FORMAT = '{db}://{cluster}.{schema}'
databuilder/models/table_metadata.py
View file @
975a59a2
...
...
@@ -8,6 +8,7 @@ from databuilder.models.neo4j_csv_serde import (
Neo4jCsvSerializable
,
NODE_LABEL
,
NODE_KEY
,
RELATION_START_KEY
,
RELATION_END_KEY
,
RELATION_START_LABEL
,
RELATION_END_LABEL
,
RELATION_TYPE
,
RELATION_REVERSE_TYPE
)
from
databuilder.publisher.neo4j_csv_publisher
import
UNQUOTED_SUFFIX
from
databuilder.models.schema
import
schema_constant
DESCRIPTION_NODE_LABEL_VAL
=
'Description'
DESCRIPTION_NODE_LABEL
=
DESCRIPTION_NODE_LABEL_VAL
...
...
@@ -208,11 +209,11 @@ class TableMetadata(Neo4jCsvSerializable):
CLUSTER_NODE_LABEL
=
cluster_constants
.
CLUSTER_NODE_LABEL
CLUSTER_KEY_FORMAT
=
'{db}://{cluster}'
CLUSTER_SCHEMA_RELATION_TYPE
=
'SCHEMA'
SCHEMA_CLUSTER_RELATION_TYPE
=
'SCHEMA_OF'
CLUSTER_SCHEMA_RELATION_TYPE
=
schema_constant
.
SCHEMA_RELATION_TYPE
SCHEMA_CLUSTER_RELATION_TYPE
=
schema_constant
.
SCHEMA_REVERSE_RELATION_TYPE
SCHEMA_NODE_LABEL
=
'Schema'
SCHEMA_KEY_FORMAT
=
'{db}://{cluster}.{schema}'
SCHEMA_NODE_LABEL
=
schema_constant
.
SCHEMA_NODE_LABEL
SCHEMA_KEY_FORMAT
=
schema_constant
.
DATABASE_SCHEMA_KEY_FORMAT
SCHEMA_TABLE_RELATION_TYPE
=
'TABLE'
TABLE_SCHEMA_RELATION_TYPE
=
'TABLE_OF'
...
...
databuilder/transformer/base_transformer.py
View file @
975a59a2
import
abc
from
pyhocon
import
ConfigTree
# noqa: F401
from
typing
import
Any
,
Iterable
# noqa: F401
from
typing
import
Any
,
Iterable
,
Optional
# noqa: F401
from
databuilder
import
Scoped
...
...
@@ -42,13 +42,18 @@ class ChainedTransformer(Transformer):
"""
A chained transformer that iterates transformers and transforms a record
"""
def
__init__
(
self
,
transformers
):
# type: (Iterable[Transformer]) -> None
def
__init__
(
self
,
transformers
,
is_init_transformers
=
False
):
# type: (Iterable[Transformer], Optional[bool]) -> None
self
.
transformers
=
transformers
self
.
is_init_transformers
=
is_init_transformers
def
init
(
self
,
conf
):
# type: (ConfigTree) -> None
pass
if
self
.
is_init_transformers
:
for
transformer
in
self
.
transformers
:
transformer
.
init
(
Scoped
.
get_scoped_conf
(
conf
,
transformer
.
get_scope
()))
def
transform
(
self
,
record
):
# type: (Any) -> Any
...
...
@@ -62,7 +67,7 @@ class ChainedTransformer(Transformer):
def
get_scope
(
self
):
# type: () -> str
pass
return
'transformer.chained'
def
close
(
self
):
# type: () -> None
...
...
setup.py
View file @
975a59a2
...
...
@@ -2,7 +2,7 @@ import os
from
setuptools
import
setup
,
find_packages
__version__
=
'2.5.
3
'
__version__
=
'2.5.
4
'
requirements_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
)),
'requirements.txt'
)
with
open
(
requirements_path
)
as
requirements_file
:
...
...
tests/unit/extractor/test_generic_extractor.py
View file @
975a59a2
...
...
@@ -8,23 +8,40 @@ from databuilder.extractor.generic_extractor import GenericExtractor
class
TestGenericExtractor
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
test_extraction_with_model_class
(
self
):
# type: () -> None
"""
Test Extraction using model class
"""
config_dict
=
{
'extractor.generic.extraction_items'
:
[{
'timestamp'
:
10000000
}],
'extractor.generic.model_class'
:
'databuilder.models.neo4j_es_last_updated.Neo4jESLastUpdated'
,
}
conf
=
ConfigFactory
.
from_dict
(
config_dict
)
extractor
=
GenericExtractor
()
self
.
conf
=
ConfigFactory
.
from_dict
(
config_dict
)
extractor
.
init
(
Scoped
.
get_scoped_conf
(
conf
=
conf
,
scope
=
extractor
.
get_scope
()))
def
test_extraction_with_model_class
(
self
):
result
=
extractor
.
extract
()
self
.
assertEquals
(
result
.
timestamp
,
10000000
)
def
test_extraction_without_model_class
(
self
):
# type: () -> None
"""
Test Extraction without model class
"""
config_dict
=
{
'extractor.generic.extraction_items'
:
[{
'foo'
:
1
},
{
'bar'
:
2
}],
}
conf
=
ConfigFactory
.
from_dict
(
config_dict
)
extractor
=
GenericExtractor
()
extractor
.
init
(
Scoped
.
get_scoped_conf
(
conf
=
self
.
conf
,
self
.
conf
=
ConfigFactory
.
from_dict
(
config_dict
)
extractor
.
init
(
Scoped
.
get_scoped_conf
(
conf
=
conf
,
scope
=
extractor
.
get_scope
()))
result
=
extractor
.
extract
(
)
self
.
assertEquals
(
result
.
timestamp
,
10000000
)
self
.
assertEquals
(
extractor
.
extract
(),
{
'foo'
:
1
}
)
self
.
assertEquals
(
extractor
.
extract
(),
{
'bar'
:
2
}
)
tests/unit/models/schema/__init__.py
0 → 100644
View file @
975a59a2
tests/unit/models/schema/test_schema.py
0 → 100644
View file @
975a59a2
import
unittest
from
databuilder.models.schema.schema
import
SchemaModel
class TestSchemaDescription(unittest.TestCase):
    """Checks the nodes and relations that SchemaModel hands out one at a time."""

    def test_create_nodes(self):
        # type: () -> None
        # Description without a source: schema node, description node, then None.
        model = SchemaModel(schema_key='db://cluster.schema',
                            schema='schema_name',
                            description='foo')

        schema_node = model.create_next_node()
        self.assertDictEqual(schema_node,
                             {'name': 'schema_name', 'KEY': 'db://cluster.schema', 'LABEL': 'Schema'})

        description_node = model.create_next_node()
        self.assertDictEqual(description_node,
                             {'description_source': 'description', 'description': 'foo',
                              'KEY': 'db://cluster.schema/_description', 'LABEL': 'Description'})

        exhausted = model.create_next_node()
        self.assertIsNone(exhausted)

    def test_create_nodes_no_description(self):
        # type: () -> None
        # No description: only the schema node is produced.
        model = SchemaModel(schema_key='db://cluster.schema',
                            schema='schema_name')

        schema_node = model.create_next_node()
        self.assertDictEqual(schema_node,
                             {'name': 'schema_name', 'KEY': 'db://cluster.schema', 'LABEL': 'Schema'})

        exhausted = model.create_next_node()
        self.assertIsNone(exhausted)

    def test_create_nodes_programmatic_description(self):
        # type: () -> None
        # Description with an explicit source: schema node, programmatic description node, then None.
        model = SchemaModel(schema_key='db://cluster.schema',
                            schema='schema_name',
                            description='foo',
                            description_source='bar')

        schema_node = model.create_next_node()
        self.assertDictEqual(schema_node,
                             {'name': 'schema_name', 'KEY': 'db://cluster.schema', 'LABEL': 'Schema'})

        description_node = model.create_next_node()
        self.assertDictEqual(description_node,
                             {'description_source': 'bar', 'description': 'foo',
                              'KEY': 'db://cluster.schema/_bar_description',
                              'LABEL': 'Programmatic_Description'})

        exhausted = model.create_next_node()
        self.assertIsNone(exhausted)

    def test_create_relation(self):
        # type: () -> None
        # Description without a source: one schema -> description relation, then None.
        model = SchemaModel(schema_key='db://cluster.schema',
                            schema='schema_name',
                            description='foo')

        actual = model.create_next_relation()
        expected = {
            'END_KEY': 'db://cluster.schema/_description',
            'START_LABEL': 'Schema',
            'END_LABEL': 'Description',
            'START_KEY': 'db://cluster.schema',
            'TYPE': 'DESCRIPTION',
            'REVERSE_TYPE': 'DESCRIPTION_OF',
        }

        self.assertEqual(expected, actual)
        self.assertIsNone(model.create_next_relation())

    def test_create_relation_no_description(self):
        # type: () -> None
        # No description: no relation at all.
        model = SchemaModel(schema_key='db://cluster.schema',
                            schema='schema_name')

        first_relation = model.create_next_relation()
        self.assertIsNone(first_relation)

    def test_create_relation_programmatic_description(self):
        # type: () -> None
        # Description with an explicit source: one relation to the programmatic description node.
        model = SchemaModel(schema_key='db://cluster.schema',
                            schema='schema_name',
                            description='foo',
                            description_source='bar')

        actual = model.create_next_relation()
        expected = {
            'END_KEY': 'db://cluster.schema/_bar_description',
            'START_LABEL': 'Schema',
            'END_LABEL': 'Programmatic_Description',
            'START_KEY': 'db://cluster.schema',
            'TYPE': 'DESCRIPTION',
            'REVERSE_TYPE': 'DESCRIPTION_OF',
        }

        self.assertEqual(expected, actual)
        self.assertIsNone(model.create_next_relation())
tests/unit/transformer/test_chained_transformer.py
0 → 100644
View file @
975a59a2
import
unittest
from
mock
import
MagicMock
from
pyhocon
import
ConfigFactory
from
databuilder.transformer.base_transformer
import
ChainedTransformer
class TestChainedTransformer(unittest.TestCase):
    """Checks when ChainedTransformer.init initialises the transformers it wraps."""

    def test_init_not_called(self):
        # type: () -> None
        # Default construction: child init() is never called, transform() still runs once each.
        first = MagicMock()
        second = MagicMock()
        chain = ChainedTransformer(transformers=[first, second])

        empty_conf = ConfigFactory.from_dict({})
        chain.init(conf=empty_conf)

        chain.transform({'foo': 'bar'})

        for child in (first, second):
            child.init.assert_not_called()
            child.transform.assert_called_once()

    def test_init_called(self):
        # type: () -> None
        # is_init_transformers=True: each child gets init() once and transform() once.
        first = MagicMock()
        first.get_scope.return_value = 'foo'
        second = MagicMock()
        second.get_scope.return_value = 'bar'
        chain = ChainedTransformer(transformers=[first, second],
                                   is_init_transformers=True)

        empty_conf = ConfigFactory.from_dict({})
        chain.init(conf=empty_conf)

        chain.transform({'foo': 'bar'})

        for child in (first, second):
            child.init.assert_called_once()
            child.transform.assert_called_once()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment