Commit a9d8e361 authored by Tao feng

Add User node with different user attributes

parent 4a15ac71
from typing import Iterable, Union, Dict, Any, Iterator # noqa: F401 from typing import Iterable, Union, Dict, Any, Iterator # noqa: F401
from databuilder.models.neo4j_csv_serde import ( from databuilder.models.neo4j_csv_serde import (
Neo4jCsvSerializable, NODE_KEY, NODE_LABEL, RELATION_START_KEY, RELATION_END_KEY, Neo4jCsvSerializable, RELATION_START_KEY, RELATION_END_KEY,
RELATION_START_LABEL, RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE RELATION_START_LABEL, RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE
) )
from databuilder.models.table_metadata import TableMetadata from databuilder.models.table_metadata import TableMetadata
from databuilder.models.user import User
from databuilder.publisher.neo4j_csv_publisher import UNQUOTED_SUFFIX from databuilder.publisher.neo4j_csv_publisher import UNQUOTED_SUFFIX
...@@ -42,10 +43,6 @@ class TableColumnUsage(Neo4jCsvSerializable): ...@@ -42,10 +43,6 @@ class TableColumnUsage(Neo4jCsvSerializable):
A model represents user <--> column graph model A model represents user <--> column graph model
Currently it only support to serialize to table level Currently it only support to serialize to table level
""" """
USER_NODE_LABEL = 'User'
USER_NODE_KEY_FORMAT = '{email}'
USER_NODE_EMAIL = 'email'
TABLE_NODE_LABEL = TableMetadata.TABLE_NODE_LABEL TABLE_NODE_LABEL = TableMetadata.TABLE_NODE_LABEL
TABLE_NODE_KEY_FORMAT = TableMetadata.TABLE_KEY_FORMAT TABLE_NODE_KEY_FORMAT = TableMetadata.TABLE_KEY_FORMAT
...@@ -64,7 +61,7 @@ class TableColumnUsage(Neo4jCsvSerializable): ...@@ -64,7 +61,7 @@ class TableColumnUsage(Neo4jCsvSerializable):
raise NotImplementedError('Column is not supported yet {}'.format(col_readers)) raise NotImplementedError('Column is not supported yet {}'.format(col_readers))
self.col_readers = col_readers self.col_readers = col_readers
self._node_iterator = self._create_next_node() self._node_iterator = self._create_node_iterator()
self._rel_iter = self._create_rel_iterator() self._rel_iter = self._create_rel_iterator()
def create_next_node(self): def create_next_node(self):
...@@ -75,16 +72,12 @@ class TableColumnUsage(Neo4jCsvSerializable): ...@@ -75,16 +72,12 @@ class TableColumnUsage(Neo4jCsvSerializable):
except StopIteration: except StopIteration:
return None return None
def _create_next_node(self): def _create_node_iterator(self):
# type: () -> Iterator[Any] # type: () -> Iterator[Any]
for col_reader in self.col_readers: for col_reader in self.col_readers:
if col_reader.column != '*': if col_reader.column == '*':
raise NotImplementedError('Column is not supported yet {}'.format(col_reader)) # using yield for better memory efficiency
yield { yield User(email=col_reader.user_email).create_nodes()[0]
NODE_LABEL: TableColumnUsage.USER_NODE_LABEL,
NODE_KEY: self._get_user_key(col_reader.user_email),
TableColumnUsage.USER_NODE_EMAIL: col_reader.user_email
}
def create_next_relation(self): def create_next_relation(self):
# type: () -> Union[Dict[str, Any], None] # type: () -> Union[Dict[str, Any], None]
...@@ -99,7 +92,7 @@ class TableColumnUsage(Neo4jCsvSerializable): ...@@ -99,7 +92,7 @@ class TableColumnUsage(Neo4jCsvSerializable):
for col_reader in self.col_readers: for col_reader in self.col_readers:
yield { yield {
RELATION_START_LABEL: TableMetadata.TABLE_NODE_LABEL, RELATION_START_LABEL: TableMetadata.TABLE_NODE_LABEL,
RELATION_END_LABEL: TableColumnUsage.USER_NODE_LABEL, RELATION_END_LABEL: User.USER_NODE_LABEL,
RELATION_START_KEY: self._get_table_key(col_reader), RELATION_START_KEY: self._get_table_key(col_reader),
RELATION_END_KEY: self._get_user_key(col_reader.user_email), RELATION_END_KEY: self._get_user_key(col_reader.user_email),
RELATION_TYPE: TableColumnUsage.TABLE_USER_RELATION_TYPE, RELATION_TYPE: TableColumnUsage.TABLE_USER_RELATION_TYPE,
...@@ -116,7 +109,7 @@ class TableColumnUsage(Neo4jCsvSerializable): ...@@ -116,7 +109,7 @@ class TableColumnUsage(Neo4jCsvSerializable):
def _get_user_key(self, email): def _get_user_key(self, email):
# type: (str) -> str # type: (str) -> str
return TableColumnUsage.USER_NODE_KEY_FORMAT.format(email=email) return User.get_user_model_key(email=email)
def __repr__(self): def __repr__(self):
# type: () -> str # type: () -> str
......
...@@ -4,7 +4,7 @@ from databuilder.models.neo4j_csv_serde import Neo4jCsvSerializable, NODE_KEY, \ ...@@ -4,7 +4,7 @@ from databuilder.models.neo4j_csv_serde import Neo4jCsvSerializable, NODE_KEY, \
NODE_LABEL, RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \ NODE_LABEL, RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \
RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE
from databuilder.models.table_column_usage import TableColumnUsage from databuilder.models.user import User
class TableOwner(Neo4jCsvSerializable): class TableOwner(Neo4jCsvSerializable):
...@@ -50,7 +50,7 @@ class TableOwner(Neo4jCsvSerializable): ...@@ -50,7 +50,7 @@ class TableOwner(Neo4jCsvSerializable):
def get_owner_model_key(self, owner # type: str def get_owner_model_key(self, owner # type: str
): ):
# type: (...) -> str # type: (...) -> str
return TableColumnUsage.USER_NODE_KEY_FORMAT.format(email=owner) return User.USER_NODE_KEY_FORMAT.format(email=owner)
def get_metadata_model_key(self): def get_metadata_model_key(self):
# type: (...) -> str # type: (...) -> str
...@@ -70,8 +70,8 @@ class TableOwner(Neo4jCsvSerializable): ...@@ -70,8 +70,8 @@ class TableOwner(Neo4jCsvSerializable):
if owner: if owner:
results.append({ results.append({
NODE_KEY: self.get_owner_model_key(owner), NODE_KEY: self.get_owner_model_key(owner),
NODE_LABEL: TableColumnUsage.USER_NODE_LABEL, NODE_LABEL: User.USER_NODE_LABEL,
TableColumnUsage.USER_NODE_EMAIL: owner User.USER_NODE_EMAIL: owner
}) })
return results return results
...@@ -85,7 +85,7 @@ class TableOwner(Neo4jCsvSerializable): ...@@ -85,7 +85,7 @@ class TableOwner(Neo4jCsvSerializable):
for owner in self.owners: for owner in self.owners:
results.append({ results.append({
RELATION_START_KEY: self.get_owner_model_key(owner), RELATION_START_KEY: self.get_owner_model_key(owner),
RELATION_START_LABEL: TableColumnUsage.USER_NODE_LABEL, RELATION_START_LABEL: User.USER_NODE_LABEL,
RELATION_END_KEY: self.get_metadata_model_key(), RELATION_END_KEY: self.get_metadata_model_key(),
RELATION_END_LABEL: 'Table', RELATION_END_LABEL: 'Table',
RELATION_TYPE: TableOwner.OWNER_TABLE_RELATION_TYPE, RELATION_TYPE: TableOwner.OWNER_TABLE_RELATION_TYPE,
......
from typing import Any, Dict, List, Union  # noqa: F401
from databuilder.models.neo4j_csv_serde import Neo4jCsvSerializable, NODE_KEY, \
    NODE_LABEL, RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \
    RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE
class User(Neo4jCsvSerializable):
    """
    User model.

    Serializes one user into a single Neo4j node record and, when a manager
    email is provided, one user -> manager relation record.
    """
    USER_NODE_LABEL = 'User'
    USER_NODE_KEY_FORMAT = '{email}'
    USER_NODE_EMAIL = 'email'
    USER_NODE_FIRST_NAME = 'first_name'
    USER_NODE_LAST_NAME = 'last_name'
    USER_NODE_FULL_NAME = 'full_name'
    USER_NODE_GITHUB_NAME = 'github_username'
    USER_NODE_TEAM = 'team_name'
    USER_NODE_EMPLOYEE_TYPE = 'employee_type'
    USER_NODE_MANAGER_EMAIL = 'manager_email'
    USER_NODE_SLACK_ID = 'slack_id'
    USER_NODE_IS_ACTIVE = 'is_active'
    USER_NODE_UPDATED_AT = 'updated_at'

    USER_MANAGER_RELATION_TYPE = 'MANAGE_BY'
    MANAGER_USER_RELATION_TYPE = 'MANAGE'

    def __init__(self,
                 email,  # type: str
                 first_name='',  # type: str
                 last_name='',  # type: str
                 name='',  # type: str
                 github_username='',  # type: str
                 team_name='',  # type: str
                 employee_type='',  # type: str
                 manager_email='',  # type: str
                 slack_id='',  # type: str
                 is_active=True,  # type: bool
                 updated_at=0,  # type: int
                 ):
        # type: (...) -> None
        """
        This class models user node for Amundsen people.

        :param email: used to build the node key (see get_user_model_key)
        :param first_name:
        :param last_name:
        :param name: full name, stored under the 'full_name' node property
        :param github_username:
        :param team_name:
        :param employee_type:
        :param manager_email: when set, a MANAGE_BY / MANAGE relation to the
            manager's user node is produced by create_relation
        :param slack_id:
        :param is_active:
        :param updated_at: everytime we update the node, we will push the timestamp.
                           then we will have a cron job to update the ex-employee nodes based on
                           the case if this timestamp hasn't been updated for two weeks.
        """
        self.first_name = first_name
        self.last_name = last_name
        self.name = name

        self.email = email
        self.github_username = github_username
        # todo: team will be a separate node once Amundsen People supports team
        self.team_name = team_name
        self.manager_email = manager_email
        self.employee_type = employee_type
        # this attr not available in team service, either update team service, update with FE
        self.slack_id = slack_id
        self.is_active = is_active
        self.updated_at = updated_at

        # Records are built once, up front; the create_next_* methods simply
        # drain these iterators.
        self._node_iter = iter(self.create_nodes())
        self._rel_iter = iter(self.create_relation())

    def create_next_node(self):
        # type: (...) -> Union[Dict[str, Any], None]
        """
        :return: the next node record, or None once all records are consumed
        """
        try:
            return next(self._node_iter)
        except StopIteration:
            return None

    def create_next_relation(self):
        # type: () -> Union[Dict[str, Any], None]
        """
        :return: the next relation record, or None once all records are consumed
        """
        try:
            return next(self._rel_iter)
        except StopIteration:
            return None

    @classmethod
    def get_user_model_key(cls,
                           email=None):
        # type: (...) -> str
        """
        Build the node key for a user.

        :param email: user email; a falsy value (None or '') yields ''
        :return: the email formatted with USER_NODE_KEY_FORMAT, or ''
        """
        if not email:
            return ''
        return User.USER_NODE_KEY_FORMAT.format(email=email)

    def create_nodes(self):
        # type: () -> List[Dict[str, Any]]
        """
        Create a list of Neo4j node records.

        The key, label, email and is_active are always present; every other
        attribute is emitted only when it is truthy, so empty strings and an
        updated_at of 0 are left out of the record.

        :return: a single-element list holding the user node record
        """
        result_node = {
            NODE_KEY: User.get_user_model_key(email=self.email),
            NODE_LABEL: User.USER_NODE_LABEL,
            User.USER_NODE_EMAIL: self.email,
            User.USER_NODE_IS_ACTIVE: self.is_active,
        }

        if self.first_name:
            result_node[User.USER_NODE_FIRST_NAME] = self.first_name

        if self.last_name:
            result_node[User.USER_NODE_LAST_NAME] = self.last_name

        if self.name:
            result_node[User.USER_NODE_FULL_NAME] = self.name

        if self.github_username:
            result_node[User.USER_NODE_GITHUB_NAME] = self.github_username

        if self.team_name:
            result_node[User.USER_NODE_TEAM] = self.team_name

        if self.employee_type:
            result_node[User.USER_NODE_EMPLOYEE_TYPE] = self.employee_type

        if self.slack_id:
            result_node[User.USER_NODE_SLACK_ID] = self.slack_id

        if self.updated_at:
            result_node[User.USER_NODE_UPDATED_AT] = self.updated_at

        return [result_node]

    def create_relation(self):
        # type: () -> List[Dict[str, Any]]
        """
        Create a list of Neo4j relation records.

        :return: one user -> manager relation when manager_email is set,
            otherwise an empty list
        """
        if self.manager_email:
            # only create the relation if the manager exists
            return [{
                RELATION_START_KEY: User.get_user_model_key(email=self.email),
                RELATION_START_LABEL: User.USER_NODE_LABEL,
                RELATION_END_KEY: User.get_user_model_key(email=self.manager_email),
                RELATION_END_LABEL: User.USER_NODE_LABEL,
                RELATION_TYPE: User.USER_MANAGER_RELATION_TYPE,
                RELATION_REVERSE_TYPE: User.MANAGER_USER_RELATION_TYPE
            }]
        return []

    def __repr__(self):
        # type: () -> str
        # The template has eleven placeholders, so all eleven attributes must
        # be supplied; leaving out updated_at made str.format raise IndexError.
        return 'User({!r}, {!r}, {!r}, {!r}, {!r}, ' \
               '{!r}, {!r}, {!r}, {!r}, {!r}, {!r},)'.format(self.first_name,
                                                             self.last_name,
                                                             self.name,
                                                             self.email,
                                                             self.github_username,
                                                             self.team_name,
                                                             self.slack_id,
                                                             self.manager_email,
                                                             self.employee_type,
                                                             self.is_active,
                                                             self.updated_at)
from setuptools import setup, find_packages from setuptools import setup, find_packages
__version__ = '1.0.1' __version__ = '1.0.2'
setup( setup(
name='amundsen-databuilder', name='amundsen-databuilder',
......
...@@ -22,8 +22,14 @@ class TestTableColumnUsage(unittest.TestCase): ...@@ -22,8 +22,14 @@ class TestTableColumnUsage(unittest.TestCase):
actual.append(node_row) actual.append(node_row)
node_row = table_col_usage.next_node() node_row = table_col_usage.next_node()
expected = [{'email': 'john@example.com', 'KEY': 'john@example.com', 'LABEL': 'User'}, expected = [{'is_active': True,
{'email': 'jane@example.com', 'KEY': 'jane@example.com', 'LABEL': 'User'}] 'LABEL': 'User',
'KEY': 'john@example.com',
'email': 'john@example.com'},
{'is_active': True,
'LABEL': 'User',
'KEY': 'jane@example.com',
'email': 'jane@example.com'}]
self.assertEqual(expected, actual) self.assertEqual(expected, actual)
rel_row = table_col_usage.next_relation() rel_row = table_col_usage.next_relation()
......
import unittest import unittest
from databuilder.models.table_column_usage import TableColumnUsage from databuilder.models.user import User
from databuilder.models.table_owner import TableOwner from databuilder.models.table_owner import TableOwner
...@@ -46,7 +46,7 @@ class TestTableOwner(unittest.TestCase): ...@@ -46,7 +46,7 @@ class TestTableOwner(unittest.TestCase):
relation = { relation = {
RELATION_START_KEY: 'user1@1', RELATION_START_KEY: 'user1@1',
RELATION_START_LABEL: TableColumnUsage.USER_NODE_LABEL, RELATION_START_LABEL: User.USER_NODE_LABEL,
RELATION_END_KEY: self.table_owner.get_metadata_model_key(), RELATION_END_KEY: self.table_owner.get_metadata_model_key(),
RELATION_END_LABEL: 'Table', RELATION_END_LABEL: 'Table',
RELATION_TYPE: TableOwner.OWNER_TABLE_RELATION_TYPE, RELATION_TYPE: TableOwner.OWNER_TABLE_RELATION_TYPE,
......
import unittest
from databuilder.models.neo4j_csv_serde import RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \
RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE
from databuilder.models.user import User
class TestUser(unittest.TestCase):
    """Unit tests for the User model's key, node and relation serialization."""

    def setUp(self):
        # type: () -> None
        super(TestUser, self).setUp()
        # Fully populated user so that both a node and a manager relation
        # are produced.
        self.user = User(first_name='test_first',
                         last_name='test_last',
                         name='test_first test_last',
                         email='test@email.com',
                         github_username='github_test',
                         team_name='test_team',
                         employee_type='FTE',
                         manager_email='test_manager@email.com',
                         slack_id='slack',
                         is_active=True,
                         updated_at=1)

    def test_get_user_model_key(self):
        # type: () -> None
        user_email = User.get_user_model_key(email=self.user.email)
        # assertEqual replaces the deprecated assertEquals alias, which newer
        # Python versions no longer provide.
        self.assertEqual(user_email, '{email}'.format(email='test@email.com'))

    def test_create_nodes(self):
        # type: () -> None
        nodes = self.user.create_nodes()
        self.assertEqual(len(nodes), 1)

    def test_create_relation(self):
        # type: () -> None
        relations = self.user.create_relation()
        self.assertEqual(len(relations), 1)

        start_key = '{email}'.format(email='test@email.com')
        end_key = '{email}'.format(email='test_manager@email.com')

        relation = {
            RELATION_START_KEY: start_key,
            RELATION_START_LABEL: User.USER_NODE_LABEL,
            RELATION_END_KEY: end_key,
            RELATION_END_LABEL: User.USER_NODE_LABEL,
            RELATION_TYPE: User.USER_MANAGER_RELATION_TYPE,
            RELATION_REVERSE_TYPE: User.MANAGER_USER_RELATION_TYPE
        }

        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn(relation, relations)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment