Unverified Commit 6802ab13 authored by Nathan Lawrence's avatar Nathan Lawrence Committed by GitHub

feat: Add an extractor for pulling user information from BambooHR (#369)

Signed-off-by: Nathan Lawrence <nathanlawrence@asana.com>
parent 88c05522
......@@ -877,6 +877,29 @@ job = DefaultJob(conf=job_config,
job.launch()
```
### [BamboohrUserExtractor](./databuilder/extractor/user/bamboohr/bamboohr_user_extractor.py)
The included `BamboohrUserExtractor` provides support for extracting basic user metadata from [BambooHR](https://www.bamboohr.com/). For companies and organizations that use BambooHR to store employee information such as email addresses, first names, last names, titles, and departments, use the `BamboohrUserExtractor` to populate Amundsen user data.
A sample job config is shown below.
```python
# Extractor that reads users from the BambooHR employee directory.
extractor = BamboohrUserExtractor()
task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader())
# Config keys are the extractor scope ('extractor.bamboohr_user') plus the key name.
job_config = ConfigFactory.from_dict({
# API key the extractor sends as the HTTP basic-auth username.
'extractor.bamboohr_user.api_key': api_key,
# Subdomain segment the extractor inserts into the BambooHR API URL.
'extractor.bamboohr_user.subdomain': subdomain,
})
job = DefaultJob(conf=job_config,
task=task,
publisher=Neo4jCsvPublisher())
job.launch()
```
## List of transformers
#### [ChainedTransformer](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/transformer/base_transformer.py#L41 "ChainedTransformer")
A chained transformer that can take multiple transformers.
......
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
from pyhocon import ConfigTree
import requests
from requests.auth import HTTPBasicAuth
from typing import Iterator, Optional
from xml.etree import ElementTree
from databuilder.extractor.base_extractor import Extractor
from databuilder.models.user import User
class BamboohrUserExtractor(Extractor):
    """
    Extractor that pulls user records from the BambooHR employee directory.

    The directory endpoint is fetched once, lazily, on the first call to
    ``extract``; every ``<employee>`` element of the returned XML document
    is converted into a ``User`` model.
    """

    # Config keys, relative to the scope returned by ``get_scope``.
    API_KEY = 'api_key'
    SUBDOMAIN = 'subdomain'

    def init(self, conf: ConfigTree) -> None:
        """
        Read the API key and subdomain from the scoped configuration.

        :param conf: config tree holding the ``api_key`` and ``subdomain`` strings
        """
        # Created lazily by extract() so no HTTP request is made during init.
        self._extract_iter: Optional[Iterator[User]] = None
        self._api_key = conf.get_string(BamboohrUserExtractor.API_KEY)
        self._subdomain = conf.get_string(BamboohrUserExtractor.SUBDOMAIN)

    def extract(self) -> Optional[User]:
        """
        Return the next user from the directory.

        :return: the next ``User``, or ``None`` once all users have been returned
        """
        if self._extract_iter is None:
            self._extract_iter = self._get_extract_iter()
        return next(self._extract_iter, None)

    def _employee_directory_uri(self) -> str:
        """Build the employee directory URL for the configured subdomain."""
        return 'https://api.bamboohr.com/api/gateway.php/{subdomain}/v1/employees/directory'.format(
            subdomain=self._subdomain
        )

    @staticmethod
    def _get_field(employee: ElementTree.Element, name: str) -> str:
        """
        Return the text of the ``<field id="name">`` child of an employee element.

        :param employee: an ``<employee>`` element from the directory document
        :param name: value of the ``id`` attribute of the wanted field
        :return: the field text, or ``''`` when the field is missing or empty
        """
        field = employee.find('./field[@id=\'{name}\']'.format(name=name))
        if field is None or field.text is None:
            return ''
        return field.text

    def _get_extract_iter(self) -> Iterator[User]:
        """
        Fetch the employee directory and yield one ``User`` per employee.

        :return: iterator over the users found in the directory response
        :raises requests.HTTPError: when the API answers with a 4xx/5xx status
        """
        # The API key goes in the basic-auth username slot; 'x' is sent as the password.
        response = requests.get(
            self._employee_directory_uri(), auth=HTTPBasicAuth(self._api_key, 'x')
        )
        # Fail loudly on auth/server errors rather than trying to parse an error
        # body as the directory document.
        response.raise_for_status()

        root = ElementTree.fromstring(response.content)

        for employee in root.findall('./employees/employee'):
            yield User(
                email=self._get_field(employee, 'workEmail'),
                first_name=self._get_field(employee, 'firstName'),
                last_name=self._get_field(employee, 'lastName'),
                name=self._get_field(employee, 'displayName'),
                team_name=self._get_field(employee, 'department'),
                role_name=self._get_field(employee, 'jobTitle'),
            )

    def get_scope(self) -> str:
        """Return the config scope under which this extractor's keys live."""
        return 'extractor.bamboohr_user'
......@@ -59,3 +59,4 @@ httplib2>=0.18.0
unidecode
requests==2.23.0,<3.0
responses==0.10.6
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
import io
import unittest
import os
import responses
from pyhocon import ConfigFactory
from databuilder.models.user import User
from databuilder.extractor.user.bamboohr.bamboohr_user_extractor import BamboohrUserExtractor
class TestBamboohrUserExtractor(unittest.TestCase):
    """Checks that BamboohrUserExtractor turns the directory XML fixture into User models."""

    @responses.activate
    def test_parse_testdata(self) -> None:
        """The single employee in testdata.xml is parsed into the expected User."""
        extractor = BamboohrUserExtractor()
        config = ConfigFactory.from_dict({'api_key': 'api_key', 'subdomain': 'amundsen'})
        extractor.init(config)

        test_dir = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.join(
            test_dir,
            '../../../resources/extractor/user/bamboohr/testdata.xml'
        )
        with io.open(fixture_path) as fixture:
            directory_xml = fixture.read()

        # Serve the fixture for the directory endpoint instead of issuing a real request.
        responses.add(responses.GET, extractor._employee_directory_uri(), body=directory_xml)

        extracted = list(extractor._get_extract_iter())
        self.assertEqual(1, len(extracted))

        expected = User(
            email='roald@amundsen.io',
            first_name='Roald',
            last_name='Amundsen',
            name='Roald Amundsen',
            team_name='508 Corporate Marketing',
            role_name='Antarctic Explorer',
        )
        # User models are compared through their repr, as in the other assertions here.
        self.assertEqual(repr(expected), repr(extracted[0]))


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
<?xml version="1.0"?>
<directory>
<fieldset>
<field id="displayName">Display name</field>
<field id="firstName">First name</field>
<field id="lastName">Last name</field>
<field id="preferredName">Preferred name</field>
<field id="gender">Gender</field>
<field id="jobTitle">Job title</field>
<field id="workPhone">Work Phone</field>
<field id="mobilePhone">Mobile Phone</field>
<field id="workEmail">Work Email</field>
<field id="department">Department</field>
<field id="location">Location</field>
<field id="workPhoneExtension">Work Ext.</field>
<field id="photoUploaded">Employee photo</field>
<field id="photoUrl">Photo URL</field>
<field id="canUploadPhoto">Can Upload Photo</field>
</fieldset>
<employees>
<employee id="1082">
<field id="displayName">Roald Amundsen</field>
<field id="firstName">Roald</field>
<field id="lastName">Amundsen</field>
<field id="preferredName"></field>
<field id="gender">Male</field>
<field id="jobTitle">Antarctic Explorer</field>
<field id="workPhone"></field>
<field id="mobilePhone"></field>
<field id="workEmail">roald@amundsen.io</field>
<field id="department">508 Corporate Marketing</field>
<field id="location">Norway</field>
<field id="workPhoneExtension"></field>
<field id="photoUploaded">true</field>
<field id="photoUrl">https://upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Amundsen_in_fur_skins.jpg/440px-Amundsen_in_fur_skins.jpg</field>
<field id="canUploadPhoto">no</field>
</employee>
</employees>
</directory>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment