Unverified Commit 87213c5a authored by Tao Feng's avatar Tao Feng Committed by GitHub

feat: Mode Batch dashboard chart API (#362)

Signed-off-by: 's avatarTao Feng <fengtao04@gmail.com>
parent c3e713e7
......@@ -444,7 +444,7 @@ Other information such as report run, owner, chart name, query name is in separa
It calls two APIs ([spaces API](https://mode.com/developer/api-reference/management/spaces/#listSpaces) and [reports API](https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace)) joining together.
You can create Databuilder job config like this.
You can create Databuilder job config like this.
```python
task = DefaultTask(extractor=ModeDashboardExtractor(),
loader=FsNeo4jCSVLoader(), )
......@@ -608,6 +608,25 @@ job = DefaultJob(conf=job_config,
job.launch()
```
If your organization's Mode account supports the discovery feature (a paid feature), you can leverage [ModeDashboardChartsBatchExtractor](./databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py), which makes a batch call to the Mode API and is more performant. You need to generate a bearer token following the API instructions.
```python
# Batch extractor that pulls chart metadata via a batch call to the Mode API.
extractor = ModeDashboardChartsBatchExtractor()
task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader())
# Every config key is namespaced under the extractor's own scope.
job_config = ConfigFactory.from_dict({
    '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization,
    '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token,
    '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password,
    # The bearer token is required to access the batch discovery endpoint.
    '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token,
})
job = DefaultJob(conf=job_config,
                 task=task,
                 publisher=Neo4jCsvPublisher())
job.launch()
```
#### [ModeDashboardUserExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_user_extractor.py)
An extractor that extracts the Mode user_id and then updates the User node.
......@@ -637,6 +656,7 @@ Note that this provides accumulated view count which does [not effectively show
If you are fine with `accumulated usage`, you can use the [TemplateVariableSubstitutionTransformer](./databuilder/transformer/template_variable_substitution_transformer.py) to reshape the Dict payload from [ModeDashboardUsageExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py) to fit [DashboardUsage](./docs/models.md#dashboardusage), and then the [DictToModel](./databuilder/transformer/dict_to_model.py) transformer to convert that Dict into a [DashboardUsage](./docs/models.md#dashboardusage) model. ([Example](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py#L36) of how to combine these two transformers.)
### [RedashDashboardExtractor](./databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py)
The included `RedashDashboardExtractor` provides support for extracting basic metadata for Redash dashboards (dashboard name, owner, URL, created/updated timestamps, and a generated description) and their associated queries (query name, URL, and raw query). It can be extended with a configurable table parser function to also support extraction of `DashboardTable` metadata. (See below for example usage.)
......
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
import logging
from pyhocon import ConfigTree, ConfigFactory
from typing import Any
from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils
from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery
from databuilder.rest_api.rest_api_query import RestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
from databuilder.transformer.dict_to_model import DictToModel, MODEL_CLASS
LOGGER = logging.getLogger(__name__)
class ModeDashboardChartsBatchExtractor(Extractor):
    """
    Mode dashboard chart extractor leveraging the batch / discovery endpoint.

    Details can be found at
    https://mode.com/help/articles/discovery-api/#list-charts-for-an-organization
    """

    # Config key: when true, request the charts from all spaces.
    INCLUDE_ALL_SPACE = 'include_all_space'

    def init(self, conf: ConfigTree) -> None:
        """
        Wire up the REST API extractor and the dict-to-model transformer.

        :param conf: configuration scoped to this extractor; must provide the
            organization and the bearer token read by the auth helper.
        """
        self._conf = conf

        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query,
            conf=self._conf
        )

        # Records come back as dicts; convert them into DashboardChart models
        # unless the caller has configured a different model class.
        dict_to_model_transformer = DictToModel()
        dict_to_model_transformer.init(
            conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict(
                    {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})))
        self._transformer = dict_to_model_transformer

    def extract(self) -> Any:
        """
        Return the next chart record transformed into its model.

        :return: the transformed record, or None once the underlying extractor
            yields nothing.
        """
        record = self._extractor.extract()
        if not record:
            return None

        return self._transformer.transform(record=record)

    def get_scope(self) -> str:
        """Return the configuration scope of this extractor."""
        return 'extractor.mode_dashboard_chart_batch'

    def _build_restapi_query(self) -> RestApiQuery:
        """
        Build a paginated REST API query based on the Mode discovery API.

        :return: a query that lists the charts of the configured organization
        """
        # discover_auth=True selects header (bearer token) based auth.
        params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True)

        seed_record = [{
            'organization': self._conf.get_string(ORGANIZATION),
            'is_active': None,
            'updated_at': None,
            'do_not_update_empty_attribute': True,
        }]
        seed_query = RestApiQuerySeed(seed_record=seed_record)

        # Use HTTPS: the request carries the bearer token in its Authorization
        # header, which must not travel over plaintext HTTP.
        chart_url_template = 'https://app.mode.com/batch/{organization}/charts'
        if self._conf.get_bool(ModeDashboardChartsBatchExtractor.INCLUDE_ALL_SPACE, default=False):
            # NOTE(review): ModePaginatedRestApiQuery may append its own paging
            # suffix to this URL -- confirm the combined query string is valid.
            chart_url_template += '?include_spaces=all'

        # field_names is listed in the same order as the fields selected by
        # json_path (presumably matched positionally -- verify in the query class).
        json_path = '(charts[*].[space_token,report_token,query_token,token,chart_title,chart_type])'
        field_names = ['dashboard_group_id',
                       'dashboard_id',
                       'query_id',
                       'chart_id',
                       'chart_name',
                       'chart_type']

        chart_batch_query = ModePaginatedRestApiQuery(query_to_join=seed_query,
                                                      url=chart_url_template,
                                                      params=params,
                                                      json_path=json_path,
                                                      pagination_json_path=json_path,
                                                      field_names=field_names,
                                                      skip_no_result=True)
        return chart_batch_query
......@@ -4,3 +4,7 @@
ORGANIZATION = 'organization'
MODE_ACCESS_TOKEN = 'mode_user_token'
MODE_PASSWORD_TOKEN = 'mode_password_token'
# This token is needed to access the batch discovery endpoint.
# See https://mode.com/developer/discovery-api/introduction/
MODE_BEARER_TOKEN = 'mode_bearer_token'
......@@ -7,7 +7,7 @@ from typing import Any, Dict
from databuilder import Scoped
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION, MODE_ACCESS_TOKEN, \
MODE_PASSWORD_TOKEN
MODE_PASSWORD_TOKEN, MODE_BEARER_TOKEN
from databuilder.extractor.restapi.rest_api_extractor import RestAPIExtractor, REST_API_QUERY, STATIC_RECORD_DICT
from databuilder.rest_api.base_rest_api_query import BaseRestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
......@@ -44,11 +44,21 @@ class ModeDashboardUtils(object):
return spaces_query
@staticmethod
def get_auth_params(conf: ConfigTree) -> Dict[str, Any]:
params = {'auth': HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
conf.get_string(MODE_PASSWORD_TOKEN)
)
}
def get_auth_params(conf: ConfigTree, discover_auth: bool = False) -> Dict[str, Any]:
    """
    Build the authentication parameter dict for a Mode API request.

    :param conf: configuration holding the Mode credentials
    :param discover_auth: when True, return header based auth built from the
        bearer token; otherwise return HTTP basic auth built from the access
        and password tokens
    :return: a dict with either a ``headers`` entry or an ``auth`` entry
    """
    if not discover_auth:
        basic_auth = HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
                                   conf.get_string(MODE_PASSWORD_TOKEN))
        return {'auth': basic_auth}

    # Mode discovery API needs custom token set in header
    # https://mode.com/developer/discovery-api/introduction/
    bearer_headers = {"Authorization": conf.get_string(MODE_BEARER_TOKEN)}
    return {"headers": bearer_headers}
@staticmethod
......
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
import unittest
from mock import patch
from pyhocon import ConfigFactory
from databuilder import Scoped
from databuilder.extractor.dashboard.mode_analytics.batch.\
mode_dashboard_charts_batch_extractor import ModeDashboardChartsBatchExtractor
class TestModeDashboardChartsBatchExtractor(unittest.TestCase):
    """Unit tests for ModeDashboardChartsBatchExtractor."""

    def setUp(self) -> None:
        # Keys are namespaced under the extractor's scope so that
        # Scoped.get_scoped_conf can pull them out in each test.
        config = ConfigFactory.from_dict({
            'extractor.mode_dashboard_chart_batch.organization': 'amundsen',
            'extractor.mode_dashboard_chart_batch.mode_user_token': 'amundsen_user_token',
            'extractor.mode_dashboard_chart_batch.mode_password_token': 'amundsen_password_token',
            'extractor.mode_dashboard_chart_batch.mode_bearer_token': 'amundsen_bearer_token',
        })
        self.config = config

    def test_dashboard_chart_extractor_empty_record(self) -> None:
        """The extractor returns None when the (patched) HTTP call yields no records."""
        extractor = ModeDashboardChartsBatchExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))

        # Patch requests.get so that no real HTTP request is issued.
        with patch('databuilder.rest_api.rest_api_query.requests.get'):
            record = extractor.extract()
            self.assertIsNone(record)

    def test_dashboard_chart_extractor_actual_record(self) -> None:
        """A dict record from the REST extractor is transformed into a chart model."""
        extractor = ModeDashboardChartsBatchExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))

        with patch('databuilder.extractor.restapi.rest_api_extractor.RestAPIExtractor.extract') as mock_get:
            mock_get.return_value = {
                'organization': 'amundsen',
                'is_active': None,
                'updated_at': None,
                'do_not_update_empty_attribute': True,
                'dashboard_group_id': 'ggg',
                'dashboard_id': 'ddd',
                'query_id': 'yyy',
                'chart_id': 'xxx',
                'chart_name': 'some chart',
                'chart_type': 'bigNumber',
                'product': 'mode'
            }

            record = extractor.extract()
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed in Python 3.12.
            self.assertEqual(record._dashboard_group_id, 'ggg')
            self.assertEqual(record._dashboard_id, 'ddd')
            self.assertEqual(record._chart_name, 'some chart')
            self.assertEqual(record._product, 'mode')
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment