Unverified Commit 7a3bdcd6 authored by Tamika Tannis's avatar Tamika Tannis Committed by GitHub

Add `base_superset_client` & documentation (#159)

* Add a WIP base_super_preview_client and example doc

* Update base_superset_preview_client, doc, and tests.  Pass 'database' in preview data request

* Address nits + fix typos
parent 6d51b4b1
import abc
import logging
from http import HTTPStatus
from typing import Dict

from flask import Response as FlaskResponse, make_response, jsonify
from requests import Response

from amundsen_application.base.base_preview_client import BasePreviewClient
from amundsen_application.models.preview_data import ColumnItem, PreviewData, PreviewDataSchema
class BaseSupersetPreviewClient(BasePreviewClient):
    """
    Shared logic for preview clients that fetch preview data through Superset.

    Subclasses implement `post_to_sql_json()` to perform the actual request;
    `get_preview_data()` converts that response into the application's
    preview data payload.
    """

    @abc.abstractmethod
    def __init__(self) -> None:
        # Base headers sent with every request; subclasses may add their own
        self.headers = {}  # type: Dict

    @abc.abstractmethod
    def post_to_sql_json(self, *, params: Dict, headers: Dict) -> Response:
        """
        Returns the post response from Superset's `sql_json` endpoint

        :param params: the preview request parameters received by get_preview_data()
        :param headers: the instance headers merged with any optional headers
        """
        pass  # pragma: no cover

    def get_preview_data(self, params: Dict, optionalHeaders: Dict = None) -> FlaskResponse:
        """
        Returns a FlaskResponse object, where the response data represents a json object
        with the preview data accessible on 'preview_data' key. The preview data should
        match amundsen_application.models.preview_data.PreviewDataSchema

        :param params: request parameters, forwarded untouched to post_to_sql_json()
        :param optionalHeaders: extra headers merged over the instance's headers
        :return: on success, the preview data with the status code of the Superset response;
                 on a schema mismatch or any exception, an empty 'preview_data' object
                 with a 500 status code
        """
        try:
            # Clone headers so that it does not mutate instance's state
            headers = dict(self.headers)

            # Merge optionalHeaders into headers
            if optionalHeaders is not None:
                headers.update(optionalHeaders)

            # Request preview data
            response = self.post_to_sql_json(params=params, headers=headers)

            # Verify and return the results
            response_dict = response.json()
            columns = [ColumnItem(c['name'], c['type']) for c in response_dict['columns']]
            preview_data = PreviewData(columns, response_dict['data'])
            # Serialize, then load the serialized form back to collect validation errors
            data = PreviewDataSchema().dump(preview_data)[0]
            errors = PreviewDataSchema().load(data)[1]
            if not errors:
                payload = jsonify({'preview_data': data})
                return make_response(payload, response.status_code)

            # Record why the data was rejected instead of failing silently
            logging.error('Preview data does not match PreviewDataSchema: %s', errors)
            return make_response(jsonify({'preview_data': {}}), HTTPStatus.INTERNAL_SERVER_ERROR)
        except Exception:
            # Boundary handler: callers always get a response, but the cause is logged
            logging.exception('Encountered exception while requesting preview data')
            return make_response(jsonify({'preview_data': {}}), HTTPStatus.INTERNAL_SERVER_ERROR)
import logging
import requests
import uuid
from requests import Response
from typing import Any, Dict # noqa: F401
from amundsen_application.base.base_superset_preview_client import BaseSupersetPreviewClient
# Maps a database name to the id that is sent as `database_id` to Superset's sql_json endpoint.
# 'main' is an existing default Superset database which serves for demo purposes
DEFAULT_DATABASE_MAP = {
'main': 1,
}
# Default `sql_json` endpoint url, pointing at a Superset instance on localhost port 8088
DEFAULT_URL = 'http://localhost:8088/superset/sql_json/'
class SupersetPreviewClient(BaseSupersetPreviewClient):
    """
    Example implementation of BaseSupersetPreviewClient.

    Posts a hard-coded demo query to the configured `sql_json` url.
    """

    def __init__(self,
                 *,
                 database_map: Dict[str, int] = DEFAULT_DATABASE_MAP,
                 url: str = DEFAULT_URL) -> None:
        """
        :param database_map: maps a database name to the database id sent to Superset
        :param url: url of the `sql_json` endpoint to post to
        """
        # Copy the mapping so the instance never aliases the shared module-level default
        self.database_map = dict(database_map)
        self.headers = {}  # type: Dict
        self.url = url

    def post_to_sql_json(self, *, params: Dict, headers: Dict) -> Response:
        """
        Returns the post response from Superset's `sql_json` endpoint

        :param params: preview request parameters; unused here because this example
                       queries hard-coded demo values (see the inline notes below)
        :param headers: headers to send with the request
        """
        # Bound before the try block so it always exists when the request is posted
        request_data = {}  # type: Dict[str, Any]

        # Create the appropriate request data
        try:
            # Superset's sql_json endpoint requires a unique client_id
            request_data['client_id'] = str(uuid.uuid4())

            # Superset's sql_json endpoint requires the id of the database that it will execute the query on
            database_name = 'main'  # OR params.get('database') in a real use case
            request_data['database_id'] = self.database_map.get(database_name, '')

            # Generate the sql query for the desired data preview content
            try:
                # 'main' is an existing default Superset schema which serves for demo purposes
                schema = 'main'  # OR params.get('schema') in a real use case

                # 'ab_role' is an existing default Superset table which serves for demo purposes
                table_name = 'ab_role'  # OR params.get('tableName') in a real use case

                request_data['sql'] = 'SELECT * FROM {schema}.{table} LIMIT 50'.format(schema=schema, table=table_name)
            except Exception as e:
                logging.error('Encountered error generating request sql: %s', e)
        except Exception as e:
            logging.error('Encountered error generating request data: %s', e)

        # Post request to Superset's `sql_json` endpoint
        return requests.post(self.url, data=request_data, headers=headers)
......@@ -115,7 +115,7 @@ export class TableDetail extends React.Component<TableDetailProps & RouteCompone
}
this.props.getTableData(this.cluster, this.database, this.schema, this.tableName, searchIndex, source);
this.props.getPreviewData({ schema: this.schema, tableName: this.tableName });
this.props.getPreviewData({ database: this.database, schema: this.schema, tableName: this.tableName });
}
frequentUserOnClick = (e) => {
......
......@@ -47,6 +47,7 @@ export interface User {
}
// Parameters identifying the table whose preview data is requested via getPreviewData()
export interface PreviewQueryParams {
database: string;
schema: string;
tableName: string;
}
......
# Overview
Amundsen's data preview feature requires that developers create a custom implementation of `base_preview_client` for requesting that data. This feature assists with data discovery by providing the end user the option to view a sample of the actual resource data so that they can verify whether or not they want to transition into exploring that data, or continue their search.
[Apache Superset](https://github.com/apache/incubator-superset) is an open-source business intelligence tool that can be used for data exploration. Amundsen's data preview feature was created with Superset in mind, and it is what we leverage internally at Lyft to support the feature. This document provides some insight into how to configure Amundsen's frontend application to leverage Superset for data previews.
## Implementation
Implement the `base_superset_preview_client` to make a request to an instance of Superset.
### Shared Logic
[`base_superset_preview_client`](https://github.com/lyft/amundsenfrontendlibrary/tree/master/amundsen_application/base/base_superset_preview_client.py) implements `get_preview_data()` of `base_preview_client` with the minimal logic for this use case.
It updates the headers for the request if `optionalHeaders` are passed in `get_preview_data()`:
```
# Clone headers so that it does not mutate instance's state
headers = dict(self.headers)
# Merge optionalHeaders into headers
if optionalHeaders is not None:
headers.update(optionalHeaders)
```
It verifies the shape of the data before returning it to the application. If the data does not match the `PreviewDataSchema`, the request will fail.
```
# Verify and return the results
response_dict = response.json()
columns = [ColumnItem(c['name'], c['type']) for c in response_dict['columns']]
preview_data = PreviewData(columns, response_dict['data'])
data = PreviewDataSchema().dump(preview_data)[0]
errors = PreviewDataSchema().load(data)[1]
if not errors:
payload = jsonify({'preview_data': data})
return make_response(payload, response.status_code)
else:
return make_response(jsonify({'preview_data': {}}), HTTPStatus.INTERNAL_SERVER_ERROR)
```
### Custom Logic
`base_superset_preview_client` has an abstract method `post_to_sql_json()`. This method will contain whatever custom logic is needed to make a successful request to the `sql_json` endpoint based on the protections you have configured on this endpoint on your instance of Superset. For example, this may be where you have to append other values to the headers, or generate SQL queries based on your use case.
See the following [`example_superset_preview_client`](https://github.com/lyft/amundsenfrontendlibrary/tree/master/amundsen_application/base/examples/example_superset_preview_client.py) for an example implementation of `base_superset_preview_client` and `post_to_sql_json()`. This example assumes a local instance of Superset running on port 8088 with no security, authentication, or authorization configured on the endpoint.
## Usage
After implementing your custom Superset preview client class, point the `[preview_client]` entry point in your local `setup.py` to this class.
```
entry_points="""
...
[preview_client]
table_preview_client_class = amundsen_application.base.examples.example_superset_preview_client:SupersetPreviewClient
"""
```
Run `python3 setup.py install` in your virtual environment and restart the application for the entry point changes to take effect.
......@@ -189,14 +189,14 @@ class SearchTest(unittest.TestCase):
with local_app.app_context():
# test single tag with query term
search_term = 'tag:hive test_table'
expected = 'http://0.0.0.0:5001/search/field/tag/field_val' \
'/hive?page_index=1&query_term=test_table'
expected = local_app.config['SEARCHSERVICE_BASE'] + \
'/search/field/tag/field_val/hive?page_index=1&query_term=test_table'
self.assertEqual(_create_url_with_field(search_term=search_term,
page_index=1), expected)
# test single tag without query term
search_term = 'tag:hive'
expected = 'http://0.0.0.0:5001/search/field/tag/field_val' \
'/hive?page_index=1'
expected = local_app.config['SEARCHSERVICE_BASE'] + \
'/search/field/tag/field_val/hive?page_index=1'
self.assertEqual(_create_url_with_field(search_term=search_term,
page_index=1), expected)
import flask
import json
import unittest
from http import HTTPStatus
from requests import Response
from typing import Dict
from unittest.mock import Mock
from amundsen_application.base.base_superset_preview_client import BaseSupersetPreviewClient
# Flask app whose test_request_context() is used by the tests below
app = flask.Flask(__name__)
app.config.from_object('amundsen_application.config.LocalConfig')
# JSON body returned by MockClient's mocked response: two column descriptions plus three rows of data
good_json_data = {
"columns": [
{
"agg": "count_distinct",
"is_date": False,
"type": "INT",
"name": "id",
"is_dim": False,
},
{
"is_date": False,
"type": "STRING",
"name": "name",
"is_dim": True,
}
],
"data": [
{"id": 1, "name": "Admin"},
{"id": 2, "name": "Public"},
{"id": 3, "name": "Alpha"},
]
}
# The 'preview_data' payload the success test expects for good_json_data: each column keeps only
# its type and name (as column_type / column_name), the rows are unchanged, and error_text is empty
expected_results = {
"columns": [
{
"column_type": "INT",
"column_name": "id",
},
{
"column_type": "STRING",
"column_name": "name",
}
],
"data": [
{"id": 1, "name": "Admin"},
{"id": 2, "name": "Public"},
{"id": 3, "name": "Alpha"},
],
"error_text": ""
}
# JSON body returned by MockBadDataClient's mocked response: same columns as good_json_data, but
# 'data' is a string rather than a list of rows; the incorrect-data-shape test expects a 500 for it
bad_json_data = {
"columns": [
{
"agg": "count_distinct",
"is_date": False,
"type": "INT",
"name": "id",
"is_dim": False,
},
{
"is_date": False,
"type": "STRING",
"name": "name",
"is_dim": True,
}
],
"data": "Wrong type",
}
class MockClient(BaseSupersetPreviewClient):
    """Stub client whose mocked response carries the well-formed good_json_data."""

    def __init__(self) -> None:
        super().__init__()

    def post_to_sql_json(self, *, params: Dict, headers: Dict) -> Response:
        """Return a mock response with status 200 whose json() yields good_json_data."""
        fake_response = Mock(status_code=HTTPStatus.OK)
        fake_response.json.return_value = good_json_data
        return fake_response
class MockBadDataClient(BaseSupersetPreviewClient):
    """Stub client whose mocked response carries bad_json_data ('data' has the wrong type)."""

    def __init__(self) -> None:
        # Initialize headers through the base class, like the other mock clients do
        super().__init__()

    def post_to_sql_json(self, *, params: Dict, headers: Dict) -> Response:
        """Return a mock response with status 200 whose json() yields bad_json_data."""
        mockresponse = Mock()
        mockresponse.json.return_value = bad_json_data
        mockresponse.status_code = HTTPStatus.OK
        return mockresponse
class MockExceptionClient(BaseSupersetPreviewClient):
    """Stub client whose mocked response has no JSON body (json() returns None)."""

    def __init__(self) -> None:
        super().__init__()

    def post_to_sql_json(self, *, params: Dict, headers: Dict) -> Response:
        """Return a mock response with status 200 whose json() yields None."""
        empty_response = Mock(status_code=HTTPStatus.OK)
        empty_response.json.return_value = None
        return empty_response
class SupersetPreviewClientTest(unittest.TestCase):
    """Tests BaseSupersetPreviewClient.get_preview_data() against mocked responses."""

    def test_get_preview_data_raise_exception(self) -> None:
        """
        Test catching any exception raised in get_preview_data(), which should result in
        a response with 500 error and empty preview_data payload
        :return:
        """
        with app.test_request_context():
            response = MockExceptionClient().get_preview_data(params={})
            self.assertEqual(json.loads(response.data).get('preview_data'), {})
            self.assertEqual(response.status_code, HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_post_sql_json_incorrect_data_shape(self) -> None:
        """
        Test catching errors in the data shape returned by post_to_sql_json(), which should
        result in a response with 500 error and empty preview_data payload
        :return:
        """
        with app.test_request_context():
            response = MockBadDataClient().get_preview_data(params={})
            self.assertEqual(json.loads(response.data).get('preview_data'), {})
            self.assertEqual(response.status_code, HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_post_sql_json_correct_data_shape(self) -> None:
        """
        Test well-formed data returned by post_to_sql_json(), which should result in
        a response with 200 status and the expected preview_data payload
        :return:
        """
        with app.test_request_context():
            response = MockClient().get_preview_data(params={}, optionalHeaders={'testKey': 'testValue'})
            self.assertEqual(json.loads(response.data).get('preview_data'), expected_results)
            self.assertEqual(response.status_code, HTTPStatus.OK)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment