Unverified Commit bb8daeb2 authored by Daniel's avatar Daniel Committed by GitHub

Use table 'key' in Metadata APIs (#191)

Previously, all of our metadata APIs sent a set of fields (cluster, database, schema, and table) to identify the table record in the metadata service. Those parameters have been replaced by a single 'key'.
parent b12fc1e1
......@@ -11,7 +11,7 @@ from amundsen_application.log.action_log import action_logging
from amundsen_application.models.user import load_user, dump_user
from amundsen_application.api.utils.metadata_utils import get_table_key, marshall_table_partial
from amundsen_application.api.utils.metadata_utils import marshall_table_partial, marshall_table_full
from amundsen_application.api.utils.request_utils import get_query_param, request_metadata
......@@ -79,7 +79,7 @@ def get_table_metadata() -> Response:
TODO: Define an interface for envoy_client
"""
try:
table_key = get_table_key(request.args)
table_key = get_query_param(request.args, 'key')
list_item_index = get_query_param(request.args, 'index')
list_item_source = get_query_param(request.args, 'source')
......@@ -91,26 +91,9 @@ def get_table_metadata() -> Response:
return make_response(jsonify({'tableData': {}, 'msg': message}), HTTPStatus.INTERNAL_SERVER_ERROR)
def _get_partition_data(watermarks: Dict) -> Dict:
    """
    Builds the partition summary of a table from its watermark records.

    :param watermarks: Watermark records of the table. Although annotated as Dict, the
        value is iterated as a sequence of dicts that each carry 'watermark_type',
        'partition_key' and 'partition_value'. May be empty or None.
    :return: {'is_partitioned': True, 'key': ..., 'value': ...} built from the first
        'high_watermark' record, or {'is_partitioned': False} when there is none.
    """
    if watermarks:
        # Give next() a default so that a list without any 'high_watermark' entry
        # falls through to "not partitioned" instead of raising StopIteration.
        high_watermark = next(
            (mark for mark in watermarks if mark['watermark_type'] == 'high_watermark'),
            None
        )
        if high_watermark:
            return {
                'is_partitioned': True,
                'key': high_watermark['partition_key'],
                'value': high_watermark['partition_value']
            }
    return {
        'is_partitioned': False
    }
@action_logging
def _get_table_metadata(*, table_key: str, index: int, source: str) -> Dict[str, Any]:
def _map_user_object_to_schema(u: Dict) -> Dict:
return dump_user(load_user(u))
results_dict = {
'tableData': {},
'msg': '',
......@@ -138,50 +121,12 @@ def _get_table_metadata(*, table_key: str, index: int, source: str) -> Dict[str,
return results_dict
try:
# Filter and parse the response dictionary from the metadata service
params = [
'columns',
'cluster',
'database',
'owners',
'is_view',
'schema',
'table_description',
'table_name',
'table_readers',
'table_writer',
'tags',
'watermarks',
'source',
]
results = {key: response.json().get(key, None) for key in params}
results['key'] = table_key
is_editable = results['schema'] not in app.config['UNEDITABLE_SCHEMAS']
results['is_editable'] = is_editable
# In the list of owners, sanitize each entry
results['owners'] = [_map_user_object_to_schema(owner) for owner in results['owners']]
# In the list of reader_objects, sanitize the reader value on each entry
readers = results['table_readers']
for reader_object in readers:
reader_object['reader'] = _map_user_object_to_schema(reader_object['reader'])
# If order is provided, we sort the column based on the pre-defined order
if app.config['COLUMN_STAT_ORDER']:
columns = results['columns']
for col in columns:
# the stat_type isn't defined in COLUMN_STAT_ORDER, we just use the max index for sorting
col['stats'].sort(key=lambda x: app.config['COLUMN_STAT_ORDER'].
get(x['stat_type'], len(app.config['COLUMN_STAT_ORDER'])))
col['is_editable'] = is_editable
# Temp code to make 'partition_key' and 'partition_value' part of the table
results['partition'] = _get_partition_data(results['watermarks'])
results_dict['tableData'] = results
table_data_raw = response.json()
# Ideally the response should include 'key' to begin with
table_data_raw['key'] = table_key
results_dict['tableData'] = marshall_table_full(table_data_raw)
results_dict['msg'] = 'Success'
return results_dict
except Exception as e:
......@@ -210,7 +155,7 @@ def _update_table_owner(*, table_key: str, method: str, owner: str) -> Dict[str,
def update_table_owner() -> Response:
try:
args = request.get_json()
table_key = get_table_key(args)
table_key = get_query_param(args, 'key')
owner = get_query_param(args, 'owner')
payload = jsonify(_update_table_owner(table_key=table_key, method=request.method, owner=owner))
......@@ -252,7 +197,7 @@ def get_last_indexed() -> Response:
def get_table_description() -> Response:
try:
table_endpoint = _get_table_endpoint()
table_key = get_table_key(request.args)
table_key = get_query_param(request.args, 'key')
url = '{0}/{1}/description'.format(table_endpoint, table_key)
......@@ -277,7 +222,7 @@ def get_table_description() -> Response:
def get_column_description() -> Response:
try:
table_endpoint = _get_table_endpoint()
table_key = get_table_key(request.args)
table_key = get_query_param(request.args, 'key')
column_name = get_query_param(request.args, 'column_name')
......@@ -311,7 +256,7 @@ def put_table_description() -> Response:
args = request.get_json()
table_endpoint = _get_table_endpoint()
table_key = get_table_key(args)
table_key = get_query_param(args, 'key')
description = get_query_param(args, 'description')
description = ' '.join(description.split())
......@@ -345,7 +290,7 @@ def put_column_description() -> Response:
try:
args = request.get_json()
table_key = get_table_key(args)
table_key = get_query_param(args, 'key')
table_endpoint = _get_table_endpoint()
column_name = get_query_param(args, 'column_name')
......@@ -414,7 +359,7 @@ def update_table_tags() -> Response:
method = request.method
table_endpoint = _get_table_endpoint()
table_key = get_table_key(args)
table_key = get_query_param(args, 'key')
tag = get_query_param(args, 'tag')
......
from typing import Dict
from flask import current_app as app
from amundsen_application.api.utils.request_utils import get_query_param
def get_table_key(args: Dict) -> str:
    """
    Builds the 'key' of a table resource in the form '{db}://{cluster}.{schema}/{table}'.

    :param args: Dict which includes 'db', 'cluster', 'schema', and 'table'
    :return: the table key
    """
    # Each part is read through get_query_param, in the order db, cluster, schema, table.
    parts = {name: get_query_param(args, name) for name in ('db', 'cluster', 'schema', 'table')}
    return '{db}://{cluster}.{schema}/{table}'.format(**parts)
from amundsen_application.models.user import load_user, dump_user
def marshall_table_partial(table: Dict) -> Dict:
......@@ -39,3 +26,74 @@ def marshall_table_partial(table: Dict) -> Dict:
'type': 'table',
'last_updated_epoch': table.get('last_updated_epoch', None),
}
def marshall_table_full(table: Dict) -> Dict:
    """
    Forms the full version of a table Dict, with additional and sanitized fields.

    Reader and column entries are updated in place, so the nested objects of the
    incoming ``table`` are modified as well. Missing 'owners', 'table_readers' or
    'columns' values are treated as empty rather than raising.

    :param table: Table Dict from metadata service
    :return: Table Dict limited to the known fields, plus 'is_editable' and 'partition'
    """
    # Filter and parse the response dictionary from the metadata service
    fields = [
        'columns',
        'cluster',
        'database',
        'is_view',
        'key',
        'owners',
        'schema',
        'source',
        'table_description',
        'table_name',
        'table_readers',
        'table_writer',
        'tags',
        'watermarks',
        # 'last_updated_timestamp' Exists on the response from metadata but is not used.
        # This should also be consolidated with 'last_updated_epoch' to have the same name and format.
    ]

    results = {field: table.get(field, None) for field in fields}

    is_editable = results['schema'] not in app.config['UNEDITABLE_SCHEMAS']
    results['is_editable'] = is_editable

    # In the list of owners, sanitize each entry
    results['owners'] = [_map_user_object_to_schema(owner) for owner in results['owners'] or []]

    # In the list of reader_objects, sanitize the reader value on each entry
    for reader_object in results['table_readers'] or []:
        reader_object['reader'] = _map_user_object_to_schema(reader_object['reader'])

    stat_order = app.config['COLUMN_STAT_ORDER']
    for col in results['columns'] or []:
        # Editability is a property of every column; it must not depend on whether
        # a stat ordering happens to be configured.
        col['is_editable'] = is_editable
        # If order is provided, we sort the column stats based on the pre-defined order
        if stat_order:
            # the stat_type isn't defined in COLUMN_STAT_ORDER, we just use the max index for sorting
            col['stats'].sort(key=lambda stat: stat_order.get(stat['stat_type'], len(stat_order)))

    # Temp code to make 'partition_key' and 'partition_value' part of the table
    results['partition'] = _get_partition_data(results['watermarks'])
    return results
def _map_user_object_to_schema(u: Dict) -> Dict:
    """
    Passes a raw user Dict through load_user and then dump_user.

    :param u: user Dict to convert
    :return: the value produced by dump_user for the loaded user
    """
    user = load_user(u)
    return dump_user(user)
def _get_partition_data(watermarks: Dict) -> Dict:
    """
    Builds the partition summary of a table from its watermark records.

    :param watermarks: Watermark records of the table. Although annotated as Dict, the
        value is iterated as a sequence of dicts that each carry 'watermark_type',
        'partition_key' and 'partition_value'. May be empty or None.
    :return: {'is_partitioned': True, 'key': ..., 'value': ...} built from the first
        'high_watermark' record, or {'is_partitioned': False} when there is none.
    """
    if watermarks:
        # Give next() a default so that a list without any 'high_watermark' entry
        # falls through to "not partitioned" instead of raising StopIteration.
        high_watermark = next(
            (mark for mark in watermarks if mark['watermark_type'] == 'high_watermark'),
            None
        )
        if high_watermark:
            return {
                'is_partitioned': True,
                'key': high_watermark['partition_key'],
                'value': high_watermark['partition_value']
            }
    return {
        'is_partitioned': False
    }
......@@ -42,7 +42,7 @@ export interface StateFromProps {
}
export interface DispatchFromProps {
getTableData: (cluster: string, database: string, schema: string, tableName: string, searchIndex?: string, source?: string, ) => GetTableDataRequest;
getTableData: (key: string, searchIndex?: string, source?: string, ) => GetTableDataRequest;
getPreviewData: (queryParams: PreviewQueryParams) => void;
}
......@@ -57,9 +57,10 @@ interface TableDetailState {
export class TableDetail extends React.Component<TableDetailProps & RouteComponentProps<any>, TableDetailState> {
private cluster: string;
private database: string;
private displayName: string;
private key: string;
private schema: string;
private tableName: string;
private displayName: string;
public static defaultProps: TableDetailProps = {
getTableData: () => undefined,
getPreviewData: () => undefined,
......@@ -72,13 +73,13 @@ export class TableDetail extends React.Component<TableDetailProps & RouteCompone
is_editable: false,
is_view: false,
key: '',
partition: { is_partitioned: false },
schema: '',
source: { source: '', source_type: '' },
table_name: '',
table_description: '',
table_writer: { application_url: '', description: '', id: '', name: '' },
partition: { is_partitioned: false },
table_readers: [],
source: { source: '', source_type: '' },
watermarks: [],
},
};
......@@ -99,6 +100,13 @@ export class TableDetail extends React.Component<TableDetailProps & RouteCompone
this.tableName = params ? params.table : '';
this.displayName = params ? `${this.schema}.${this.tableName}` : '';
/*
This 'key' follows the `table_uri` format described in metadataservice. Because it contains the '/' character,
we can't pass it as a single URL parameter without encodeURIComponent, which makes for ugly URLs.
DO NOT CHANGE
*/
this.key = params ? `${this.database}://${this.cluster}.${this.schema}/${this.tableName}` : '';
this.state = {
isLoading: props.isLoading,
statusCode: props.statusCode,
......@@ -116,7 +124,7 @@ export class TableDetail extends React.Component<TableDetailProps & RouteCompone
window.history.replaceState({}, '', `${window.location.origin}${window.location.pathname}`);
}
this.props.getTableData(this.cluster, this.database, this.schema, this.tableName, searchIndex, source);
this.props.getTableData(this.key, searchIndex, source);
this.props.getPreviewData({ database: this.database, schema: this.schema, tableName: this.tableName });
}
......
......@@ -5,9 +5,9 @@ import { filterFromObj, sortTagsAlphabetical } from 'ducks/utilMethods';
/**
* Generates the query string parameters needed for requests that act on a particular table resource.
*/
export function getTableParams(tableDataObject: TableMetadata | GetTableDataRequest): string {
const { cluster, database, schema, table_name } = tableDataObject;
return `db=${database}&cluster=${cluster}&schema=${schema}&table=${table_name}`;
export function getTableQueryParams(tableDataObject: TableMetadata | GetTableDataRequest): string {
  // The key is URI-encoded before being placed in the query string.
  const encodedKey = encodeURIComponent(tableDataObject.key);
  return `key=${encodedKey}`;
}
/**
......
......@@ -11,11 +11,11 @@ const API_PATH = '/api/metadata/v0';
/** HELPERS **/
import {
getTableParams, getTableDataFromResponseData, getTableOwnersFromResponseData, getTableTagsFromResponseData,
getTableQueryParams, getTableDataFromResponseData, getTableOwnersFromResponseData, getTableTagsFromResponseData,
} from './helpers';
export function metadataTableTags(tableData: TableMetadata) {
const tableParams = getTableParams(tableData);
const tableParams = getTableQueryParams(tableData);
return axios.get(`${API_PATH}/table?${tableParams}&index=&source=`)
.then((response: AxiosResponse<TableDataResponse>) => {
......@@ -33,10 +33,7 @@ export function metadataUpdateTableTags(action, tableData) {
method: tagObject.methodName,
url: `${API_PATH}/update_table_tags`,
data: {
cluster: tableData.cluster,
db: tableData.database,
schema: tableData.schema,
table: tableData.table_name,
key: tableData.key,
tag: tagObject.tagName,
},
}
......@@ -46,7 +43,7 @@ export function metadataUpdateTableTags(action, tableData) {
export function metadataGetTableData(action: GetTableDataRequest) {
const { searchIndex, source } = action;
const tableParams = getTableParams(action);
const tableParams = getTableQueryParams(action);
return axios.get(`${API_PATH}/table?${tableParams}&index=${searchIndex}&source=${source}`)
.then((response: AxiosResponse<TableDataResponse>) => {
......@@ -64,7 +61,7 @@ export function metadataGetTableData(action: GetTableDataRequest) {
}
export function metadataGetTableDescription(tableData: TableMetadata) {
const tableParams = getTableParams(tableData);
const tableParams = getTableQueryParams(tableData);
return axios.get(`${API_PATH}/v0/get_table_description?${tableParams}`)
.then((response: AxiosResponse<DescriptionResponse>) => {
tableData.table_description = response.data.description;
......@@ -82,17 +79,14 @@ export function metadataUpdateTableDescription(description: string, tableData: T
else {
return axios.put(`${API_PATH}/put_table_description`, {
description,
db: tableData.database,
cluster: tableData.cluster,
schema: tableData.schema,
table: tableData.table_name,
key: tableData.key,
source: 'user',
});
}
}
export function metadataTableOwners(tableData: TableMetadata) {
const tableParams = getTableParams(tableData);
const tableParams = getTableQueryParams(tableData);
return axios.get(`${API_PATH}/table?${tableParams}&index=&source=`)
.then((response: AxiosResponse<TableDataResponse>) => {
......@@ -104,17 +98,15 @@ export function metadataTableOwners(tableData: TableMetadata) {
}
/* TODO: Typing this method generates redux-saga related type errors that need more dedicated debugging */
export function metadataUpdateTableOwner(action, tableData) {
// TODO - Add 'key' to the action and remove 'tableData' as a param.
export function metadataUpdateTableOwner(action, tableData: TableMetadata) {
const updatePayloads = action.updateArray.map((item) => {
return {
method: item.method,
url: `${API_PATH}/update_table_owner`,
data: {
cluster: tableData.cluster,
db: tableData.database,
key: tableData.key,
owner: item.id,
schema: tableData.schema,
table: tableData.table_name,
},
}
});
......@@ -122,7 +114,7 @@ export function metadataUpdateTableOwner(action, tableData) {
}
export function metadataGetColumnDescription(columnIndex: number, tableData: TableMetadata) {
const tableParams = getTableParams(tableData);
const tableParams = getTableQueryParams(tableData);
const columnName = tableData.columns[columnIndex].name;
return axios.get(`${API_PATH}/get_column_description?${tableParams}&column_name=${columnName}`)
.then((response: AxiosResponse<DescriptionResponse>) => {
......@@ -142,11 +134,8 @@ export function metadataUpdateColumnDescription(description: string, columnIndex
const columnName = tableData.columns[columnIndex].name;
return axios.put(`${API_PATH}/put_column_description`, {
description,
db: tableData.database,
cluster: tableData.cluster,
column_name: columnName,
schema: tableData.schema,
table: tableData.table_name,
key: tableData.key,
source: 'user',
});
}
......
......@@ -38,14 +38,11 @@ export interface TableMetadataReducerState {
tableTags: TableTagsReducerState;
}
export function getTableData(cluster: string, database: string, schema: string, tableName: string, searchIndex?: string, source?: string): GetTableDataRequest {
export function getTableData(key: string, searchIndex?: string, source?: string): GetTableDataRequest {
return {
cluster,
database,
schema,
key,
searchIndex,
source,
table_name: tableName,
type: GetTableData.ACTION,
};
}
......
......@@ -22,12 +22,9 @@ export enum GetTableData {
}
export interface GetTableDataRequest {
type: GetTableData.ACTION;
cluster: string;
database: string;
schema: string;
key: string;
searchIndex?: string;
source?: string;
table_name: string;
}
export interface GetTableDataResponse {
type: GetTableData.SUCCESS | GetTableData.FAILURE;
......
......@@ -18,38 +18,30 @@ class MetadataTest(unittest.TestCase):
self.mock_popular_tables = {
'popular_tables': [
{
'table_name': 'test_table',
'schema': 'test_schema',
'database': 'test_db',
'cluster': 'test_cluster',
'table_description': 'This is a test'
'database': 'test_db',
'key': 'test_db://test_cluster.test_schema/test_table',
'schema': 'test_schema',
'table_description': 'This is a test',
'table_name': 'test_table',
'type': 'table',
}
]
}
self.expected_parsed_popular_tables = [
{
'name': 'test_table',
'cluster': 'test_cluster',
'database': 'test_db',
'description': 'This is a test',
'key': 'test_db://test_cluster.test_schema/test_table',
'schema_name': 'test_schema',
'type': 'table',
'name': 'test_table',
'last_updated_epoch': None,
}
]
self.mock_metadata = {
'database': 'test_db',
'cluster': 'test_cluster',
'schema': 'test_schema',
'table_name': 'test_table',
'table_description': 'This is a test',
'tags': [],
'table_readers': [
{'reader': {'email': 'test@test.com', 'first_name': None, 'last_name': None}, 'read_count': 100}
],
'owners': [],
'is_view': False,
'columns': [
{
'name': 'column_1',
......@@ -62,6 +54,17 @@ class MetadataTest(unittest.TestCase):
]
}
],
'database': 'test_db',
'is_view': False,
'key': 'test_db://test_cluster.test_schema/test_table',
'owners': [],
'schema': 'test_schema',
'table_name': 'test_table',
'table_description': 'This is a test',
'tags': [],
'table_readers': [
{'reader': {'email': 'test@test.com', 'first_name': None, 'last_name': None}, 'read_count': 100}
],
'watermarks': [
{'watermark_type': 'low_watermark', 'partition_key': 'ds', 'partition_value': '', 'create_time': ''},
{'watermark_type': 'high_watermark', 'partition_key': 'ds', 'partition_value': '', 'create_time': ''}
......@@ -295,10 +298,7 @@ class MetadataTest(unittest.TestCase):
response = test.get(
'/api/metadata/v0/table',
query_string=dict(
db='db',
cluster='cluster',
schema='schema',
table='table',
key='db://cluster.schema/table',
index='0',
source='test_source'
)
......@@ -320,10 +320,7 @@ class MetadataTest(unittest.TestCase):
response = test.put(
'/api/metadata/v0/update_table_owner',
json={
'db': 'db',
'cluster': 'cluster',
'schema': 'schema',
'table': 'table',
'key': 'db://cluster.schema/table',
'owner': 'test'
}
)
......@@ -370,7 +367,7 @@ class MetadataTest(unittest.TestCase):
with local_app.test_client() as test:
response = test.get(
'/api/metadata/v0/get_table_description',
query_string=dict(db='db', cluster='cluster', schema='schema', table='table')
query_string=dict(key='db://cluster.schema/table')
)
data = json.loads(response.data)
self.assertEqual(response.status_code, HTTPStatus.OK)
......@@ -389,7 +386,7 @@ class MetadataTest(unittest.TestCase):
with local_app.test_client() as test:
response = test.get(
'/api/metadata/v0/get_table_description',
query_string=dict(db='db', cluster='cluster', schema='schema', table='table')
query_string=dict(key='db://cluster.schema/table')
)
self.assertEqual(response.status_code, HTTPStatus.BAD_REQUEST)
......@@ -406,10 +403,7 @@ class MetadataTest(unittest.TestCase):
response = test.put(
'/api/metadata/v0/put_table_description',
json={
'db': 'db',
'cluster': 'cluster',
'schema': 'schema',
'table': 'table',
'key': 'db://cluster.schema/table',
'description': 'test',
'source': 'source'
}
......@@ -430,10 +424,7 @@ class MetadataTest(unittest.TestCase):
response = test.get(
'/api/metadata/v0/get_column_description',
query_string=dict(
db='db',
cluster='cluster',
schema='schema',
table='table',
key='db://cluster.schema/table',
index='0',
column_name='colA'
)
......@@ -457,10 +448,7 @@ class MetadataTest(unittest.TestCase):
response = test.get(
'/api/metadata/v0/get_column_description',
query_string=dict(
db='db',
cluster='cluster',
schema='schema',
table='table',
key='db://cluster.schema/table',
index='0',
column_name='colA'
)
......@@ -481,10 +469,7 @@ class MetadataTest(unittest.TestCase):
response = test.put(
'/api/metadata/v0/put_column_description',
json={
'db': 'db',
'cluster': 'cluster',
'schema': 'schema',
'table': 'table',
'key': 'db://cluster.schema/table',
'column_name': 'col',
'description': 'test',
'source': 'source'
......@@ -519,10 +504,7 @@ class MetadataTest(unittest.TestCase):
response = test.put(
'/api/metadata/v0/update_table_tags',
json={
'db': 'db',
'cluster': 'cluster',
'schema': 'schema',
'table': 'table',
'key': 'db://cluster.schema/table',
'tag': 'tag_5'
}
)
......@@ -541,10 +523,7 @@ class MetadataTest(unittest.TestCase):
response = test.delete(
'/api/metadata/v0/update_table_tags',
json={
'db': 'db',
'cluster': 'cluster',
'schema': 'schema',
'table': 'table',
'key': 'db://cluster.schema/table',
'tag': 'tag_5'
}
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment