Unverified Commit 0b47694e authored by Dorian Johnson's avatar Dorian Johnson Committed by GitHub

Merge pull request from GHSA-47qg-q58v-7vrp

* app api: extract table editibility in metadata_utils into function

This wrapper determines if a pair of a schema and table are editable. We'd
like to use this logic elsewhere, so wrap into a function and test.
Signed-off-by: 's avatarDorian Johnson <2020@dorianj.net>

* app metadata_utils: minor code cleanup in _parse_editable_rule
Signed-off-by: 's avatarDorian Johnson <2020@dorianj.net>

* requirements.txt: add dataclasses

This allows using dataclasses for better type safety in 3.6. In 3.7 they
are built-in.
Signed-off-by: 's avatarDorian Johnson <2020@dorianj.net>

* server table metadata utils: add parser of table keys
Signed-off-by: 's avatarDorian Johnson <2020@dorianj.net>

* api put_table_description: test for editability
Signed-off-by: 's avatarDorian Johnson <2020@dorianj.net>

* api put_column_description: respect table editability
Signed-off-by: 's avatarDorian Johnson <2020@dorianj.net>
parent d6b89091
...@@ -14,8 +14,8 @@ from amundsen_application.log.action_log import action_logging ...@@ -14,8 +14,8 @@ from amundsen_application.log.action_log import action_logging
from amundsen_application.models.user import load_user, dump_user from amundsen_application.models.user import load_user, dump_user
from amundsen_application.api.utils.metadata_utils import marshall_table_partial, marshall_table_full,\ from amundsen_application.api.utils.metadata_utils import is_table_editable, marshall_table_partial,\
marshall_dashboard_partial, marshall_dashboard_full marshall_table_full,marshall_dashboard_partial, marshall_dashboard_full, TableUri
from amundsen_application.api.utils.request_utils import get_query_param, request_metadata from amundsen_application.api.utils.request_utils import get_query_param, request_metadata
...@@ -280,6 +280,10 @@ def put_table_description() -> Response: ...@@ -280,6 +280,10 @@ def put_table_description() -> Response:
description = get_query_param(args, 'description') description = get_query_param(args, 'description')
src = get_query_param(args, 'source') src = get_query_param(args, 'source')
table_uri = TableUri.from_uri(table_key)
if not is_table_editable(table_uri.schema, table_uri.table):
return make_response('', HTTPStatus.FORBIDDEN)
url = '{0}/{1}/description'.format(table_endpoint, table_key) url = '{0}/{1}/description'.format(table_endpoint, table_key)
_log_put_table_description(table_key=table_key, description=description, source=src) _log_put_table_description(table_key=table_key, description=description, source=src)
...@@ -316,6 +320,10 @@ def put_column_description() -> Response: ...@@ -316,6 +320,10 @@ def put_column_description() -> Response:
src = get_query_param(args, 'source') src = get_query_param(args, 'source')
table_uri = TableUri.from_uri(table_key)
if not is_table_editable(table_uri.schema, table_uri.table):
return make_response('', HTTPStatus.FORBIDDEN)
url = '{0}/{1}/column/{2}/description'.format(table_endpoint, table_key, column_name) url = '{0}/{1}/column/{2}/description'.format(table_endpoint, table_key, column_name)
_log_put_column_description(table_key=table_key, column_name=column_name, description=description, source=src) _log_put_column_description(table_key=table_key, column_name=column_name, description=description, source=src)
......
...@@ -2,7 +2,9 @@ ...@@ -2,7 +2,9 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import logging import logging
import urllib.parse
from dataclasses import dataclass
from typing import Any, Dict, List from typing import Any, Dict, List
from amundsen_common.models.dashboard import DashboardSummary, DashboardSummarySchema from amundsen_common.models.dashboard import DashboardSummary, DashboardSummarySchema
...@@ -14,6 +16,28 @@ from flask import current_app as app ...@@ -14,6 +16,28 @@ from flask import current_app as app
import re import re
@dataclass
class TableUri:
database: str
cluster: str
schema: str
table: str
def __str__(self) -> str:
return f"{self.database}://{self.cluster}.{self.schema}/{self.table}"
@classmethod
def from_uri(cls, uri: str) -> 'TableUri':
parsed = urllib.parse.urlparse(uri)
cluster, schema = parsed.netloc.rsplit('.', 1)
return TableUri(
database=parsed.scheme,
cluster=cluster,
schema=schema,
table=parsed.path.lstrip('/')
)
def marshall_table_partial(table_dict: Dict) -> Dict: def marshall_table_partial(table_dict: Dict) -> Dict:
""" """
Forms a short version of a table Dict, with selected fields and an added 'key' Forms a short version of a table Dict, with selected fields and an added 'key'
...@@ -48,19 +72,28 @@ def _parse_editable_rule(rule: MatchRuleObject, ...@@ -48,19 +72,28 @@ def _parse_editable_rule(rule: MatchRuleObject,
if rule.schema_regex and rule.table_name_regex: if rule.schema_regex and rule.table_name_regex:
match_schema = re.match(rule.schema_regex, schema) match_schema = re.match(rule.schema_regex, schema)
match_table = re.match(rule.table_name_regex, table) match_table = re.match(rule.table_name_regex, table)
if match_schema and match_table: return not (match_schema and match_table)
return False
return True
if rule.schema_regex: if rule.schema_regex:
match_schema = re.match(rule.schema_regex, schema) return not re.match(rule.schema_regex, schema)
if match_schema:
return False
return True
if rule.table_name_regex: if rule.table_name_regex:
match_table = re.match(rule.table_name_regex, table) return not re.match(rule.table_name_regex, table)
if match_table:
return False
return True return True
def is_table_editable(schema_name: str, table_name: str, cfg: any=None) -> bool:
if cfg is None:
cfg = app.config
if schema_name in cfg['UNEDITABLE_SCHEMAS']:
return False
for rule in cfg['UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES']:
if not _parse_editable_rule(rule, schema_name, table_name):
return False
return True return True
...@@ -76,17 +109,7 @@ def marshall_table_full(table_dict: Dict) -> Dict: ...@@ -76,17 +109,7 @@ def marshall_table_full(table_dict: Dict) -> Dict:
table: Table = schema.load(table_dict).data table: Table = schema.load(table_dict).data
results: Dict[str, Any] = schema.dump(table).data results: Dict[str, Any] = schema.dump(table).data
# Check if schema is uneditable results['is_editable'] = is_table_editable(results['schema'], results['name'])
is_editable_schema = results['schema'] not in app.config['UNEDITABLE_SCHEMAS']
# Check if Table Description is uneditable
is_editable_table = True
uneditable_table_desc_match_rules = app.config['UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES']
for rule in uneditable_table_desc_match_rules:
is_editable_table = is_editable_table and _parse_editable_rule(rule, results['schema'], results['name'])
is_editable = is_editable_schema and is_editable_table
results['is_editable'] = is_editable
# TODO - Cleanup https://github.com/lyft/amundsen/issues/296 # TODO - Cleanup https://github.com/lyft/amundsen/issues/296
# This code will try to supplement some missing data since the data here is incomplete. # This code will try to supplement some missing data since the data here is incomplete.
......
...@@ -84,3 +84,8 @@ asana==0.10.3 ...@@ -84,3 +84,8 @@ asana==0.10.3
# Retrying library # Retrying library
retrying>=1.3.3,<2.0 retrying>=1.3.3,<2.0
# Backport of PEP 557, Data Classes for Python 3.6.
# License: Apache 2.0
# Upstream url: https://pypi.org/project/dataclasses/
dataclasses==0.8; python_version < '3.7'
...@@ -4,12 +4,14 @@ ...@@ -4,12 +4,14 @@
import json import json
import responses import responses
import unittest import unittest
from unittest.mock import patch
from http import HTTPStatus from http import HTTPStatus
from amundsen_application import create_app from amundsen_application import create_app
from amundsen_application.api.metadata.v0 import TABLE_ENDPOINT, LAST_INDEXED_ENDPOINT,\ from amundsen_application.api.metadata.v0 import TABLE_ENDPOINT, LAST_INDEXED_ENDPOINT,\
POPULAR_TABLES_ENDPOINT, TAGS_ENDPOINT, USER_ENDPOINT, DASHBOARD_ENDPOINT POPULAR_TABLES_ENDPOINT, TAGS_ENDPOINT, USER_ENDPOINT, DASHBOARD_ENDPOINT
from amundsen_application.config import MatchRuleObject
from amundsen_application.tests.test_utils import TEST_USER_ID from amundsen_application.tests.test_utils import TEST_USER_ID
...@@ -656,6 +658,28 @@ class MetadataTest(unittest.TestCase): ...@@ -656,6 +658,28 @@ class MetadataTest(unittest.TestCase):
) )
self.assertEqual(response.status_code, HTTPStatus.OK) self.assertEqual(response.status_code, HTTPStatus.OK)
@responses.activate
def test_put_table_description_denied(self) -> None:
"""
Test put_table_description that should be rejected due to permissions.
:return:
"""
url = local_app.config['METADATASERVICE_BASE'] + TABLE_ENDPOINT + \
'/db://cluster.schema/table/column/colA/description'
responses.add(responses.GET, url, json={'description': 'This is a test'}, status=HTTPStatus.OK)
with patch.dict(local_app.config, {'UNEDITABLE_SCHEMAS': set(['schema'])}):
with local_app.test_client() as test:
response = test.put(
'/api/metadata/v0/put_table_description',
json={
'key': 'db://cluster.schema/table',
'description': 'test',
'source': 'source'
}
)
self.assertEqual(response.status_code, HTTPStatus.FORBIDDEN)
@responses.activate @responses.activate
def test_get_column_description_success(self) -> None: def test_get_column_description_success(self) -> None:
""" """
...@@ -723,6 +747,31 @@ class MetadataTest(unittest.TestCase): ...@@ -723,6 +747,31 @@ class MetadataTest(unittest.TestCase):
) )
self.assertEqual(response.status_code, HTTPStatus.OK) self.assertEqual(response.status_code, HTTPStatus.OK)
@responses.activate
def test_put_column_description_denied(self) -> None:
"""
Test put_column_description on an unwritable table.
:return:
"""
url = local_app.config['METADATASERVICE_BASE'] + TABLE_ENDPOINT + \
'/db://cluster.schema/an_uneditable_table/column/col/description'
responses.add(responses.PUT, url, json={}, status=HTTPStatus.OK)
rule = MatchRuleObject(table_name_regex=r".*uneditable_table.*")
with patch.dict(local_app.config, {'UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES': [rule]}):
with local_app.test_client() as test:
response = test.put(
'/api/metadata/v0/put_column_description',
json={
'key': 'db://cluster.schema/an_uneditable_table',
'column_name': 'col',
'description': 'test',
'source': 'source'
}
)
self.assertEqual(response.status_code, HTTPStatus.FORBIDDEN)
@responses.activate @responses.activate
def test_get_tags(self) -> None: def test_get_tags(self) -> None:
""" """
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
import unittest import unittest
from amundsen_application.api.utils.metadata_utils import _convert_prog_descriptions, _sort_prog_descriptions, \ from amundsen_application.api.utils.metadata_utils import _convert_prog_descriptions, _sort_prog_descriptions, \
_parse_editable_rule _parse_editable_rule, is_table_editable, TableUri
from amundsen_application.config import MatchRuleObject from amundsen_application.config import MatchRuleObject
from amundsen_application import create_app from amundsen_application import create_app
...@@ -144,3 +144,64 @@ class UneditableTableDescriptionTest(unittest.TestCase): ...@@ -144,3 +144,64 @@ class UneditableTableDescriptionTest(unittest.TestCase):
self.assertEqual(_parse_editable_rule(test_match_rule, 'schema1', 'test_table'), True) self.assertEqual(_parse_editable_rule(test_match_rule, 'schema1', 'test_table'), True)
self.assertEqual(_parse_editable_rule(test_match_rule, 'schema3', 'other_test_table'), True) self.assertEqual(_parse_editable_rule(test_match_rule, 'schema3', 'other_test_table'), True)
self.assertEqual(_parse_editable_rule(test_match_rule, 'schema3', 'test_table'), True) self.assertEqual(_parse_editable_rule(test_match_rule, 'schema3', 'test_table'), True)
class TableEditabilityWrapper(unittest.TestCase):
def setUp(self) -> None:
pass
def test_empty_allowed(self) -> None:
mockConfig = {
'UNEDITABLE_SCHEMAS': [],
'UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES': [],
}
self.assertTrue(is_table_editable('anyschema', 'anytable', mockConfig))
def test_schema(self) -> None:
mockConfig = {
'UNEDITABLE_SCHEMAS': ['uneditable_schema'],
'UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES': [],
}
self.assertTrue(is_table_editable('different_schema', 'anytable', mockConfig))
self.assertFalse(is_table_editable('uneditable_schema', 'anytable', mockConfig))
def test_schema_match_rule(self) -> None:
mockConfig = {
'UNEDITABLE_SCHEMAS': [''],
'UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES': [
MatchRuleObject(schema_regex=r"^(uneditable).*"),
],
}
self.assertTrue(is_table_editable('not_uneditable_schema', 'anytable', mockConfig))
self.assertFalse(is_table_editable('uneditable_schema', 'anytable', mockConfig))
def test_schema_table_match_rule(self) -> None:
mockConfig = {
'UNEDITABLE_SCHEMAS': [''],
'UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES': [
MatchRuleObject(schema_regex=r"^first.*", table_name_regex=r".*bad.*")
],
}
self.assertFalse(is_table_editable('first', 'bad', mockConfig))
self.assertFalse(is_table_editable('first', 'also_bad', mockConfig))
self.assertTrue(is_table_editable('first', 'good', mockConfig))
self.assertTrue(is_table_editable('not_first', 'bad', mockConfig))
self.assertTrue(is_table_editable('second', 'bad', mockConfig))
class TableUriObject(unittest.TestCase):
def test_simple_constructor(self) -> None:
uri = TableUri("db", "clstr", "schm", "tbl")
self.assertEqual(str(uri), 'db://clstr.schm/tbl')
def test_from_uri_factory(self) -> None:
uri = TableUri.from_uri("db://clstr.with.dots.schm/tbl/with/slashes")
self.assertEqual(uri.database, 'db')
self.assertEqual(uri.cluster, 'clstr.with.dots')
self.assertEqual(uri.schema, 'schm')
self.assertEqual(uri.table, 'tbl/with/slashes')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment