Shaik Janipasha / AmendsenProject · Commits · 8b9456f1

Unverified commit 8b9456f1, authored Jun 19, 2019 by Tao Feng; committed by GitHub on Jun 19, 2019
Refactor tableESDocument to match schema in search service (#92)
parent 1f15f5a3
Showing 6 changed files with 55 additions and 54 deletions
databuilder/extractor/neo4j_search_data_extractor.py             +3  -3
databuilder/models/table_elasticsearch_document.py               +11 -10
setup.py                                                          +1  -1
tests/unit/extractor/test_neo4j_extractor.py                      +5  -5
tests/unit/loader/test_file_system_elasticsearch_json_loader.py  +25 -25
tests/unit/models/test_table_elasticsearch_document.py           +10 -10
databuilder/extractor/neo4j_search_data_extractor.py

@@ -27,13 +27,13 @@ class Neo4jSearchDataExtractor(Extractor):
         OPTIONAL MATCH (table)-[:TAGGED_BY]->(tags:Tag)
         OPTIONAL MATCH (table)-[:LAST_UPDATED_AT]->(time_stamp:Timestamp)
         RETURN db.name as database, cluster.name AS cluster, schema.name AS schema_name,
-        table.name AS table_name, table.key AS table_key, table_description.description AS table_description,
-        time_stamp.last_updated_timestamp AS table_last_updated_epoch,
+        table.name AS name, table.key AS key, table_description.description AS description,
+        time_stamp.last_updated_timestamp AS last_updated_epoch,
         EXTRACT(c in COLLECT(DISTINCT cols)| c.name) AS column_names,
         EXTRACT(cd IN COLLECT(DISTINCT col_description)| cd.description) AS column_descriptions,
         REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_usage,
         COUNT(DISTINCT user.email) as unique_usage,
-        COLLECT(DISTINCT tags.key) as tag_names
+        COLLECT(DISTINCT tags.key) as tags
         ORDER BY table.name;
         """
    )
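The renamed aliases are not cosmetic: they line up one-for-one with the keyword arguments of the refactored TableESDocument in the next file. The snippet below is only a sketch of that correspondence, with invented sample values; how Neo4jSearchDataExtractor actually hands rows to the model is not shown in this diff.

# Sketch only, not code from this commit. A result row shaped like the updated
# query output can be expanded straight into the refactored model, because the
# Cypher aliases now match the constructor's keyword names.
from databuilder.models.table_elasticsearch_document import TableESDocument

row = dict(database='hive', cluster='gold', schema_name='core',
           name='orders', key='hive://gold.core/orders', description='Order facts',
           last_updated_epoch=1560902400,
           column_names=['order_id'], column_descriptions=['primary key'],
           total_usage=100, unique_usage=10, tags=['core'])

doc = TableESDocument(**row)  # every alias maps onto a keyword argument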
databuilder/models/table_elasticsearch_document.py

@@ -11,27 +11,28 @@ class TableESDocument(ElasticsearchDocument):
                  database,  # type: str
                  cluster,  # type: str
                  schema_name,  # type: str
-                 table_name,  # type: str
-                 table_key,  # type: str
-                 table_description,  # type: str
-                 table_last_updated_epoch,  # type: Optional[int]
+                 name,  # type: str
+                 key,  # type: str
+                 description,  # type: str
+                 last_updated_epoch,  # type: Optional[int]
                  column_names,  # type: List[str]
                  column_descriptions,  # type: List[str]
                  total_usage,  # type: int
                  unique_usage,  # type: int
-                 tag_names,  # type: List[str]
+                 tags,  # type: List[str]
                  ):
         # type: (...) -> None
         self.database = database
         self.cluster = cluster
         self.schema_name = schema_name
-        self.table_name = table_name
-        self.table_key = table_key
-        self.table_description = table_description
-        self.table_last_updated_epoch = int(table_last_updated_epoch) if table_last_updated_epoch else None
+        self.name = name
+        self.key = key
+        self.description = description
+        # todo: use last_updated_timestamp to match the record in metadata
+        self.last_updated_epoch = int(last_updated_epoch) if last_updated_epoch else None
         self.column_names = column_names
         self.column_descriptions = column_descriptions
         self.total_usage = total_usage
         self.unique_usage = unique_usage
         # todo: will include tag_type once we have better understanding from UI flow.
-        self.tag_names = tag_names
+        self.tags = tags
setup.py

 from setuptools import setup, find_packages

-__version__ = '1.3.1'
+__version__ = '1.3.2'

 setup(
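The version bump from 1.3.1 to 1.3.2 means the renamed fields ship with the next release, so callers built against the old keyword names should pin accordingly. A version guard like the one below can flag that; the distribution name 'amundsen-databuilder' is an assumption here, since the name field of setup.py is not shown in this diff.

# Hedged sketch, not part of this commit: fail fast if an older databuilder
# (still using table_name/table_key/tag_names) is installed. The distribution
# name 'amundsen-databuilder' is an assumption.
import pkg_resources

installed = pkg_resources.get_distribution('amundsen-databuilder').version
if pkg_resources.parse_version(installed) < pkg_resources.parse_version('1.3.2'):
    raise RuntimeError('upgrade to 1.3.2+ before switching to the renamed TableESDocument fields')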
tests/unit/extractor/test_neo4j_extractor.py

@@ -103,15 +103,15 @@ class TestNeo4jExtractor(unittest.TestCase):
         result_dict = dict(database='test_database',
                            cluster='test_cluster',
                            schema_name='test_schema',
-                           table_name='test_table_name',
-                           table_key='test_table_key',
-                           table_description='test_table_description',
-                           table_last_updated_epoch=123456789,
+                           name='test_table_name',
+                           key='test_table_key',
+                           description='test_table_description',
+                           last_updated_epoch=123456789,
                            column_names=['test_col1', 'test_col2', 'test_col3'],
                            column_descriptions=['test_description1', 'test_description2', ''],
                            total_usage=100,
                            unique_usage=5,
-                           tag_names=['hive'])
+                           tags=['hive'])

         extractor.results = [result_dict]
         result_obj = extractor.extract()
tests/unit/loader/test_file_system_elasticsearch_json_loader.py

@@ -64,15 +64,15 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
         data = dict(database='test_database',
                     cluster='test_cluster',
                     schema_name='test_schema',
-                    table_name='test_table',
-                    table_key='test_table_key',
-                    table_last_updated_epoch=123456789,
-                    table_description='test_description',
+                    name='test_table',
+                    key='test_table_key',
+                    last_updated_epoch=123456789,
+                    description='test_description',
                     column_names=['test_col1', 'test_col2'],
                     column_descriptions=['test_comment1', 'test_comment2'],
                     total_usage=10,
                     unique_usage=5,
-                    tag_names=['test_tag1', 'test_tag2'])
+                    tags=['test_tag1', 'test_tag2'])

         with self.assertRaises(Exception) as context:
             loader.load(data)  # type: ignore

@@ -92,25 +92,25 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
         data = TableESDocument(database='test_database',
                                cluster='test_cluster',
                                schema_name='test_schema',
-                               table_name='test_table',
-                               table_key='test_table_key',
-                               table_last_updated_epoch=123456789,
-                               table_description='test_description',
+                               name='test_table',
+                               key='test_table_key',
+                               last_updated_epoch=123456789,
+                               description='test_description',
                                column_names=['test_col1', 'test_col2'],
                                column_descriptions=['test_comment1', 'test_comment2'],
                                total_usage=10,
                                unique_usage=5,
-                               tag_names=['test_tag1', 'test_tag2'])
+                               tags=['test_tag1', 'test_tag2'])

         loader.load(data)
         loader.close()

         expected = [
-            ('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
+            ('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
              '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
-             '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
-             '"table_last_updated_epoch": 123456789,'
-             '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
-             '"tag_names": ["test_tag1", "test_tag2"]}')
+             '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
+             '"last_updated_epoch": 123456789,'
+             '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
+             '"tags": ["test_tag1", "test_tag2"]}')
         ]

         self._check_results_helper(expected=expected)

@@ -128,27 +128,27 @@ class TestFSElasticsearchJSONLoader(unittest.TestCase):
         data = [TableESDocument(database='test_database',
                                 cluster='test_cluster',
                                 schema_name='test_schema',
-                                table_name='test_table',
-                                table_key='test_table_key',
-                                table_last_updated_epoch=123456789,
-                                table_description='test_description',
+                                name='test_table',
+                                key='test_table_key',
+                                last_updated_epoch=123456789,
+                                description='test_description',
                                 column_names=['test_col1', 'test_col2'],
                                 column_descriptions=['test_comment1', 'test_comment2'],
                                 total_usage=10,
                                 unique_usage=5,
-                                tag_names=['test_tag1', 'test_tag2'])] * 5
+                                tags=['test_tag1', 'test_tag2'])] * 5

         for d in data:
             loader.load(d)
         loader.close()

         expected = [
-            ('{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
+            ('{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
              '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
-             '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
-             '"table_last_updated_epoch": 123456789,'
-             '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
-             '"tag_names": ["test_tag1", "test_tag2"]}')
+             '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
+             '"last_updated_epoch": 123456789,'
+             '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
+             '"tags": ["test_tag1", "test_tag2"]}')
         ] * 5

         self._check_results_helper(expected=expected)
tests/unit/models/test_table_elasticsearch_document.py

@@ -14,28 +14,28 @@ class TestTableElasticsearchDocument(unittest.TestCase):
         test_obj = TableESDocument(database='test_database',
                                    cluster='test_cluster',
                                    schema_name='test_schema',
-                                   table_name='test_table',
-                                   table_key='test_table_key',
-                                   table_last_updated_epoch=123456789,
-                                   table_description='test_table_description',
+                                   name='test_table',
+                                   key='test_table_key',
+                                   last_updated_epoch=123456789,
+                                   description='test_table_description',
                                    column_names=['test_col1', 'test_col2'],
                                    column_descriptions=['test_description1', 'test_description2'],
                                    total_usage=100,
                                    unique_usage=10,
-                                   tag_names=['test'])
+                                   tags=['test'])

         expected_document_dict = {"database": "test_database",
                                   "cluster": "test_cluster",
                                   "schema_name": "test_schema",
-                                  "table_name": "test_table",
-                                  "table_key": "test_table_key",
-                                  "table_last_updated_epoch": 123456789,
-                                  "table_description": "test_table_description",
+                                  "name": "test_table",
+                                  "key": "test_table_key",
+                                  "last_updated_epoch": 123456789,
+                                  "description": "test_table_description",
                                   "column_names": ["test_col1", "test_col2"],
                                   "column_descriptions": ["test_description1", "test_description2"],
                                   "total_usage": 100,
                                   "unique_usage": 10,
-                                  "tag_names": ["test"]
+                                  "tags": ["test"]
                                   }

         result = test_obj.to_json()
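Taken together, the diffs rename five fields: table_name, table_key, table_description, table_last_updated_epoch, and tag_names. The helper below is not part of the commit; it simply collects that old-to-new key mapping in one place, which may be handy for adapting stored documents or caller-side dicts still using the old schema.

# Not from this commit: the field renames collected from the diffs above.
FIELD_RENAMES = {
    'table_name': 'name',
    'table_key': 'key',
    'table_description': 'description',
    'table_last_updated_epoch': 'last_updated_epoch',
    'tag_names': 'tags',
}


def migrate_table_document(old_doc):
    # type: (dict) -> dict
    """Return a copy of old_doc with pre-1.3.2 keys renamed to the new schema."""
    return {FIELD_RENAMES.get(key, key): value for key, value in old_doc.items()}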