Add sample table tag data to quickstart (#153)

* Add sample table tag data to quickstart
* Add sample table tag data to quickstart

Add sample table tag data to quickstart (#153)
* Add sample table tag data to quickstart
* Add sample table tag data to quickstart
020bb6f3 · Mikhail Ivanov · Tao Feng · 92a36201 · 020bb6f3 · 020bb6f3
Commit 020bb6f3, authored Oct 23, 2019 by Mikhail Ivanov; committed by Tao Feng on Oct 23, 2019.
5 changed files
--- a/databuilder/models/table_metadata.py
+++ b/databuilder/models/table_metadata.py
@@ -26,6 +26,8 @@ class TagMetadata:
    @staticmethod
    def get_tag_key(name):
        # type: (str) -> str
+        if not name:
+            return ''
        return TagMetadata.TAG_KEY_FORMAT.format(tag=name)


@@ -134,12 +136,11 @@ class TableMetadata(Neo4jCsvSerializable):
                 description,  # type: Union[str, None]
                 columns=None,  # type: Iterable[ColumnMetadata]
                 is_view=False,  # type: bool
-                 tags=None,  # type: List
+                 tags=None,  # type: Union[List, str]
                 **kwargs  # type: Dict
                 ):
        # type: (...) -> None
        """
-        TODO: Add owners
        :param database:
        :param cluster:
        :param schema_name:
@@ -147,6 +148,7 @@ class TableMetadata(Neo4jCsvSerializable):
        :param description:
        :param columns:
        :param is_view: Indicate whether the table is a view or not
+        :param tags:
        :param kwargs: Put additional attributes to the table model if there is any.
        """
        self.database = database
@@ -157,6 +159,10 @@ class TableMetadata(Neo4jCsvSerializable):
        self.columns = columns if columns else []
        self.is_view = is_view
        self.attrs = None
+        if isinstance(tags, str):
+            tags = tags.split(',')
+        if isinstance(tags, list):
+            tags = [tag.lower().strip() for tag in tags]
        self.tags = tags

        if kwargs:
@@ -167,13 +173,15 @@ class TableMetadata(Neo4jCsvSerializable):

    def __repr__(self):
        # type: () -> str
-        return 'TableMetadata({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})'.format(self.database,
-                                                                                self.cluster,
-                                                                                self.schema_name,
-                                                                                self.name,
-                                                                                self.description,
-                                                                                self.columns,
-                                                                                self.is_view)
+        return 'TableMetadata({!r}, {!r}, {!r}, {!r} ' \
+            '{!r}, {!r}, {!r}, {!r})'.format(self.database,
+                                             self.cluster,
+                                             self.schema_name,
+                                             self.name,
+                                             self.description,
+                                             self.columns,
+                                             self.is_view,
+                                             self.tags)

    def _get_table_key(self):
        # type: () -> str

--- a/example/sample_data/sample_table.csv
+++ b/example/sample_data/sample_table.csv
-database,cluster,schema_name,table_name,table_desc
-hive,gold,test_schema,test_table1,"1st test table"
-dynamo,gold,test_schema,test_table2,"2nd test table"
+database,cluster,schema_name,table_name,table_desc,tags
+hive,gold,test_schema,test_table1,"1st test table","tag1,tag2"
+dynamo,gold,test_schema,test_table2,"2nd test table",
--- a/example/scripts/sample_data_loader.py
+++ b/example/scripts/sample_data_loader.py
@@ -62,7 +62,8 @@ def load_table_data_from_csv(file_name):
                    'cluster VARCHAR(64) NOT NULL, '
                    'schema_name VARCHAR(64) NOT NULL,'
                    'name VARCHAR(64) NOT NULL,'
-                    'description VARCHAR(64) NOT NULL)')
+                    'description VARCHAR(64) NOT NULL, '
+                    'tags VARCHAR(128) NOT NULL)')
        file_loc = 'example/sample_data/' + file_name
        with open(file_loc, 'r') as fin:
            dr = csv.DictReader(fin)
@@ -70,10 +71,11 @@ def load_table_data_from_csv(file_name):
                      i['cluster'],
                      i['schema_name'],
                      i['table_name'],
-                      i['table_desc']) for i in dr]
+                      i['table_desc'],
+                      i['tags']) for i in dr]

        cur.executemany("INSERT INTO test_table_metadata (database, cluster, "
-                        "schema_name, name, description) VALUES (?, ?, ?, ?, ?);", to_db)
+                        "schema_name, name, description, tags) VALUES (?, ?, ?, ?, ?, ?);", to_db)
        conn.commit()



--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ import os
 from setuptools import setup, find_packages


-__version__ = '1.4.8'
+__version__ = '1.4.9'


 requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')

--- a/tests/unit/models/test_table_metadata.py
+++ b/tests/unit/models/test_table_metadata.py
@@ -35,6 +35,9 @@ class TestTableMetadata(unittest.TestCase):
            ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0, ['col-tag1', 'col-tag2'])],
            is_view=False, tags=['tag1', 'tag2'], attr1='uri', attr2='attr2')

+        self.table_metadata5 = TableMetadata('hive', 'gold', 'test_schema5', 'test_table5', 'test_table5', [
+            ColumnMetadata('test_id1', 'description of test_table1', 'bigint', 0)], tags="tag3, tag4")
+
        self.expected_nodes_deduped = [
            {'name': 'test_table1', 'KEY': 'hive://gold.test_schema1/test_table1', 'LABEL': 'Table',
             'is_view:UNQUOTED': False},
@@ -196,6 +199,33 @@ class TestTableMetadata(unittest.TestCase):
        self.assertEqual(actual[6], expected_col_tag_rel1)
        self.assertEqual(actual[7], expected_col_tag_rel2)

+        # Test table tag field populated from str
+        node_row = self.table_metadata5.next_node()
+        actual = []
+        while node_row:
+            actual.append(node_row)
+            node_row = self.table_metadata5.next_node()
+
+        self.assertEqual(actual[2].get('LABEL'), 'Tag')
+        self.assertEqual(actual[2].get('KEY'), 'tag3')
+        self.assertEqual(actual[3].get('KEY'), 'tag4')
+
+        relation_row = self.table_metadata5.next_relation()
+        actual = []
+        while relation_row:
+            actual.append(relation_row)
+            relation_row = self.table_metadata5.next_relation()
+
+        # Table tag relationship
+        expected_tab_tag_rel3 = {'END_KEY': 'tag3', 'START_LABEL': 'Table', 'END_LABEL':
+                                 'Tag', 'START_KEY': 'hive://gold.test_schema5/test_table5',
+                                 'TYPE': 'TAGGED_BY', 'REVERSE_TYPE': 'TAG'}
+        expected_tab_tag_rel4 = {'END_KEY': 'tag4', 'START_LABEL': 'Table',
+                                 'END_LABEL': 'Tag', 'START_KEY': 'hive://gold.test_schema5/test_table5',
+                                 'TYPE': 'TAGGED_BY', 'REVERSE_TYPE': 'TAG'}
+        self.assertEqual(actual[2], expected_tab_tag_rel3)
+        self.assertEqual(actual[3], expected_tab_tag_rel4)
+

 if __name__ == '__main__':
    unittest.main()