Added Dashboard Elasticsearch mapping (#261)

* Added Dashboard ES mapping
* Update
* Added user ES mapping

Added Dashboard Elasticsearch mapping (#261)
* Added Dashboard ES mapping
* Update
* Added user ES mapping
c81aacf8 · Jin Hyuk Chang · GitHub · f4daa679 · c81aacf8 · c81aacf8
Unverified commit c81aacf8, authored May 12, 2020 by Jin Hyuk Chang; committed by GitHub on May 12, 2020.
Showing 3 changed files with 212 additions and 93 deletions

elasticsearch_constants.py databuilder/publisher/elasticsearch_constants.py +207 -0

elasticsearch_publisher.py databuilder/publisher/elasticsearch_publisher.py +4 -92

setup.py setup.py +1 -1

No files found.
--- a/databuilder/publisher/elasticsearch_constants.py
+++ b/databuilder/publisher/elasticsearch_constants.py
+import textwrap
+# Documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
+# Setting type to "text" for all fields that would be used in search
+# Using Simple Analyzer to convert all text into search terms
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html
+# Standard Analyzer is used for all text fields that don't explicitly specify an analyzer
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
+# TODO use amundsencommon for this when this project is updated to py3
+TABLE_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
+    """
+    {
+    "mappings":{
+        "table":{
+          "properties": {
+            "name": {
+              "type":"text",
+              "analyzer": "simple",
+              "fields": {
+                "raw": {
+                  "type": "keyword"
+                }
+              }
+            },
+            "schema": {
+              "type":"text",
+              "analyzer": "simple",
+              "fields": {
+                "raw": {
+                  "type": "keyword"
+                }
+              }
+            },
+            "display_name": {
+              "type": "keyword"
+            },
+            "last_updated_timestamp": {
+              "type": "date",
+              "format": "epoch_second"
+            },
+            "description": {
+              "type": "text",
+              "analyzer": "simple"
+            },
+            "column_names": {
+              "type":"text",
+              "analyzer": "simple",
+              "fields": {
+                "raw": {
+                  "type": "keyword"
+                }
+              }
+            },
+            "column_descriptions": {
+              "type": "text",
+              "analyzer": "simple"
+            },
+            "tags": {
+              "type": "keyword"
+            },
+            "badges": {
+              "type": "keyword"
+            },
+            "cluster": {
+              "type": "text"
+            },
+            "database": {
+              "type": "text",
+              "analyzer": "simple",
+              "fields": {
+                "raw": {
+                  "type": "keyword"
+                }
+              }
+            },
+            "key": {
+              "type": "keyword"
+            },
+            "total_usage":{
+              "type": "long"
+            },
+            "unique_usage": {
+              "type": "long"
+            }
+          }
+        }
+      }
+    }
+    """
+)
+DASHBOARD_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
+    """
+    {
+        "mappings":{
+            "dashboard":{
+              "properties": {
+                "group_name": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "name": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "description": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "group_description": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "query_names": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+    """
+)
+USER_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
+    """
+    {
+        "mappings":{
+            "user":{
+              "properties": {
+                "email": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "first_name": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "last_name": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "full_name": {
+                  "type":"text",
+                  "analyzer": "simple",
+                  "fields": {
+                    "raw": {
+                      "type": "keyword"
+                    }
+                  }
+                },
+                "total_read":{
+                  "type": "long"
+                },
+                "total_own": {
+                  "type": "long"
+                },
+                "total_follow": {
+                  "type": "long"
+                }
+              }
+            }
+          }
+        }
+    """
+)
--- a/databuilder/publisher/elasticsearch_publisher.py
+++ b/databuilder/publisher/elasticsearch_publisher.py
 import json
 import logging
-import textwrap
-from typing import List  # noqa: F401
-from pyhocon import ConfigTree  # noqa: F401
 from elasticsearch.exceptions import NotFoundError
+from pyhocon import ConfigTree  # noqa: F401
+from typing import List  # noqa: F401
 from databuilder.publisher.base_publisher import Publisher
+from databuilder.publisher.elasticsearch_constants import TABLE_ELASTICSEARCH_INDEX_MAPPING
 LOGGER = logging.getLogger(__name__)
@@ -32,95 +32,7 @@ class ElasticsearchPublisher(Publisher):
    # config to control how many max documents to publish at a time
    ELASTICSEARCH_PUBLISHER_BATCH_SIZE = 'batch_size'
-    # Specifying default mapping for elasticsearch index
+    DEFAULT_ELASTICSEARCH_INDEX_MAPPING = TABLE_ELASTICSEARCH_INDEX_MAPPING
-    # Documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
-    # Setting type to "text" for all fields that would be used in search
-    # Using Simple Analyzer to convert all text into search terms
-    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simple-analyzer.html
-    # Standard Analyzer is used for all text fields that don't explicitly specify an analyzer
-    # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
-    # TODO use amundsencommon for this when this project is updated to py3
-    DEFAULT_ELASTICSEARCH_INDEX_MAPPING = textwrap.dedent(
-        """
-        {
-        "mappings":{
-            "table":{
-              "properties": {
-                "name": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "schema": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "display_name": {
-                  "type": "keyword"
-                },
-                "last_updated_timestamp": {
-                  "type": "date",
-                  "format": "epoch_second"
-                },
-                "description": {
-                  "type": "text",
-                  "analyzer": "simple"
-                },
-                "column_names": {
-                  "type":"text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "column_descriptions": {
-                  "type": "text",
-                  "analyzer": "simple"
-                },
-                "tags": {
-                  "type": "keyword"
-                },
-                "badges": {
-                  "type": "keyword"
-                },
-                "cluster": {
-                  "type": "text"
-                },
-                "database": {
-                  "type": "text",
-                  "analyzer": "simple",
-                  "fields": {
-                    "raw": {
-                      "type": "keyword"
-                    }
-                  }
-                },
-                "key": {
-                  "type": "keyword"
-                },
-                "total_usage":{
-                  "type": "long"
-                },
-                "unique_usage": {
-                  "type": "long"
-                }
-              }
-            }
-          }
-        }
-        """
-    )
    def __init__(self):
        # type: () -> None

--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ import os
 from setuptools import setup, find_packages
-__version__ = '2.5.13'
+__version__ = '2.5.14'
 requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')
 with open(requirements_path) as requirements_file: