Unverified Commit edce3cbb authored by Jin Hyuk Chang's avatar Jin Hyuk Chang Committed by GitHub

Neo4j Publisher to support desired state of relation (#69)

* [AMD-120] Add relation pre-processor in Neo4jPublisher

* Update

* Added DeleteRelationPreprocessor

* Added DeleteRelationPreprocessor

* Update

* Update
parent 014690ea
......@@ -13,6 +13,8 @@ from pyhocon import ConfigTree # noqa: F401
from typing import Set, List # noqa: F401
from databuilder.publisher.base_publisher import Publisher
from databuilder.publisher.neo4j_preprocessor import NoopRelationPreprocessor
# Config keys
# A directory that contains CSV files for nodes
......@@ -23,6 +25,8 @@ RELATION_FILES_DIR = 'relation_files_directory'
NEO4J_END_POINT_KEY = 'neo4j_endpoint'
# A transaction size that determines how often it commits.
NEO4J_TRANSCATION_SIZE = 'neo4j_transaction_size'
# A progress report frequency that determines how often it reports the progress.
NEO4J_PROGRESS_REPORT_FREQUENCY = 'neo4j_progress_report_frequency'
# A boolean flag to make it fail if relationship is not created
NEO4J_RELATIONSHIP_CREATION_CONFIRM = 'neo4j_relationship_creation_confirm'
......@@ -40,6 +44,8 @@ JOB_PUBLISH_TAG = 'job_publish_tag'
# Neo4j property name for published tag
PUBLISHED_TAG_PROPERTY_NAME = 'published_tag'
RELATION_PREPROCESSOR = 'relation_preprocessor'
# CSV HEADER
# A header with this suffix will be passed to the Neo4j statement without quotes
UNQUOTED_SUFFIX = ':UNQUOTED'
......@@ -69,8 +75,10 @@ RELATION_REQUIRED_KEYS = {RELATION_START_LABEL, RELATION_START_KEY,
RELATION_TYPE, RELATION_REVERSE_TYPE}
DEFAULT_CONFIG = ConfigFactory.from_dict({NEO4J_TRANSCATION_SIZE: 500,
NEO4J_PROGRESS_REPORT_FREQUENCY: 500,
NEO4J_RELATIONSHIP_CREATION_CONFIRM: False,
NEO4J_MAX_CONN_LIFE_TIME_SEC: 50})
NEO4J_MAX_CONN_LIFE_TIME_SEC: 50,
RELATION_PREPROCESSOR: NoopRelationPreprocessor()})
NODE_MERGE_TEMPLATE = Template("""MERGE (node:$LABEL {key: '${KEY}'})
ON CREATE SET ${create_prop_body}
......@@ -107,6 +115,8 @@ class Neo4jCsvPublisher(Publisher):
# type: (ConfigTree) -> None
conf = conf.with_fallback(DEFAULT_CONFIG)
self._count = 0 # type: int
self._progress_report_frequency = conf.get_int(NEO4J_PROGRESS_REPORT_FREQUENCY)
self._node_files = self._list_files(conf, NODE_FILES_DIR)
self._node_files_iter = iter(self._node_files)
......@@ -129,6 +139,8 @@ class Neo4jCsvPublisher(Publisher):
if not self.publish_tag:
raise Exception('{} should not be empty'.format(JOB_PUBLISH_TAG))
self._relation_preprocessor = conf.get(RELATION_PREPROCESSOR)
LOGGER.info('Publishing Node csv files {}, and Relation CSV files {}'
.format(self._node_files, self._relation_files))
......@@ -146,7 +158,7 @@ class Neo4jCsvPublisher(Publisher):
path = conf.get_string(path_key)
return [join(path, f) for f in listdir(path) if isfile(join(path, f))]
def publish_impl(self):
def publish_impl(self): # noqa: C901
# type: () -> None
"""
Publishes Nodes first and then Relations
......@@ -160,23 +172,33 @@ class Neo4jCsvPublisher(Publisher):
self._create_indices(node_file=node_file)
LOGGER.info('Publishing Node files: {}'.format(self._node_files))
while True:
try:
node_file = next(self._node_files_iter)
self._publish_node(node_file)
except StopIteration:
break
LOGGER.info('Publishing Relationship files: {}'.format(self._relation_files))
while True:
try:
relation_file = next(self._relation_files_iter)
self._publish_relation(relation_file)
except StopIteration:
break
# TODO: Add statsd support
LOGGER.info('Successfully published. Elapsed: {} seconds'.format(time.time() - start))
try:
tx = self._session.begin_transaction()
while True:
try:
node_file = next(self._node_files_iter)
tx = self._publish_node(node_file, tx=tx)
except StopIteration:
break
LOGGER.info('Publishing Relationship files: {}'.format(self._relation_files))
while True:
try:
relation_file = next(self._relation_files_iter)
tx = self._publish_relation(relation_file, tx=tx)
except StopIteration:
break
tx.commit()
LOGGER.info('Committed total {} statements'.format(self._count))
# TODO: Add statsd support
LOGGER.info('Successfully published. Elapsed: {} seconds'.format(time.time() - start))
except Exception as e:
LOGGER.exception('Failed to publish. Rolling back.')
if not tx.closed():
tx.rollback()
raise e
def get_scope(self):
# type: () -> str
......@@ -200,8 +222,8 @@ class Neo4jCsvPublisher(Publisher):
LOGGER.info('Indices have been created.')
def _publish_node(self, node_file):
# type: (str) -> None
def _publish_node(self, node_file, tx):
# type: (str, Transaction) -> Transaction
"""
Iterate over the csv records of a file, each csv record transform to Merge statement and will be executed.
All nodes should have a unique key, and this method will try to create unique index on the LABEL when it sees
......@@ -218,14 +240,12 @@ class Neo4jCsvPublisher(Publisher):
:param node_file:
:return:
"""
tx = self._session.begin_transaction()
with open(node_file, 'r') as node_csv:
for count, node_record in enumerate(csv.DictReader(node_csv)):
stmt = self.create_node_merge_statement(node_record=node_record)
tx = self._execute_statement(stmt, tx, count)
tx.commit()
LOGGER.info('Committed {} records'.format(count + 1))
tx = self._execute_statement(stmt, tx)
return tx
def is_create_only_node(self, node_record):
# type: (dict) -> bool
......@@ -257,8 +277,8 @@ class Neo4jCsvPublisher(Publisher):
return NODE_MERGE_TEMPLATE.substitute(params)
def _publish_relation(self, relation_file):
# type: (str) -> None
def _publish_relation(self, relation_file, tx):
# type: (str, Transaction) -> Transaction
"""
Creates relation between two nodes.
(In Amundsen, all relation is bi-directional)
......@@ -273,15 +293,33 @@ class Neo4jCsvPublisher(Publisher):
:return:
"""
tx = self._session.begin_transaction()
if self._relation_preprocessor.is_perform_preprocess():
LOGGER.info('Pre-processing relation with {}'.format(self._relation_preprocessor))
count = 0
with open(relation_file, 'r') as relation_csv:
for rel_record in csv.DictReader(relation_csv):
stmt, params = self._relation_preprocessor.preprocess_cypher(
start_label=rel_record[RELATION_START_LABEL],
end_label=rel_record[RELATION_END_LABEL],
start_key=rel_record[RELATION_START_KEY],
end_key=rel_record[RELATION_END_KEY],
relation=rel_record[RELATION_TYPE],
reverse_relation=rel_record[RELATION_REVERSE_TYPE])
if stmt:
tx = self._execute_statement(stmt, tx=tx, params=params)
count += 1
LOGGER.info('Executed pre-processing Cypher statement {} times'.format(count))
with open(relation_file, 'r') as relation_csv:
for count, rel_record in enumerate(csv.DictReader(relation_csv)):
stmt = self.create_relationship_merge_statement(rel_record=rel_record)
tx = self._execute_statement(stmt, tx, count,
tx = self._execute_statement(stmt, tx,
expect_result=self._confirm_rel_created)
tx.commit()
LOGGER.info('Committed {} records'.format(count + 1))
return tx
def create_relationship_merge_statement(self, rel_record):
# type: (dict) -> str
......@@ -352,9 +390,9 @@ ON MATCH SET {update_prop_body}""".format(create_prop_body=create_prop_body,
def _execute_statement(self,
stmt,
tx,
count,
params=None,
expect_result=False):
# type: (str, Transaction, int, bool) -> Transaction
# type: (str, Transaction, bool) -> Transaction
"""
Executes statement against Neo4j. If execution fails, it rollsback and raise exception.
......@@ -367,20 +405,24 @@ ON MATCH SET {update_prop_body}""".format(create_prop_body=create_prop_body,
"""
try:
if LOGGER.isEnabledFor(logging.DEBUG):
LOGGER.debug('Executing statement: {}'.format(stmt))
LOGGER.debug('Executing statement: {} with params {}'.format(stmt, params))
if six.PY2:
result = tx.run(unicode(stmt, errors='ignore')) # noqa
result = tx.run(unicode(stmt, errors='ignore'), parameters=params) # noqa
else:
result = tx.run(str(stmt).encode('utf-8', 'ignore'))
result = tx.run(str(stmt).encode('utf-8', 'ignore'), parameters=params)
if expect_result and not result.single():
raise RuntimeError('Failed to executed statement: {}'.format(stmt))
if count > 1 and count % self._transaction_size == 0:
self._count += 1
if self._count > 1 and self._count % self._transaction_size == 0:
tx.commit()
LOGGER.info('Committed {} records so far'.format(count))
LOGGER.info('Committed {} statements so far'.format(self._count))
return self._session.begin_transaction()
if self._count > 1 and self._count % self._progress_report_frequency == 0:
LOGGER.info('Processed {} statements so far'.format(self._count))
return tx
except Exception as e:
LOGGER.exception('Failed to execute Cypher query')
......
import abc
import logging
import six
import textwrap
LOGGER = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class RelationPreprocessor(object):
    """
    A pre-processor for relations. Prior to publishing Neo4j relations, RelationPreprocessor will
    be used for pre-processing.

    Neo4j Publisher will iterate through the relation file and call preprocess_cypher to perform
    any pre-process requested. For example, if you need the current job's relation data to be the
    desired state, you can emit a delete statement from preprocess_cypher_impl. With
    preprocess_cypher defined, and with a long transaction size, Neo4j publisher will atomically
    apply the desired state.
    """

    def preprocess_cypher(self,
                          start_label,
                          end_label,
                          start_key,
                          end_key,
                          relation,
                          reverse_relation):
        # type: (str, str, str, str, str, str) -> Tuple[str, Dict[str, str]]
        """
        Provides a Cypher statement that will be executed before publishing relations.

        :param start_label: label of the relation's start node
        :param end_label: label of the relation's end node
        :param start_key: key of the relation's start node
        :param end_key: key of the relation's end node
        :param relation: relation type
        :param reverse_relation: reverse relation type
        :return: a (statement, params) tuple from preprocess_cypher_impl, or None when
                 filter() rejects the record
        """
        if self.filter(start_label=start_label,
                       end_label=end_label,
                       start_key=start_key,
                       end_key=end_key,
                       relation=relation,
                       reverse_relation=reverse_relation):
            return self.preprocess_cypher_impl(start_label=start_label,
                                               end_label=end_label,
                                               start_key=start_key,
                                               end_key=end_key,
                                               relation=relation,
                                               reverse_relation=reverse_relation)

    @abc.abstractmethod
    def preprocess_cypher_impl(self,
                               start_label,
                               end_label,
                               start_key,
                               end_key,
                               relation,
                               reverse_relation):
        # type: (str, str, str, str, str, str) -> Tuple[str, Dict[str, str]]
        """
        Provides a Cypher statement that will be executed before publishing relations.

        :param start_label:
        :param end_label:
        :param start_key:
        :param end_key:
        :param relation:
        :param reverse_relation:
        :return: A Cypher statement with its parameters
        """
        pass

    def filter(self,
               start_label,
               end_label,
               start_key,
               end_key,
               relation,
               reverse_relation):
        # type: (str, str, str, str, str, str) -> bool
        """
        A method that filters pre-processing at the record level. Returns True if the record
        needs preprocessing, otherwise False.

        :param start_label:
        :param end_label:
        :param start_key:
        :param end_key:
        :param relation:
        :param reverse_relation:
        :return: bool. True if it needs preprocessing, otherwise False.
        """
        # Bug fix: the original body was the bare expression `True` with no `return`, so
        # filter() always returned None (falsy) and preprocess_cypher never delegated to
        # preprocess_cypher_impl. The default is to preprocess every record.
        return True

    @abc.abstractmethod
    def is_perform_preprocess(self):
        # type: () -> bool
        """
        A method for Neo4j Publisher to determine whether to perform pre-processing or not.
        Regard this method as a global filter.

        :return: True if you want to enable the pre-processing.
        """
        pass
class NoopRelationPreprocessor(RelationPreprocessor):
    """A do-nothing preprocessor: it disables relation pre-processing entirely."""

    def preprocess_cypher_impl(self,
                               start_label,
                               end_label,
                               start_key,
                               end_key,
                               relation,
                               reverse_relation):
        # type: (str, str, str, str, str, str) -> Tuple[str, Dict[str, str]]
        """No-op: never called in practice since is_perform_preprocess() is False."""
        pass

    def is_perform_preprocess(self):
        # type: () -> bool
        """Always False, so the publisher skips the pre-processing pass."""
        return False
class DeleteRelationPreprocessor(RelationPreprocessor):
    """
    A relation pre-processor that deletes relationships before Neo4jPublisher publishes
    relations.

    Example use case: take an example of an external privacy service trying to push personally
    identifiable information (PII) tags into Amundsen. It is fine to push a set of PII tags on
    the first push, but following updates become a challenge, as the external service does not
    know the current PII state in Amundsen. The easy solution is for the external service to
    know the desired state (certain columns should have certain PII tags) and push that
    information.

    The challenge then is how Amundsen applies the desired state. This is where
    DeleteRelationPreprocessor comes into the picture: we can use it to delete certain
    relations in the job and let Neo4jPublisher update to the desired state. Should there be a
    small window (between delete and update) where Amundsen data is not complete, you can
    increase Neo4jPublisher's transaction size to make it atomic. However, note that you should
    not set the transaction size too big, as Neo4j uses memory to store the transaction; this
    use case is appropriate for small batch jobs.
    """

    # NOTE: despite the historical "MERGE" name (kept for backward compatibility), this
    # statement DELETEs the bi-directional relation between the two keyed nodes.
    # `WITH r LIMIT 2` bounds the deletion to the (at most two) matched relationships.
    RELATION_MERGE_TEMPLATE = textwrap.dedent("""
    MATCH (n1:{start_label} {{key: $start_key }})-[r]-(n2:{end_label} {{key: $end_key }})
    {where_clause}
    WITH r LIMIT 2
    DELETE r
    RETURN count(*) as count;
    """)

    def __init__(self, label_tuples=None, where_clause=''):
        # type: (List[Tuple[str, str]], str) -> None
        """
        :param label_tuples: (start_label, end_label) pairs to restrict deletion to. Pairs are
                             treated as unordered: (A, B) also matches (B, A). If None or
                             empty, no label filtering is applied.
        :param where_clause: optional Cypher WHERE clause injected into the DELETE statement.
        """
        super(DeleteRelationPreprocessor, self).__init__()
        self._label_tuples = set(label_tuples) if label_tuples else set()
        # Register the reversed pairs as well so that the label filter is order-insensitive.
        reversed_label_tuples = [(t2, t1) for t1, t2 in self._label_tuples]
        self._label_tuples.update(reversed_label_tuples)
        self._where_clause = where_clause

    def preprocess_cypher_impl(self,
                               start_label,
                               end_label,
                               start_key,
                               end_key,
                               relation,
                               reverse_relation):
        # type: (str, str, str, str, str, str) -> Tuple[str, Dict[str, str]]
        """
        Provides a DELETE-relation Cypher query for the specific relation.

        :param start_label:
        :param end_label:
        :param start_key:
        :param end_key:
        :param relation:
        :param reverse_relation:
        :return: a (statement, params) tuple
        :raises Exception: if any of the labels or keys is missing
        """
        # Bug fix: the original used `or`, which only raised when ALL four values were
        # missing; the stated contract is that every label and key is required.
        if not (start_label and end_label and start_key and end_key):
            raise Exception('all labels and keys are required: {}'.format(locals()))

        params = {'start_key': start_key, 'end_key': end_key}
        return DeleteRelationPreprocessor.RELATION_MERGE_TEMPLATE.format(start_label=start_label,
                                                                         end_label=end_label,
                                                                         where_clause=self._where_clause), params

    def is_perform_preprocess(self):
        # type: () -> bool
        """Always True: this preprocessor is active for every relation file."""
        return True

    def filter(self,
               start_label,
               end_label,
               start_key,
               end_key,
               relation,
               reverse_relation):
        # type: (str, str, str, str, str, str) -> bool
        """
        If the pair of labels is what the client requested through label_tuples, filter returns
        True, meaning the record needs to be pre-processed.

        :param start_label:
        :param end_label:
        :param start_key:
        :param end_key:
        :param relation:
        :param reverse_relation:
        :return: bool. True if it needs preprocessing, otherwise False.
        """
        if self._label_tuples and (start_label, end_label) not in self._label_tuples:
            return False

        return True
......@@ -12,11 +12,18 @@ from databuilder.transformer.base_transformer \
from databuilder.utils.closer import Closer
LOGGER = logging.getLogger(__name__)
class DefaultTask(Task):
"""
A default task expecting to extract, transform and load.
"""
# Determines the frequency of the log on task progress
PROGRESS_REPORT_FREQUENCY = 'progress_report_frequency'
def __init__(self,
extractor,
loader,
......@@ -33,6 +40,9 @@ class DefaultTask(Task):
def init(self, conf):
# type: (ConfigTree) -> None
self._progress_report_frequency = \
conf.get_int('{}.{}'.format(self.get_scope(), DefaultTask.PROGRESS_REPORT_FREQUENCY), 500)
self.extractor.init(Scoped.get_scoped_conf(conf, self.extractor.get_scope()))
self.transformer.init(Scoped.get_scoped_conf(conf, self.transformer.get_scope()))
self.loader.init(Scoped.get_scoped_conf(conf, self.loader.get_scope()))
......@@ -43,15 +53,19 @@ class DefaultTask(Task):
Runs a task
:return:
"""
logging.info('Running a task')
LOGGER.info('Running a task')
try:
record = self.extractor.extract()
count = 1
while record:
record = self.transformer.transform(record)
if not record:
continue
self.loader.load(record)
record = self.extractor.extract()
count += 1
if count > 0 and count % self._progress_report_frequency == 0:
LOGGER.info('Extracted {} records so far'.format(count))
finally:
self._closer.close()
......@@ -16,7 +16,7 @@ class TestPublish(unittest.TestCase):
def setUp(self):
# type: () -> None
logging.basicConfig(level=logging.INFO)
self._resource_path = '{}/../resources/csv_publisher'\
self._resource_path = '{}/../resources/csv_publisher' \
.format(os.path.join(os.path.dirname(__file__)))
def test_publisher(self):
......@@ -36,12 +36,9 @@ class TestPublish(unittest.TestCase):
publisher = Neo4jCsvPublisher()
conf = ConfigFactory.from_dict(
{neo4j_csv_publisher.NEO4J_END_POINT_KEY:
'dummy://999.999.999.999:7687/',
neo4j_csv_publisher.NODE_FILES_DIR:
'{}/nodes'.format(self._resource_path),
neo4j_csv_publisher.RELATION_FILES_DIR:
'{}/relations'.format(self._resource_path),
{neo4j_csv_publisher.NEO4J_END_POINT_KEY: 'dummy://999.999.999.999:7687/',
neo4j_csv_publisher.NODE_FILES_DIR: '{}/nodes'.format(self._resource_path),
neo4j_csv_publisher.RELATION_FILES_DIR: '{}/relations'.format(self._resource_path),
neo4j_csv_publisher.NEO4J_USER: 'neo4j_user',
neo4j_csv_publisher.NEO4J_PASSWORD: 'neo4j_password',
neo4j_csv_publisher.JOB_PUBLISH_TAG: '{}'.format(uuid.uuid4())}
......@@ -52,7 +49,44 @@ class TestPublish(unittest.TestCase):
self.assertEqual(mock_run.call_count, 6)
# 2 node files, 1 relation file
self.assertEqual(mock_commit.call_count, 3)
self.assertEqual(mock_commit.call_count, 1)
def test_preprocessor(self):
# type: () -> None
with patch.object(GraphDatabase, 'driver') as mock_driver:
mock_session = MagicMock()
mock_driver.return_value.session.return_value = mock_session
mock_transaction = MagicMock()
mock_session.begin_transaction.return_value = mock_transaction
mock_run = MagicMock()
mock_transaction.run = mock_run
mock_commit = MagicMock()
mock_transaction.commit = mock_commit
mock_preprocessor = MagicMock()
mock_preprocessor.is_perform_preprocess.return_value = MagicMock(return_value=True)
mock_preprocessor.preprocess_cypher.return_value = ('MATCH (f:Foo) RETURN f', {})
publisher = Neo4jCsvPublisher()
conf = ConfigFactory.from_dict(
{neo4j_csv_publisher.NEO4J_END_POINT_KEY: 'dummy://999.999.999.999:7687/',
neo4j_csv_publisher.NODE_FILES_DIR: '{}/nodes'.format(self._resource_path),
neo4j_csv_publisher.RELATION_FILES_DIR: '{}/relations'.format(self._resource_path),
neo4j_csv_publisher.RELATION_PREPROCESSOR: mock_preprocessor,
neo4j_csv_publisher.NEO4J_USER: 'neo4j_user',
neo4j_csv_publisher.NEO4J_PASSWORD: 'neo4j_password',
neo4j_csv_publisher.JOB_PUBLISH_TAG: '{}'.format(uuid.uuid4())}
)
publisher.init(conf)
publisher.publish()
self.assertEqual(mock_run.call_count, 8)
# 2 node files, 1 relation file
self.assertEqual(mock_commit.call_count, 1)
if __name__ == '__main__':
......
import textwrap
import unittest
import uuid
from databuilder.publisher.neo4j_preprocessor import NoopRelationPreprocessor, DeleteRelationPreprocessor
class TestNeo4jPreprocessor(unittest.TestCase):
    """Unit tests for the relation preprocessors used by the Neo4j CSV publisher."""

    def testNoopRelationPreprocessor(self):
        # type: () -> None
        """Noop preprocessor reports that no pre-processing should be performed."""
        preprocessor = NoopRelationPreprocessor()

        self.assertTrue(not preprocessor.is_perform_preprocess())

    def testDeleteRelationPreprocessor(self):  # noqa: W293
        # type: () -> None
        """Delete preprocessor is enabled, passes an unrestricted filter, and emits DELETE Cypher."""
        preprocessor = DeleteRelationPreprocessor()

        self.assertTrue(preprocessor.is_perform_preprocess())

        preprocessor.filter(start_label='foo_label',
                            end_label='bar_label',
                            start_key='foo_key',
                            end_key='bar_key',
                            relation='foo_relation',
                            reverse_relation='bar_relation')

        # With no label_tuples configured, every record passes the filter.
        self.assertTrue(preprocessor.filter(start_label=str(uuid.uuid4()),
                                            end_label=str(uuid.uuid4()),
                                            start_key=str(uuid.uuid4()),
                                            end_key=str(uuid.uuid4()),
                                            relation=str(uuid.uuid4()),
                                            reverse_relation=str(uuid.uuid4())))

        actual = preprocessor.preprocess_cypher(start_label='foo_label',
                                                end_label='bar_label',
                                                start_key='foo_key',
                                                end_key='bar_key',
                                                relation='foo_relation',
                                                reverse_relation='bar_relation')

        # The blank line below is the empty where_clause slot; textwrap.dedent normalizes
        # whitespace-only lines (hence the noqa: W293 on this test method).
        expected = (textwrap.dedent("""
        MATCH (n1:foo_label {key: $start_key })-[r]-(n2:bar_label {key: $end_key })
        
        WITH r LIMIT 2
        DELETE r
        RETURN count(*) as count;
        """), {'start_key': 'foo_key', 'end_key': 'bar_key'})

        self.assertEqual(expected, actual)

    def testDeleteRelationPreprocessorFilter(self):
        # type: () -> None
        """Label filter matches the configured pair in either order and rejects others."""
        preprocessor = DeleteRelationPreprocessor(label_tuples=[('foo', 'bar')])

        self.assertTrue(preprocessor.filter(start_label='foo',
                                            end_label='bar',
                                            start_key=str(uuid.uuid4()),
                                            end_key=str(uuid.uuid4()),
                                            relation=str(uuid.uuid4()),
                                            reverse_relation=str(uuid.uuid4())))

        self.assertTrue(preprocessor.filter(start_label='bar',
                                            end_label='foo',
                                            start_key=str(uuid.uuid4()),
                                            end_key=str(uuid.uuid4()),
                                            relation=str(uuid.uuid4()),
                                            reverse_relation=str(uuid.uuid4())))

        self.assertFalse(preprocessor.filter(start_label='foz',
                                             end_label='baz',
                                             start_key=str(uuid.uuid4()),
                                             end_key=str(uuid.uuid4()),
                                             relation=str(uuid.uuid4()),
                                             reverse_relation=str(uuid.uuid4())))
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment