Unverified Commit 6d5ffa49 authored by Jacob Kim, committed by GitHub

Fix Sample Scripts and Data (#199)

* Change schema_name -> schema in sample_table_programmatic_source.csv

* Update schema_name -> schema in sample_data_loader

* Change name -> full_name in sample_user.csv

* Change name -> full_name in sample_data_loader.py

* Retrigger CLA Check
parent 6c4bd0ba
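
Context for the renames below: the sample loader builds its rows from these CSVs via csv.DictReader (the dr loop in the hunks further down), so the CSV header names and the dictionary keys used in the script must match exactly. A minimal sketch, not part of this commit, of the mismatch the renames fix:

import csv
import io

# The header row drives DictReader's keys: with the old 'name' header, the
# loader's lookup of 'full_name' would fail, hence the CSV and script renames.
old_header = io.StringIO('email,name\nroald.amundsen@example.org,Roald Amundsen\n')
new_header = io.StringIO('email,full_name\nroald.amundsen@example.org,Roald Amundsen\n')
print(list(next(csv.DictReader(old_header)).keys()))   # ['email', 'name']
print(next(csv.DictReader(new_header))['full_name'])   # Roald Amundsen
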
sample_table_programmatic_source.csv
-database,cluster,schema_name,name,description,tags,description_source
+database,cluster,schema,name,description,tags,description_source
 hive,gold,test_schema,test_table1,"**Size**: 50T\n\n**Monthly Cost**: $5000","expensive","s3_crawler"
 dynamo,gold,test_schema,test_table2,"**Size**: 1T\n\n**Monthly Cost**: $50","cheap","s3_crawler"
 hive,gold,test_schema,test_table1,"### Quality Report:\n --- \n Ipsus enom. Ipsus enom ipsus lorenum.\n ---\n[![Build Status](https://api.travis-ci.com/lyft/amundsendatabuilder.svg?branch=master)](https://travis-ci.com/lyft/amundsendatabuilder)","low_quality","quality_service"
\ No newline at end of file
sample_user.csv
-email,first_name,last_name,name,github_username,team_name,employee_type,manager_email,slack_id
+email,first_name,last_name,full_name,github_username,team_name,employee_type,manager_email,slack_id
 roald.amundsen@example.org,Roald,Amundsen,"Roald Amundsen",lyft,"Team Amundsen",sailor,"phboss@example.org",ramundzn
 chrisc@example.org,Christopher,Columbus,"Christopher Columbus",ChristopherColumbusFAKE,"Team Amundsen",sailor,"phboss@example.org",chrisc
-buzz@example.org, Buzz, Aldrin,"Buzz Aldrin",BuzzAldrinFAKE,"Team Amundsen",astronaut,"phboss@example.org",buzz
+buzz@example.org,Buzz,Aldrin,"Buzz Aldrin",BuzzAldrinFAKE,"Team Amundsen",astronaut,"phboss@example.org",buzz
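
The stray spaces removed from the last row are not cosmetic: Python's csv module keeps leading whitespace in fields unless skipinitialspace is set, so ' Buzz' and 'Buzz' are different values. A quick illustration (not from the repository):

import csv
import io

row = next(csv.reader(io.StringIO('buzz@example.org, Buzz, Aldrin')))
print(row)   # ['buzz@example.org', ' Buzz', ' Aldrin'] -- leading spaces survive
row = next(csv.reader(io.StringIO('buzz@example.org, Buzz, Aldrin'), skipinitialspace=True))
print(row)   # ['buzz@example.org', 'Buzz', 'Aldrin']
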
sample_data_loader.py
@@ -190,7 +190,7 @@ def load_user_data_from_csv(file_name):
 '(email VARCHAR(64) NOT NULL , '
 'first_name VARCHAR(64) NOT NULL , '
 'last_name VARCHAR(64) NOT NULL , '
-'name VARCHAR(64) NOT NULL , '
+'full_name VARCHAR(64) NOT NULL , '
 'github_username VARCHAR(64) NOT NULL , '
 'team_name VARCHAR(64) NOT NULL, '
 'employee_type VARCHAR(64) NOT NULL,'
@@ -202,7 +202,7 @@ def load_user_data_from_csv(file_name):
 to_db = [(i['email'],
 i['first_name'],
 i['last_name'],
-i['name'],
+i['full_name'],
 i['github_username'],
 i['team_name'],
 i['employee_type'],
@@ -210,7 +210,7 @@ def load_user_data_from_csv(file_name):
 i['slack_id']) for i in dr]
 cur.executemany("INSERT INTO test_user_metadata ("
-"email, first_name, last_name, name, github_username, "
+"email, first_name, last_name, full_name, github_username, "
 "team_name, employee_type, "
 "manager_email, slack_id ) VALUES "
 "(?, ?, ?, ?, ?, ?, ?, ?, ?);", to_db)
@@ -434,7 +434,7 @@ def create_last_updated_job():
 'publisher.neo4j.{}'.format(neo4j_csv_publisher.NEO4J_PASSWORD):
 neo4j_password,
 'publisher.neo4j.{}'.format(neo4j_csv_publisher.JOB_PUBLISH_TAG):
-'unique_lastupdated_tag', # should use unique tag here like {ds}
+'unique_last_updated_tag', # should use unique tag here like {ds}
 })
 job = DefaultJob(conf=job_config,
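
For reference, a sketch of how these publisher keys are assembled in the sample script, assuming pyhocon's ConfigFactory; the password and tag values here are placeholders, and, as the inline comment suggests, a per-run value such as an Airflow-style {ds} would normally replace the hard-coded tag:

from pyhocon import ConfigFactory
from databuilder.publisher import neo4j_csv_publisher

job_config = ConfigFactory.from_dict({
    # placeholder credential and tag values, for illustration only
    'publisher.neo4j.{}'.format(neo4j_csv_publisher.NEO4J_PASSWORD): 'neo4j_password',
    'publisher.neo4j.{}'.format(neo4j_csv_publisher.JOB_PUBLISH_TAG): 'unique_last_updated_tag',
})
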
@@ -543,7 +543,7 @@ class CSVTableColumnExtractor(Extractor):
 for column_dict in self.columns:
 db = column_dict['database']
 cluster = column_dict['cluster']
-schema = column_dict['schema_name']
+schema = column_dict['schema']
 table = column_dict['table_name']
 id = self._get_key(db, cluster, schema, table)
 column = ColumnMetadata(
@@ -562,7 +562,7 @@ class CSVTableColumnExtractor(Extractor):
 for table_dict in tables:
 db = table_dict['database']
 cluster = table_dict['cluster']
-schema = table_dict['schema_name']
+schema = table_dict['schema']
 table = table_dict['name']
 id = self._get_key(db, cluster, schema, table)
 columns = parsed_columns[id]
@@ -570,7 +570,7 @@ class CSVTableColumnExtractor(Extractor):
 columns = []
 table = TableMetadata(database=table_dict['database'],
 cluster=table_dict['cluster'],
-schema_name=table_dict['schema_name'],
+schema=table_dict['schema'],
 name=table_dict['name'],
 description=table_dict['description'],
 columns=columns,
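
The three CSVTableColumnExtractor hunks update the sample to the keyword TableMetadata expects, schema rather than schema_name, matching the renamed CSV column. A minimal sketch of what the extractor builds per row, with made-up values:

from databuilder.models.table_metadata import ColumnMetadata, TableMetadata

# ColumnMetadata(name, description, type, sort_order)
column = ColumnMetadata('col1', 'first column', 'bigint', 0)
table = TableMetadata(database='hive',
                      cluster='gold',
                      schema='test_schema',   # the sample previously passed schema_name= here
                      name='test_table1',
                      description='a sample table',
                      columns=[column])
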
@@ -732,7 +732,7 @@ if __name__ == "__main__":
 with user, a, b, c, read, own, follow, manager
 where user.full_name is not null
 return user.email as email, user.first_name as first_name, user.last_name as last_name,
-user.full_name as name, user.github_username as github_username, user.team_name as team_name,
+user.full_name as full_name, user.github_username as github_username, user.team_name as team_name,
 user.employee_type as employee_type, manager.email as manager_email, user.slack_id as slack_id,
 user.is_active as is_active,
 REDUCE(sum_r = 0, r in COLLECT(DISTINCT read)| sum_r + r.read_count) AS total_read,
...