Commit 0558cf49 authored by Santan Thottempudi

Initial commit

parents
File added
This diff is collapsed.
gitrepo @ e484aa53
Subproject commit e484aa5316fb4bb36fc2cfef5f7fb936a779c89a
import os
import json
import git
import csv
import pandas as pd
import numpy as np
# Ask where the JSON file lives (a local directory or a Git repository),
# convert it into a CSV file and print a short per-column analysis of it.


def prompt_choice():
    """Ask for the JSON file location until the user enters 1 or 2.

    Returns:
        int: 1 for a local file, 2 for a file inside a Git repository.
    """
    while True:
        try:
            choice = int(input("Please enter select a number choice. Where is the JSON file located: \n 1) Local. 2) GitRepo. : "))
        except ValueError:
            print('Your input is invalid. Please enter a number.')
            # Ask again instead of falling through without a valid choice.
            continue
        if choice in (1, 2):
            return choice
        print("Invalid Choice")


def read_text_file(path, filename):
    """Read a whole text file, printing progress and error messages.

    Args:
        path: Full path of the file to read.
        filename: Bare file name, used only in the error message.

    Returns:
        The file content as a string, or None when the file could not be
        opened or read (an error message has already been printed).
    """
    print("Opening File.")
    try:
        # JSON text is UTF-8 by specification, so do not rely on the locale.
        handle = open(path, 'r', encoding='utf-8')
    except IOError:
        print("Error: Please enter the correct file path and file name!")
        return None
    print("File Opened!")
    print("Reading File.")
    # The with block closes the handle even when reading fails.
    with handle:
        try:
            text = handle.read()
        except (IOError, UnicodeDecodeError):
            print("Error: "+filename+" cannot be read!")
            return None
    print("File Read!")
    return text


def is_record_list(data):
    """Return True when data is a list of dicts with at least one key overall.

    Only such data can be turned into a CSV file that has a header row.
    """
    return (
        isinstance(data, list)
        and all(isinstance(record, dict) for record in data)
        and any(data)
    )


def write_records_to_csv(records, csv_path):
    """Write a list of JSON objects to a CSV file.

    The header is the union of all keys in first-seen order, so records
    with missing or additional keys stay aligned with their columns;
    missing values are written as empty fields.

    Args:
        records: List of dicts, one per CSV row.
        csv_path: Path of the CSV file to create or overwrite.

    Raises:
        OSError: If the CSV file cannot be opened or written.
    """
    fieldnames = list(dict.fromkeys(key for record in records for key in record))
    # newline='' lets the csv module control line endings, as its
    # documentation requires; otherwise blank rows appear on Windows.
    with open(csv_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(records)


def output_analysis(file_dir):
    """Print the maximum of every column of a CSV file as "name : value".

    For numeric columns the largest value is printed. For every other
    column the value with the longest text is printed (the first one on
    ties). Nothing is printed when the file has a header but no rows.

    Args:
        file_dir: Path of the CSV file to analyse.

    Raises:
        pandas.errors.EmptyDataError: If the file contains no columns.
    """
    df = pd.read_csv(file_dir)
    if df.empty:
        return
    for name in df.columns:
        column = df[name]
        is_number = (
            pd.api.types.is_numeric_dtype(column)
            and not pd.api.types.is_bool_dtype(column)
        )
        if is_number:
            print("{} : {}".format(name, column.max()))
        else:
            lengths = column.astype(str).map(len)
            # argmax is positional, so index with iloc rather than by label.
            longest = column.iloc[lengths.to_numpy().argmax()]
            print("{} : {}".format(name, longest))


def process_json_file(json_path, filename, csv_prompt, invalid_json_message):
    """Validate a JSON file, convert it into CSV and print the analysis.

    Every failure is reported with a printed message; the function then
    returns without raising.

    Args:
        json_path: Full path of the JSON file.
        filename: Bare JSON file name, used in messages.
        csv_prompt: Prompt shown when asking for the CSV file name.
        invalid_json_message: Message printed when the data is not JSON.
    """
    text = read_text_file(json_path, filename)
    if text is None:
        return

    print("Checking if data is valid JSON.")
    try:
        records = json.loads(text)
    except ValueError:
        # json.JSONDecodeError is a ValueError, not an IOError.
        print(invalid_json_message)
        return
    print("Data is valid JSON!")
    print(text)

    print("Converting JSON into CSV.")
    if not is_record_list(records):
        print("Error: The data in "+filename+" must be a non-empty list of JSON objects to be converted into CSV.")
        return
    csv_path = input(csv_prompt)+".csv"
    try:
        write_records_to_csv(records, csv_path)
    except IOError:
        print("Error: "+csv_path+" cannot be written!")
        return

    print("Number of lines present:", len(pd.read_csv(csv_path)))
    output_analysis(csv_path)


def run_local():
    """Ask for a local JSON file and process it."""
    localpath = input("Please enter the absolute file path of directory where JSON file is located. Do not include filename.: ")
    filename = input("Please enter the filename including file extension: ")
    filepath = os.path.join(localpath, filename)
    print("File path: "+filepath)
    process_json_file(
        filepath,
        filename,
        "Please enter a name for the CSV file:",
        "The data in "+filename+" is not valid JSON.",
    )


def run_git_repo():
    """Clone a Git repository and process a JSON file stored in it."""
    giturl = input("Please enter the url of the Git Repo: ")
    filename = input("Please enter the name of the json file including file extension: ")
    repopath = input("Please enter the absolute file path for directory where you want to clone the Git Repo: ")
    gitfilepath = os.path.join(repopath, filename)
    print("Repository Path: "+repopath)
    # Only the clone itself is guarded here, so that errors from the later
    # file processing are not misreported as a repository problem.
    try:
        print("Cloning Repo.")
        git.Repo.clone_from(giturl, repopath)
        print("Repo Cloned!")
    except (git.exc.GitError, OSError):
        print("Error: URL Incorrect, Repository Does Not Exist, or Repo Cannot Be Accessed!")
        return
    process_json_file(
        gitfilepath,
        filename,
        "Please enter a name for the CSV file including :",
        "Error: The data in "+filename+" is not valid JSON.",
    )


def main():
    """Run the interactive JSON-to-CSV conversion once."""
    if prompt_choice() == 1:
        run_local()
    else:
        run_git_repo()


if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment