RandomUsefulStuff/csv_to_json.py

103 lines
2.7 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
import json, sys, os, csv
# Process exit statuses used by this script.
EXIT_SUCCESS = 0
EXIT_USAGE = 1
EXIT_RUNTIME = 2


def _die(code, *lines):
    """Print each of *lines* to stderr, then exit with status *code*."""
    for line in lines:
        print(line, file=sys.stderr)
    sys.exit(code)


def _usage(prog):
    """Return the usage text, with *prog* as the program name."""
    return "Usage: " + prog + """ INFILE [OUTFILE]
INFILE: input file, CSV
OUTFILE (optional): output file, JSON
Environment Variables:
SEPARATOR: field delimiter, default comma
QUOTE: used to quote strings containing SEPARATOR, default '"'
NOFIRSTROW: true or false, disables using the first row as table headers,
default false
"""


def _resolve_outfile(argv):
    """Return the destination path for the command line *argv*.

    With an explicit OUTFILE (``argv[2]``) only its parent directory must
    exist. Without one, the name is derived by swapping a trailing ``.csv``
    on INFILE for ``.json``. Exits with EXIT_RUNTIME when no usable
    destination can be determined.
    """
    source = argv[1]

    # manually specified name
    if len(argv) > 2:
        target = argv[2]
        # check if parent directory of specified file exists
        if not os.path.isdir(os.path.dirname(os.path.abspath(target))):
            _die(EXIT_RUNTIME, "Missing parent dir for: " + target)
        return target

    # attempt to automatically determine name
    if not (len(source) > 4 and source.endswith(".csv")):
        _die(
            EXIT_RUNTIME,
            "Cannot automatically determine destination file name for: "
            + source,
        )
    target = source[:-4] + ".json"
    # Only an auto-derived name is protected against clobbering; as the
    # message says, naming the destination explicitly overwrites it.
    if os.path.exists(target):
        _die(
            EXIT_RUNTIME,
            "Destination file exists, refusing to operate: " + target,
            "To overwrite, explicitly specify destination filename on the "
            "command line.",
        )
    return target


def _nofirstrow_from(environ):
    """Parse the NOFIRSTROW setting from *environ* (case-insensitive).

    Returns False when unset or "false", True when "true"; any other value
    exits with EXIT_USAGE.
    """
    raw = environ.get("NOFIRSTROW", "false").lower()
    if raw == "false":
        return False
    if raw == "true":
        return True
    _die(
        EXIT_USAGE,
        "Environment variable NOFIRSTROW is set but neither true nor false.",
    )


def rows_to_records(reader, nofirstrow=False):
    """Turn an iterable of CSV rows into a list of dicts, one per data row.

    reader:     iterable yielding each row as a list of field strings
                (e.g. a ``csv.reader``).
    nofirstrow: when False, the first non-empty row supplies the column
                names and is not emitted as data. When True, columns are
                numbered 0..n-1 from the width of the first non-empty row,
                and that row is kept as data.

    Empty rows (blank lines) are skipped. A row shorter than the column list
    gets ``None`` for its missing fields; fields beyond the column list are
    ignored.
    """
    columns = []
    records = []
    for row in reader:
        if not row:
            continue
        if not columns:
            # assume we are reading the first row
            if nofirstrow:
                columns = list(range(len(row)))
            else:
                columns = row
                continue
        records.append({
            column: row[index] if index < len(row) else None
            for index, column in enumerate(columns)
        })
    return records


def main(argv=None, environ=None):
    """Convert the CSV file named on the command line to a JSON file.

    argv:    command line including the program name; defaults to
             ``sys.argv``.
    environ: mapping consulted for SEPARATOR, QUOTE and NOFIRSTROW; defaults
             to ``os.environ``.

    Returns EXIT_SUCCESS after writing the output. Usage and runtime
    problems are reported on stderr and terminate via ``sys.exit`` with
    EXIT_USAGE or EXIT_RUNTIME.
    """
    argv = sys.argv if argv is None else argv
    environ = os.environ if environ is None else environ

    if len(argv) < 2 or len(argv) > 3:
        sys.stderr.write(_usage(argv[0]))
        sys.stderr.flush()
        sys.exit(EXIT_USAGE)

    infile = argv[1]
    if not os.path.isfile(infile):
        _die(EXIT_RUNTIME, "Not a file: " + infile)
    outfile = _resolve_outfile(argv)

    separator = environ.get("SEPARATOR", ",")
    if len(separator) > 1:
        print(
            "Warning: Separator from environment appears to be longer than "
            "one character. This might cause issues.",
            file=sys.stderr,
        )
    quote = environ.get("QUOTE", '"')
    if len(quote) > 1:
        print(
            "Warning: Quote from environment appears to be longer than one "
            "character. This might cause issues.",
            file=sys.stderr,
        )
    nofirstrow = _nofirstrow_from(environ)

    # newline="" lets the csv module do its own line-ending handling, which
    # it needs for quoted fields that contain line breaks.
    with open(infile, "r", newline="") as source:
        reader = csv.reader(source, delimiter=separator, quotechar=quote)
        records = rows_to_records(reader, nofirstrow)

    with open(outfile, "w") as sink:
        sink.write(json.dumps(records))
    return EXIT_SUCCESS


if __name__ == "__main__":
    sys.exit(main())