diff --git a/csv_to_json.py b/csv_to_json.py new file mode 100644 index 0000000..016112b --- /dev/null +++ b/csv_to_json.py @@ -0,0 +1,99 @@ +import json, sys, os, csv + +infile = "" +outfile = "" +separator = "," +quote = '"' + +nofirstrow = False + +EXIT_SUCCESS=0 +EXIT_USAGE=1 +EXIT_RUNTIME=2 + +if len(sys.argv)<2 or len(sys.argv)>3: + sys.stderr.write( +"Usage: "+sys.argv[0]+""" INFILE [OUTFILE] + + INFILE: input file, CSV + OUTFILE (optional): output file, JSON + +Environment Variables: + + SEPARATOR: field delimiter, default comma + QUOTE: used to quote strings containing SEPARATOR, default '"' + NOFIRSTROW: true or false, disables using the first row as table headers, + default false +""" + ) + sys.stderr.flush() + sys.exit(EXIT_USAGE) + +if os.path.isfile(sys.argv[1]): + infile = sys.argv[1] +else: + print("Not a file: "+sys.argv[1], file=sys.stderr) + sys.exit(EXIT_RUNTIME) + +# manually specified name +if len(sys.argv)>2: + # check if parent directory of specified file exists + if os.path.isdir(os.path.abspath(os.path.join(sys.argv[2], os.pardir))): + outfile = sys.argv[2] + else: + print("Missing parent dir for: "+sys.argv[2], file=sys.stderr) + sys.exit(EXIT_RUNTIME) + +# attempt to automatically determine name +else: + if len(sys.argv[1])>4 and sys.argv[1][-4:]==".csv": + outfile = sys.argv[1][:-4]+".json" + else: + print("Cannot automatically determine destination file name for: "+sys.argv[1], file=sys.stderr) + sys.exit(EXIT_RUNTIME) + if os.path.exists(outfile): + print("Destination file exists, refusing to operate: "+outfile, file=sys.stderr) + print("To overwrite, explicitly specify destination filename on the command line.") + sys.exit(EXIT_RUNTIME) + +separator = os.environ.get("SEPARATOR", ",") +if len(separator)>1: + print("Warning: Separator from environment appears to be longer than one character. This might cause issues.") + +quote = os.environ.get("QUOTE", '"') +if len(quote)>1: + print("Warning: Quote from environment appears to be longer than one character. This might cause issues.") + +if os.environ.get("NOFIRSTROW", "false").lower() == "false": + nofirstrow = False +elif os.environ["NOFIRSTROW"].lower() == "true": + nofirstrow = True +else: + print("Environment variable NOFIRSTROW is set but neither true nor false.", file=sys.stderr) + sys.exit(EXIT_USAGE) + +infileobject = open(infile, "r") +csvreader = csv.reader(infileobject, delimiter=separator, quotechar=quote) + +columns = [] +rows = [] +for row in csvreader: + if columns == []: + # assume we are reading the first row + if nofirstrow: + columns = list(range(len(row))) + else: + columns = row + continue + + rows = rows + [{}] + + i = 0 + while i