filled in license placeholders, added filetags.py
parent
def44e7f5e
commit
b5cf010314
29
LICENSE
29
LICENSE
|
@ -1,4 +1,4 @@
|
|||
Copyright (c) <year> <owner>. All rights reserved.
|
||||
Copyright (c) 2021 Jan Danielzick (aka BodgeMaster). All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
@ -10,22 +10,15 @@ this list of conditions and the following disclaimer.
|
|||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. All advertising materials mentioning features or use of this software must
|
||||
display the following acknowledgement:
|
||||
|
||||
This product includes software developed by the organization .
|
||||
|
||||
4. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
3. Don't be a dick.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER "AS IS" AND ANY EXPRESS OR IMPLIED
|
||||
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT
|
||||
HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
||||
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGE.
|
||||
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
||||
OF SUCH DAMAGE.
|
||||
|
|
|
@ -0,0 +1,303 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import sys, hashlib, os, sqlite3, shutil
|
||||
|
||||
################################################################################
|
||||
# program wide constants
|
||||
################################################################################
|
||||
|
||||
# process exit codes used throughout the command line interface
GENERAL_ERROR=1  # operation failed (e.g. target path already exists, container layout broken)
USAGE_ERROR=2    # bad or missing command line arguments
PATH_ERROR=3     # a referenced file or directory does not exist
|
||||
|
||||
################################################################################
|
||||
# functions
|
||||
################################################################################
|
||||
|
||||
def calculate_file_hash(algorithm, file_path, segment_size=4096):
    """Return the hex digest of the file at file_path.

    algorithm    -- any algorithm name accepted by hashlib.new()
    file_path    -- path of the file to hash
    segment_size -- chunk size in bytes; the file is read piecewise so
                    arbitrarily large files never have to fit in memory
    """
    hash_object = hashlib.new(algorithm)
    # "with" guarantees the descriptor is closed even if reading fails;
    # the previous version leaked an open file on every call
    with open(file_path, "rb") as file_descriptor:
        segment = file_descriptor.read(segment_size)
        while segment != b'':
            hash_object.update(segment)
            segment = file_descriptor.read(segment_size)
    return hash_object.hexdigest()
|
||||
|
||||
def calculate_parity(byte_string):
    """XOR all bytes of byte_string together and return the result (0-255).

    Returns 0 for an empty byte string.
    """
    parity_byte = 0b00000000
    # iterating a bytes object yields ints directly, no indexing needed
    for byte in byte_string:
        parity_byte ^= byte
    return parity_byte
|
||||
|
||||
def gegerate_parity_file(input_path, parity_bytes, output_path):
    """Write one XOR parity byte per parity_bytes-sized segment of input_path.

    The output file ends up len(input)/parity_bytes bytes long (rounded up);
    a shorter final segment still produces one parity byte.

    NOTE(review): the function name is misspelled ("gegerate") but is kept
    unchanged because callers in this file reference it by this name.
    """
    # "with" closes both descriptors even if reading/writing raises;
    # the previous version leaked both files on any error
    with open(input_path, "rb") as input_file, open(output_path, "wb") as output_file:
        segment = input_file.read(parity_bytes)
        while segment != b'':
            output_file.write(calculate_parity(segment).to_bytes(1, byteorder='big'))
            segment = input_file.read(parity_bytes)
|
||||
|
||||
def create_container(storage_directory, parity=False, parity_bytes=512, checksum_algorithm='sha512', compress=False):
    """Set up a new, empty storage container.

    Creates storage_directory with an "objects" subdirectory (and a
    "parity" subdirectory when parity is enabled) plus a container.sqlite
    database holding the container settings and the initially empty
    hash/tag tables.

    Raises OSError (FileExistsError) if storage_directory already exists.
    """
    # prepare storage directory
    os.makedirs(storage_directory)
    os.mkdir(os.path.join(storage_directory, "objects"))
    if parity:
        os.mkdir(os.path.join(storage_directory, "parity"))

    db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
    try:
        cursor = db.cursor()
        # settings are stored as TEXT; load_container_settings converts them
        # back to their Python types on read
        cursor.execute("CREATE TABLE settings (option TEXT, value TEXT);")
        cursor.executemany(
            "INSERT INTO settings VALUES (?, ?);",
            [
                ("parity", str(parity)),
                ("parity_bytes", str(parity_bytes)),
                ("checksum_algorithm", checksum_algorithm),
                ("compress", str(compress)),
            ],
        )
        # INTEGER PRIMARY KEY aliases SQLite's rowid, so ids are assigned
        # automatically on insert (auto-increment-like behavior)
        cursor.execute("CREATE TABLE hashes (id INTEGER PRIMARY KEY, hash TEXT UNIQUE);")
        cursor.execute("CREATE TABLE tags (id INTEGER, tag TEXT);")
        db.commit()
    finally:
        # previously the connection leaked if any statement raised
        db.close()
|
||||
|
||||
def add_tag(storage_directory, file_hash, tag):
    """Associate tag with file_hash in the container database.

    Creates a hashes row for file_hash on first use. Returns True when the
    tag was newly added, False when the (hash, tag) pair already existed.
    """
    db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
    try:
        cursor = db.cursor()

        cursor.execute("SELECT id FROM hashes WHERE hash=?", (file_hash,))
        row = cursor.fetchone()
        if row is None:
            cursor.execute("INSERT INTO hashes (hash) VALUES (?)", (file_hash,))
            db.commit()
            # lastrowid is the INTEGER PRIMARY KEY just assigned, so the
            # second SELECT the previous version did is unnecessary
            internal_id = cursor.lastrowid
        else:
            internal_id = row[0]

        # query for this exact (id, tag) pair instead of fetching every tag
        # of the file and searching the list in Python
        cursor.execute("SELECT 1 FROM tags WHERE id=? AND tag=?", (internal_id, tag))
        tag_already_present = cursor.fetchone() is not None
        if not tag_already_present:
            cursor.execute("INSERT INTO tags (id, tag) VALUES (?, ?)", (internal_id, tag))
            db.commit()
    finally:
        # previously the connection leaked if any statement raised
        db.close()
    return not tag_already_present
|
||||
|
||||
def load_container_settings(storage_directory):
    """Read the settings table of a container and sanity-check its layout.

    Returns a 5-tuple (status, parity, parity_bytes, checksum_algorithm,
    compress). status is 0 on success, PATH_ERROR when container.sqlite is
    missing, and GENERAL_ERROR when parity is enabled but the parity
    directory is absent.
    """
    database_path = os.path.join(storage_directory, "container.sqlite")
    if not os.path.isfile(database_path):
        return (PATH_ERROR, None, None, None, None)

    # defaults in case individual option rows are missing from the database
    parity = False
    parity_bytes = 512
    checksum_algorithm = "sha512"
    compress = False

    db = sqlite3.connect(database_path)
    cursor = db.cursor()
    cursor.execute("SELECT option, value FROM settings")
    for option, value in cursor.fetchall():
        if option == "parity":
            parity = value == str(True)
        elif option == "parity_bytes":
            parity_bytes = int(value)
        elif option == "checksum_algorithm":
            checksum_algorithm = value
        elif option == "compress":
            compress = value == str(True)
    db.close()

    # a parity-enabled container must actually have its parity directory
    if parity and not os.path.isdir(os.path.join(storage_directory, "parity")):
        return (GENERAL_ERROR, None, None, checksum_algorithm, compress)
    return (0, parity, parity_bytes, checksum_algorithm, compress)
|
||||
|
||||
################################################################################
|
||||
# main program
|
||||
################################################################################
|
||||
|
||||
if __name__ == "__main__":

    USAGE="""Usage:
"""+sys.argv[0]+""" create <storage directory> [parity=<on|off>] [parity-bytes=<number of bytes for each parity byte>] [checksum-algorithm=<algorithm>] [compress=<on|off>] - set up a new storage directory
"""+sys.argv[0]+""" add <storage directory> <hash|file> <tags ...> - add tags to a file in the storage, if file is not already in the storage, same as add+file
"""+sys.argv[0]+""" add+file <storage directory> <file> <tags ...> - copy a file to the storage and add tags
"""+sys.argv[0]+""" add+file+move <storage directory> <file> <tags ...> - move a file to the storage and add tags
"""+sys.argv[0]+""" remove <storage directory> <hash|unique tag or tag set> - remove a file from the storage, return error if not found or multiple found
"""+sys.argv[0]+""" remove+multi <storage directory> <exact tag or set of exact tags> - remove all found files from the storage, return error if not found
"""+sys.argv[0]+""" search <storage directory> <tags or partial tags> - return paths and tags of all found files
"""+sys.argv[0]+""" search+first <storage directory> <tags or partial tags> - return hash and tags of first found file
"""+sys.argv[0]+""" search+unique <storage directory> <tags or partial tags> - return hash and tags of the found file, return error if not found or multiple found
"""+sys.argv[0]+""" lookup <storage directory> <hash|exact tag|set of exact tags> - return paths and tags of all found files
"""+sys.argv[0]+""" lookup+first <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of first found file
"""+sys.argv[0]+""" lookup+unique <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of the found file, return error if not found or multiple found
"""+sys.argv[0]+""" link <storage directory> <hash> <location> - add a symlink in <location> that points to the referenced file
"""+sys.argv[0]+""" check <storage directory> <hash> - check file contents against hash
"""+sys.argv[0]+""" check+parity <storage directory> <hash> - check file contents against hash and parity file
"""+sys.argv[0]+""" check+all <storage directory> - check all files against their hashes
"""+sys.argv[0]+""" check+all+parity <storage directory> - check all files against their hashes and parity files
"""+sys.argv[0]+""" update <storage directory> <hash> - update the hash (and parity if applicable) of the specified file (specify by previous hash)
"""+sys.argv[0]+""" update+all <storage directory> - update hashes (and parities if applicable) of all mismatching files
"""+sys.argv[0]+""" fix <storage directory> <hash> - attempt to fix the file using parity
"""+sys.argv[0]+""" fix+all <storage directory> - attempt to fix all files using parity
"""+sys.argv[0]+""" help - display this message
"""
    #TODO: +path modifier for things that return a hash to return the path to the stored file instead
    #TODO: condense modifiers onto the same lines as the main subcommand where possible
    #TODO: clarification of <> and []
    #TODO: subcommand to change container settings
    VALID_COMMANDS=["create", "add", "remove", "search", "lookup", "link", "check", "update", "fix", "help"]

    # commands are given as subcommand plus "+" separated modifiers,
    # e.g. "add+file+move" -> ["add", "file", "move"]
    try:
        command = sys.argv[1].split("+")
    except IndexError:
        print("No command specified.", file=sys.stderr)
        print(USAGE, file=sys.stderr)
        sys.exit(USAGE_ERROR)
    if not command[0] in VALID_COMMANDS:
        print("Invalid command: "+command[0], file=sys.stderr)
        print(USAGE, file=sys.stderr)
        sys.exit(USAGE_ERROR)

    # help subcommand
    if command[0] == "help":
        print(USAGE)
        sys.exit(0)

    # create subcommand: create a new directory containing a folder for stored objects, one for parity files and one for
    # arguments: <storage directory> [parity=<on|off>] [parity-bytes=<number of bytes for each parity byte>] [checksum-algorithm=<algorithm>] [compress=<on|off>]
    if command[0] == "create":
        if len(sys.argv)<3:
            print("Too few arguments!", file=sys.stderr)
            print(USAGE, file=sys.stderr)
            sys.exit(USAGE_ERROR)

        storage_directory=sys.argv[2]
        if os.path.exists(storage_directory):
            print("Target path already exists. Please choose a different location.", file=sys.stderr)
            sys.exit(GENERAL_ERROR)

        # default options
        parity = False
        parity_bytes = 512
        checksum_algorithm = "sha512"
        compress = False
        # check for command line options of the form <option>=<value>
        if len(sys.argv)>3:
            arguments = sys.argv[3:]
            for argument in arguments:
                if not len(argument.split("="))==2:
                    print("Arguments to \"create\" always follow the scheme <option>=<value>.", file=sys.stderr)
                    print(USAGE, file=sys.stderr)
                    sys.exit(USAGE_ERROR)
                option = argument.split("=")[0]
                value = argument.split("=")[1]
                if not option in ["parity", "parity-bytes", "checksum-algorithm", "compress"]:
                    print("Unknown option: "+option, file=sys.stderr)
                    print(USAGE, file=sys.stderr)
                    sys.exit(USAGE_ERROR)
                if option=="parity":
                    if not value in ["on", "off"]:
                        print("Option \"parity\" accepts either \"on\" or \"off\".", file=sys.stderr)
                        sys.exit(USAGE_ERROR)
                    if value=="on":
                        parity = True
                if option=="parity-bytes":
                    try:
                        parity_bytes = int(value)
                    except ValueError:
                        print("Option \"parity-bytes\" only accepts integers.", file=sys.stderr)
                        sys.exit(USAGE_ERROR)
                if option=="checksum-algorithm":
                    if not value in hashlib.algorithms_available:
                        # fixed: typo ("Chacksum") and the message now goes to
                        # stderr like every other error message
                        print("Checksum algorithm \""+value+"\" not available.", file=sys.stderr)
                        sys.exit(USAGE_ERROR)
                    checksum_algorithm = value
                if option=="compress":
                    if not value in ["on", "off"]:
                        print("Option \"compress\" accepts either \"on\" or \"off\".", file=sys.stderr)
                        sys.exit(USAGE_ERROR)
                    if value=="on":
                        compress = True

        create_container(storage_directory, parity=parity, parity_bytes=parity_bytes, checksum_algorithm=checksum_algorithm, compress=compress)
        sys.exit(0)

    # add subcommand: add a file to the storage container or add tags to it
    # arguments:
    #     <storage directory> <hash|file> <tags ...>
    # modifiers:
    #     file - requires a file path; adds a new file (or if file already in storage adds tags to that file), checks for collisions by comparing file size
    #     move - requires file modifier; moves the file to the storage dir instead of copying it
    if command[0] == "add":
        # fixed off-by-one: prog + "add" + dir + file/hash + at least one tag
        # is 5 arguments; the old check (not len>5) rejected a single tag
        if len(sys.argv) < 5:
            print("Too few arguments!", file=sys.stderr)
            print(USAGE, file=sys.stderr)
            sys.exit(USAGE_ERROR)

        storage_directory = sys.argv[2]
        status, parity, parity_bytes, checksum_algorithm, compress = load_container_settings(storage_directory)
        if not status==0:
            if status==PATH_ERROR:
                print("Invalid storage directory!", file=sys.stderr)
                print(USAGE, file=sys.stderr)
            if status==GENERAL_ERROR:
                print("Verifying container settings failed.", file=sys.stderr)
            sys.exit(status)
        # compressed containers store objects (and parity files) as <hash>.xz
        suffix=""
        if compress:
            suffix=".xz"

        file_hash_or_path = sys.argv[3]
        hash_allowed=True
        if 'file' in command:
            hash_allowed=False
        # fixed: the stored-object check now includes the compression suffix;
        # without it every lookup in a compressed container failed
        if not any([hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path+suffix)), os.path.isfile(file_hash_or_path)]):
            print("Unknown file!", file=sys.stderr)
            print(USAGE, file=sys.stderr)
            sys.exit(PATH_ERROR)

        tags = sys.argv[4:]

        if hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path+suffix)) and not os.path.isfile(file_hash_or_path):
            #that last part is needed bc os.path.join automatically discards everything before the last element containing an absolute path so if file_hash_or_path is an absolute path that would be what join gives back
            file_hash = file_hash_or_path
            print("File already in storage.")
        else:
            file_hash = calculate_file_hash(checksum_algorithm, file_hash_or_path)
            if os.path.isfile(os.path.join(storage_directory, "objects", file_hash+suffix)):
                print("File already in storage.")
                #this assumes that the storage directory has not been tampered with or corrupted, FIXME!
                if 'move' in command:
                    print("Removing external file.")
                    os.remove(file_hash_or_path)
            else:
                if 'move' in command:
                    print("Moving file to storage.")
                    shutil.move(file_hash_or_path, os.path.join(storage_directory, "objects", file_hash))
                else:
                    print("Copying file to storage.")
                    shutil.copyfile(file_hash_or_path, os.path.join(storage_directory, "objects", file_hash))
                if parity:
                    gegerate_parity_file(os.path.join(storage_directory, "objects", file_hash), parity_bytes, os.path.join(storage_directory, "parity", file_hash))
                if compress:
                    # SECURITY NOTE(review): the path is interpolated into a
                    # shell command line; a path containing shell
                    # metacharacters will break/abuse this. Consider
                    # subprocess.run with a list argument instead.
                    # .read() blocks until xz exits; os.popen alone returns
                    # immediately and the compression might not have finished
                    os.popen("xz --best -T0 "+os.path.join(storage_directory, "objects", file_hash)).read()
                    if parity:
                        os.popen("xz --best -T0 "+os.path.join(storage_directory, "parity", file_hash)).read()

        for tag in tags:
            print("Adding tag: "+tag)
            if add_tag(storage_directory, file_hash, tag):
                print("Added.")
            else:
                print("Tag already present.")


# this line is here to work around a bug in Xed
|
Loading…
Reference in New Issue