Compare commits
3 Commits
b5cf010314
...
ebabc961fb
Author | SHA1 | Date |
---|---|---|
BodgeMaster | ebabc961fb | |
BodgeMaster | 86ae33a18d | |
BodgeMaster | 0928d90472 |
108
filetags.py
108
filetags.py
|
@ -90,6 +90,49 @@ def add_tag(storage_directory, file_hash, tag):
|
||||||
db.close()
|
db.close()
|
||||||
return not tag_already_present
|
return not tag_already_present
|
||||||
|
|
||||||
|
def get_tags_by_hash(storage_directory, file_hash):
    """Return the tags recorded for a file hash.

    Looks up the internal id of ``file_hash`` in the ``hashes`` table of
    ``<storage_directory>/container.sqlite`` and collects the ``tag`` column
    of every row in the ``tags`` table that carries that id.

    Returns a list of tags (empty when the hash is known but untagged), or
    None when the hash has no entry in the ``hashes`` table.
    """
    db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
    try:
        cursor = db.cursor()

        cursor.execute("SELECT id FROM hashes WHERE hash=?", (file_hash,))
        row = cursor.fetchone()
        if row is None:
            # unknown hash: callers distinguish this from "known, no tags"
            return None

        internal_id = row[0]
        cursor.execute("SELECT tag FROM tags WHERE id=?", (internal_id,))
        return [tag_row[0] for tag_row in cursor.fetchall()]
    finally:
        # close the connection even when one of the queries raises
        db.close()
|
||||||
|
|
||||||
|
def get_hashes_by_tag(storage_directory, tag):
    """Return the hashes of all files that carry exactly ``tag``.

    Reads ``<storage_directory>/container.sqlite``: every ``id`` in the
    ``tags`` table whose ``tag`` equals the argument is resolved to its
    ``hash`` through the ``hashes`` table.

    Returns a list of hashes, in the order the tag rows were returned; the
    list is empty when no file carries the tag.
    """
    db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
    try:
        cursor = db.cursor()

        cursor.execute("SELECT id FROM tags WHERE tag=?", (tag,))
        internal_ids = [row[0] for row in cursor.fetchall()]

        file_hashes = []
        for internal_id in internal_ids:
            cursor.execute("SELECT hash FROM hashes WHERE id=?", (internal_id,))
            row = cursor.fetchone()
            # a tag row whose id has no entry in hashes has no file to report;
            # skip it instead of failing on row[0]
            if row is not None:
                file_hashes.append(row[0])

        return file_hashes
    finally:
        # the connection is released on every path, including errors
        db.close()
|
||||||
|
|
||||||
|
def file_is_in_storage(storage_directory, file_hash_or_path, compress):
    """Tell whether ``file_hash_or_path`` names an object in the storage.

    An object is looked for at ``<storage_directory>/objects/<name>``, with
    ``.xz`` appended to the name when ``compress`` is true.

    Returns True only when that object file exists and the argument is not
    itself a path to an existing file; otherwise False.
    """
    # Stored objects are addressed by a bare name. Anything with a directory
    # part is a path, not a hash: os.path.join discards everything before an
    # absolute component, so such a value would be checked outside the storage.
    if os.path.basename(file_hash_or_path) != file_hash_or_path:
        return False

    suffix = ".xz" if compress else ""
    stored_object = os.path.join(storage_directory, "objects", file_hash_or_path + suffix)

    # a bare name that is also an existing file (relative to the working
    # directory) is treated as a path, not as a hash
    return os.path.isfile(stored_object) and not os.path.isfile(file_hash_or_path)
|
||||||
|
|
||||||
def load_container_settings(storage_directory):
|
def load_container_settings(storage_directory):
|
||||||
if not os.path.isfile(os.path.join(storage_directory, "container.sqlite")):
|
if not os.path.isfile(os.path.join(storage_directory, "container.sqlite")):
|
||||||
return (PATH_ERROR, None, None, None, None)
|
return (PATH_ERROR, None, None, None, None)
|
||||||
|
@ -115,6 +158,7 @@ def load_container_settings(storage_directory):
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
# check storage container against settings
|
# check storage container against settings
|
||||||
|
#TODO: check compression ?
|
||||||
if parity and not os.path.isdir(os.path.join(storage_directory, "parity")):
|
if parity and not os.path.isdir(os.path.join(storage_directory, "parity")):
|
||||||
return (GENERAL_ERROR, None, None, checksum_algorithm, compress)
|
return (GENERAL_ERROR, None, None, checksum_algorithm, compress)
|
||||||
return (0, parity, parity_bytes, checksum_algorithm, compress)
|
return (0, parity, parity_bytes, checksum_algorithm, compress)
|
||||||
|
@ -135,7 +179,7 @@ if __name__ == "__main__":
|
||||||
"""+sys.argv[0]+""" search <storage directory> <tags or partial tags> - return paths and tags of all found files
|
"""+sys.argv[0]+""" search <storage directory> <tags or partial tags> - return paths and tags of all found files
|
||||||
"""+sys.argv[0]+""" search+first <storage directory> <tags or partial tags> - return hash and tags of first found file
|
"""+sys.argv[0]+""" search+first <storage directory> <tags or partial tags> - return hash and tags of first found file
|
||||||
"""+sys.argv[0]+""" search+unique <storage directory> <tags or partial tags> - return hash and tags of the found file, return error if not found or multiple found
|
"""+sys.argv[0]+""" search+unique <storage directory> <tags or partial tags> - return hash and tags of the found file, return error if not found or multiple found
|
||||||
"""+sys.argv[0]+""" lookup <storage directory> <hash|exact tag|set of exact tags> - return paths and tags of all found files
|
"""+sys.argv[0]+""" lookup <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of all found files
|
||||||
"""+sys.argv[0]+""" lookup+first <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of first found file
|
"""+sys.argv[0]+""" lookup+first <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of first found file
|
||||||
"""+sys.argv[0]+""" lookup+unique <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of the found file, return error if not found or multiple found
|
"""+sys.argv[0]+""" lookup+unique <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of the found file, return error if not found or multiple found
|
||||||
"""+sys.argv[0]+""" link <storage directory> <hash> <location> - add a symlink in <location> that points to the referenced file
|
"""+sys.argv[0]+""" link <storage directory> <hash> <location> - add a symlink in <location> that points to the referenced file
|
||||||
|
@ -150,6 +194,7 @@ if __name__ == "__main__":
|
||||||
"""+sys.argv[0]+""" help - display this message
|
"""+sys.argv[0]+""" help - display this message
|
||||||
"""
|
"""
|
||||||
#TODO: +path modifier for things that return a hash to return the path to the stored file instead
|
#TODO: +path modifier for things that return a hash to return the path to the stored file instead
|
||||||
|
#TODO: +hash and +tags modifier for lookup
|
||||||
#TODO: condense modifiers onto the same lines as the main subcommand where possible
|
#TODO: condense modifiers onto the same lines as the main subcommand where possible
|
||||||
#TODO: clarification of <> and []
|
#TODO: clarification of <> and []
|
||||||
#TODO: subcommand to change container settings
|
#TODO: subcommand to change container settings
|
||||||
|
@ -158,7 +203,7 @@ if __name__ == "__main__":
|
||||||
try:
|
try:
|
||||||
command = sys.argv[1].split("+")
|
command = sys.argv[1].split("+")
|
||||||
except IndexError:
|
except IndexError:
|
||||||
print("No command specified.", file=sys.stderr)
|
print("No subcommand specified.", file=sys.stderr)
|
||||||
print(USAGE, file=sys.stderr)
|
print(USAGE, file=sys.stderr)
|
||||||
sys.exit(USAGE_ERROR)
|
sys.exit(USAGE_ERROR)
|
||||||
if not command[0] in VALID_COMMANDS:
|
if not command[0] in VALID_COMMANDS:
|
||||||
|
@ -237,7 +282,7 @@ if __name__ == "__main__":
|
||||||
# file - requires a file path; adds a new file (or if file already in storage adds tags to that file), checks for collisions by comparing file size
|
# file - requires a file path; adds a new file (or if file already in storage adds tags to that file), checks for collisions by comparing file size
|
||||||
# move - requires file modifier; moves the file to the storage dir instead of copying it
|
# move - requires file modifier; moves the file to the storage dir instead of copying it
|
||||||
if command[0] == "add":
|
if command[0] == "add":
|
||||||
if not len(sys.argv)>5:
|
if len(sys.argv)<5:
|
||||||
print("Too few arguments!", file=sys.stderr)
|
print("Too few arguments!", file=sys.stderr)
|
||||||
print(USAGE, file=sys.stderr)
|
print(USAGE, file=sys.stderr)
|
||||||
sys.exit(USAGE_ERROR)
|
sys.exit(USAGE_ERROR)
|
||||||
|
@ -251,28 +296,24 @@ if __name__ == "__main__":
|
||||||
if status==GENERAL_ERROR:
|
if status==GENERAL_ERROR:
|
||||||
print("Verifying container settings failed.", file=sys.stderr)
|
print("Verifying container settings failed.", file=sys.stderr)
|
||||||
sys.exit(status)
|
sys.exit(status)
|
||||||
suffix=""
|
|
||||||
if compress:
|
|
||||||
suffix=".xz"
|
|
||||||
|
|
||||||
file_hash_or_path = sys.argv[3]
|
file_hash_or_path = sys.argv[3]
|
||||||
hash_allowed=True
|
hash_allowed=True
|
||||||
if 'file' in command:
|
if 'file' in command:
|
||||||
hash_allowed=False
|
hash_allowed=False
|
||||||
if not any([hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path)), os.path.isfile(file_hash_or_path)]):
|
if not any([hash_allowed and file_is_in_storage(storage_directory, file_hash_or_path, compress), os.path.isfile(file_hash_or_path)]):
|
||||||
print("Unknown file!", file=sys.stderr)
|
print("Unknown file!", file=sys.stderr)
|
||||||
print(USAGE, file=sys.stderr)
|
print(USAGE, file=sys.stderr)
|
||||||
sys.exit(PATH_ERROR)
|
sys.exit(PATH_ERROR)
|
||||||
|
|
||||||
tags = sys.argv[4:]
|
tags = sys.argv[4:]
|
||||||
|
|
||||||
if hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path+suffix)) and not os.path.isfile(file_hash_or_path):
|
if hash_allowed and file_is_in_storage(storage_directory, file_hash_or_path, compress):
|
||||||
#that last part is needed bc os.path.join automatically discards everything before the last element containing an absolute path so if file_hash_or_path is an absolute path that would be what join gives back
|
|
||||||
file_hash = file_hash_or_path
|
file_hash = file_hash_or_path
|
||||||
print("File already in storage.")
|
print("File already in storage.")
|
||||||
else:
|
else:
|
||||||
file_hash = calculate_file_hash(checksum_algorithm, file_hash_or_path)
|
file_hash = calculate_file_hash(checksum_algorithm, file_hash_or_path)
|
||||||
if os.path.isfile(os.path.join(storage_directory, "objects", file_hash+suffix)):
|
if file_is_in_storage(storage_directory, file_hash_or_path, compress):
|
||||||
print("File already in storage.")
|
print("File already in storage.")
|
||||||
#this assumes that the storage directory has not been tampered with or corrupted, FIXME!
|
#this assumes that the storage directory has not been tampered with or corrupted, FIXME!
|
||||||
if 'move' in command:
|
if 'move' in command:
|
||||||
|
@ -288,6 +329,7 @@ if __name__ == "__main__":
|
||||||
if parity:
|
if parity:
|
||||||
gegerate_parity_file(os.path.join(storage_directory, "objects", file_hash), parity_bytes, os.path.join(storage_directory, "parity", file_hash))
|
gegerate_parity_file(os.path.join(storage_directory, "objects", file_hash), parity_bytes, os.path.join(storage_directory, "parity", file_hash))
|
||||||
if compress:
|
if compress:
|
||||||
|
print("Compressing...")
|
||||||
xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "objects", file_hash))
|
xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "objects", file_hash))
|
||||||
if parity:
|
if parity:
|
||||||
xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "parity", file_hash))
|
xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "parity", file_hash))
|
||||||
|
@ -299,5 +341,51 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
print("Tag already present.")
|
print("Tag already present.")
|
||||||
|
|
||||||
|
# lookup subcommand: return hash and tags of found files
# arguments: <storage directory> <hash|exact tag|set of exact tags>
# modifiers:
# first - only return one file
# unique - return error if not found or multiple found
#TODO: modifiers
if command[0] == "lookup":
    if len(sys.argv)<4:
        print("Too few arguments!", file=sys.stderr)
        print(USAGE, file=sys.stderr)
        sys.exit(USAGE_ERROR)

    storage_directory = sys.argv[2]
    status, parity, parity_bytes, checksum_algorithm, compress = load_container_settings(storage_directory)
    if not status==0:
        if status==PATH_ERROR:
            print("Invalid storage directory!", file=sys.stderr)
            print(USAGE, file=sys.stderr)
        if status==GENERAL_ERROR:
            print("Verifying container settings failed.", file=sys.stderr)
        sys.exit(status)

    # every remaining argument is either a single hash or an exact tag
    file_tags_or_hash = sys.argv[3:]

    # first interpretation: the first argument is the hash of a stored file
    if file_is_in_storage(storage_directory, file_tags_or_hash[0], compress):
        tags = get_tags_by_hash(storage_directory, file_tags_or_hash[0])
        print("Tags for file:")
        print(tags)

    # second interpretation: the arguments are tags; collect, per tag, the
    # hashes of all files associated with it
    file_hash_lists = [get_hashes_by_tag(storage_directory, tag) for tag in file_tags_or_hash]

    # keep only the hashes present in every per-tag list; start from a copy of
    # the first list and filter into a new list each round, because removing
    # items from a list while iterating over it skips elements
    common_file_hashes = list(file_hash_lists[0])
    for file_hash_list in file_hash_lists[1:]:
        common_file_hashes = [file_hash for file_hash in common_file_hashes if file_hash in file_hash_list]

    if common_file_hashes:
        print("Files for tag(s):")
        print(common_file_hashes)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# this line is here to work around a bug in Xed
|
# this line is here to work around a bug in Xed
|
||||||
|
|
Loading…
Reference in New Issue