From 0928d90472b1d338efe2158527d083f039482093 Mon Sep 17 00:00:00 2001
From: BodgeMaster <>
Date: Sat, 11 Dec 2021 08:01:13 +0100
Subject: [PATCH] added lookup function

---
 filetags.py | 109 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 99 insertions(+), 10 deletions(-)

diff --git a/filetags.py b/filetags.py
index 9676464..a6a5f7c 100644
--- a/filetags.py
+++ b/filetags.py
@@ -90,6 +90,49 @@ def add_tag(storage_directory, file_hash, tag):
     db.close()
     return not tag_already_present
 
+def get_tags_by_hash(storage_directory, file_hash):
+    db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
+    cursor = db.cursor()
+
+    tags = None
+    cursor.execute("SELECT id FROM hashes WHERE hash=?", (file_hash,))
+    row = cursor.fetchone()
+    if not row == None:
+        internal_id = row[0]
+        cursor.execute("SELECT tag FROM tags WHERE id=?", (internal_id,))
+        rows = cursor.fetchall()
+        tags = []
+        for row in rows:
+            tags = tags+[row[0]]
+
+    db.close()
+    return tags
+
+def get_hashes_by_tag(storage_directory, tag):
+    db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
+    cursor = db.cursor()
+
+    cursor.execute("SELECT id FROM tags WHERE tag=?", (tag,))
+    rows = cursor.fetchall()
+    internal_ids = []
+    for row in rows:
+        internal_ids = internal_ids+[row[0]]
+
+    file_hashes = []
+    for internal_id in internal_ids:
+        cursor.execute("SELECT hash FROM hashes WHERE id=?", (internal_id,))
+        row = cursor.fetchone()
+        file_hashes = file_hashes+[row[0]]
+    db.close()
+    return file_hashes
+
+def file_is_in_storage(storage_directory, file_hash_or_path, compress):
+    suffix=""
+    if compress:
+        suffix=".xz"
+    # The "not os.path.isfile(file_hash_or_path)" check is needed because os.path.join discards everything before the last absolute path component, so if file_hash_or_path were an absolute path, join would simply return it and the first check could match the file itself.
+    return os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path+suffix)) and not os.path.isfile(file_hash_or_path)
+
 def load_container_settings(storage_directory):
     if not os.path.isfile(os.path.join(storage_directory, "container.sqlite")):
         return (PATH_ERROR, None, None, None, None)
@@ -115,6 +158,7 @@ def load_container_settings(storage_directory):
     db.close()
 
     # check storage container against settings
+    #TODO: check compression?
     if parity and not os.path.isdir(os.path.join(storage_directory, "parity")):
         return (GENERAL_ERROR, None, None, checksum_algorithm, compress)
     return (0, parity, parity_bytes, checksum_algorithm, compress)
@@ -135,7 +179,7 @@ if __name__ == "__main__":
     """+sys.argv[0]+""" search - return paths and tags of all found files
     """+sys.argv[0]+""" search+first - return hash and tags of first found file
     """+sys.argv[0]+""" search+unique - return hash and tags of the found file, return error if not found or multiple found
-    """+sys.argv[0]+""" lookup - return paths and tags of all found files
+    """+sys.argv[0]+""" lookup - return hash and tags of all found files
     """+sys.argv[0]+""" lookup+first - return hash and tags of first found file
     """+sys.argv[0]+""" lookup+unique - return hash and tags of the found file, return error if not found or multiple found
     """+sys.argv[0]+""" link - add a symlink in that points to the referenced file
@@ -158,7 +202,7 @@ if __name__ == "__main__":
     try:
         command = sys.argv[1].split("+")
     except IndexError:
-        print("No command specified.", file=sys.stderr)
+        print("No subcommand specified.", file=sys.stderr)
         print(USAGE, file=sys.stderr)
         sys.exit(USAGE_ERROR)
     if not command[0] in VALID_COMMANDS:
@@ -237,7 +281,7 @@ if __name__ == "__main__":
     # file - requires a file path; adds a new file (or if file already in storage adds tags to that file), checks for collisions by comparing file size
     # move - requires file modifier; moves the file to the storage dir instead of copying it
     if command[0] == "add":
-        if not len(sys.argv)>5:
+        if len(sys.argv)<5:
             print("Too few arguments!", file=sys.stderr)
             print(USAGE, file=sys.stderr)
             sys.exit(USAGE_ERROR)
@@ -251,28 +295,24 @@ if __name__ == "__main__":
             if status==GENERAL_ERROR:
                 print("Verifying container settings failed.", file=sys.stderr)
             sys.exit(status)
 
-        suffix=""
-        if compress:
-            suffix=".xz"
         file_hash_or_path = sys.argv[3]
         hash_allowed=True
         if 'file' in command:
             hash_allowed=False
-        if not any([hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path)), os.path.isfile(file_hash_or_path)]):
+        if not any([hash_allowed and file_is_in_storage(storage_directory, file_hash_or_path, compress), os.path.isfile(file_hash_or_path)]):
             print("Unknown file!", file=sys.stderr)
             print(USAGE, file=sys.stderr)
             sys.exit(PATH_ERROR)
         tags = sys.argv[4:]
 
-        if hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path+suffix)) and not os.path.isfile(file_hash_or_path):
-            #that last part is needed bc os.path.join automatically discards everything before the last element containing an absolute path so if file_hash_or_path is an absolute path that would be what join gives back
+        if hash_allowed and file_is_in_storage(storage_directory, file_hash_or_path, compress):
             file_hash = file_hash_or_path
             print("File already in storage.")
         else:
             file_hash = calculate_file_hash(checksum_algorithm, file_hash_or_path)
-            if os.path.isfile(os.path.join(storage_directory, "objects", file_hash+suffix)):
+            if file_is_in_storage(storage_directory, file_hash, compress):
                 print("File already in storage.")
                 #this assumes that the storage directory has not been tampered with or corrupted, FIXME!
 
         if 'move' in command:
@@ -288,6 +328,7 @@ if __name__ == "__main__":
         if parity:
             gegerate_parity_file(os.path.join(storage_directory, "objects", file_hash), parity_bytes, os.path.join(storage_directory, "parity", file_hash))
         if compress:
+            print("Compressing...")
             xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "objects", file_hash))
             if parity:
                 xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "parity", file_hash))
@@ -299,5 +340,53 @@ if __name__ == "__main__":
         else:
             print("Tag already present.")
 
 
+    # lookup subcommand: return hash and tags of found files
+    # arguments:
+    # modifiers:
+    #  first - only return one file
+    #  unique - return error if not found or multiple found
+    if command[0] == "lookup":
+        if len(sys.argv)<4:
+            print("Too few arguments!", file=sys.stderr)
+            print(USAGE, file=sys.stderr)
+            sys.exit(USAGE_ERROR)
+
+        storage_directory = sys.argv[2]
+        status, parity, parity_bytes, checksum_algorithm, compress = load_container_settings(storage_directory)
+        if not status==0:
+            if status==PATH_ERROR:
+                print("Invalid storage directory!", file=sys.stderr)
+                print(USAGE, file=sys.stderr)
+            if status==GENERAL_ERROR:
+                print("Verifying container settings failed.", file=sys.stderr)
+            sys.exit(status)
+
+        file_tags_or_hash = sys.argv[3:]
+        #TODO: basically everything
+        if file_is_in_storage(storage_directory, file_tags_or_hash[0], compress):
+            tags = get_tags_by_hash(storage_directory, file_tags_or_hash[0])
+            print("Tags for file:")
+            for tag in tags:
+                print(tag)
+
+        # build a list of hash lists, one per given tag
+        file_hash_lists = []
+        for tag in file_tags_or_hash:
+            file_hash_lists = file_hash_lists+[get_hashes_by_tag(storage_directory, tag)]
+        # start from the first tag's hashes
+        common_file_hashes = file_hash_lists[0]
+        # iterate over each tag's hash list
+        for file_hash_list in file_hash_lists:
+            # drop every hash that is not also in this list (iterate over a copy so removing is safe)
+            for file_hash in common_file_hashes[:]:
+                if not file_hash in file_hash_list:
+                    common_file_hashes.remove(file_hash)
+
+        if not common_file_hashes == []:
+            print("Files for tag(s):")
+            for file_hash in common_file_hashes:
+                print(file_hash)
+
+
 # this line is here to work around a bug in Xed
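
Note (not part of the patch): the intersection step in the lookup subcommand, described by the comments above as keeping only the hashes common to every tag, can also be expressed with Python sets, which avoids removing items from a list while looping over it. The sketch below is illustrative only; the function name common_hashes and the sample data are hypothetical and do not exist in filetags.py.

# Minimal sketch of intersecting per-tag hash lists with sets.
# Assumes the per-tag lists come from something like get_hashes_by_tag();
# here they are hard-coded sample data.

def common_hashes(hash_lists):
    # No tags given -> nothing to intersect.
    if not hash_lists:
        return []
    # Start with the first tag's hashes and keep only those that
    # also appear in every other tag's list.
    common = set(hash_lists[0])
    for hash_list in hash_lists[1:]:
        common &= set(hash_list)
    return sorted(common)

if __name__ == "__main__":
    per_tag = [
        ["aaa111", "bbb222", "ccc333"],  # e.g. hashes tagged "holiday"
        ["bbb222", "ccc333", "ddd444"],  # e.g. hashes tagged "2021"
        ["ccc333", "bbb222"],            # e.g. hashes tagged "beach"
    ]
    print(common_hashes(per_tag))  # -> ['bbb222', 'ccc333']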