Compare commits
	
		
			3 Commits 
		
	
	
		
			b5cf010314
			...
			ebabc961fb
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  BodgeMaster | ebabc961fb | |
|  BodgeMaster | 86ae33a18d | |
|  BodgeMaster | 0928d90472 | 
							
								
								
									
										108
									
								
								filetags.py
								
								
								
								
							
							
						
						
									
										108
									
								
								filetags.py
								
								
								
								
							|  | @ -90,6 +90,49 @@ def add_tag(storage_directory, file_hash, tag): | |||
|   db.close() | ||||
|   return not tag_already_present | ||||
| 
 | ||||
def get_tags_by_hash(storage_directory, file_hash):
  """Return the tags recorded for a file hash.

  Opens container.sqlite inside storage_directory, looks up the internal id
  of file_hash in the hashes table and collects the tag column of every
  row in the tags table that carries that id.

  Returns a list of tags (empty if the file has none), or None if
  file_hash has no row in the hashes table.
  """
  db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
  # try/finally so the connection is released even if a query raises
  try:
    cursor = db.cursor()
    cursor.execute("SELECT id FROM hashes WHERE hash=?", (file_hash,))
    row = cursor.fetchone()
    if row is None:
      # unknown hash: signal with None rather than an empty list
      return None
    cursor.execute("SELECT tag FROM tags WHERE id=?", (row[0],))
    return [tag_row[0] for tag_row in cursor.fetchall()]
  finally:
    db.close()
| 
 | ||||
def get_hashes_by_tag(storage_directory, tag):
  """Return the hashes of all files that carry the exact tag.

  Opens container.sqlite inside storage_directory, collects the id of every
  row in the tags table whose tag equals the given one, and resolves each
  id to its hash through the hashes table.

  Returns a list of hashes in the order the tag rows were returned; the
  list is empty if no file carries the tag.
  """
  db = sqlite3.connect(os.path.join(storage_directory, "container.sqlite"))
  # try/finally so the connection is always closed (it was left open before)
  try:
    cursor = db.cursor()
    cursor.execute("SELECT id FROM tags WHERE tag=?", (tag,))
    internal_ids = [row[0] for row in cursor.fetchall()]

    file_hashes = []
    for internal_id in internal_ids:
      cursor.execute("SELECT hash FROM hashes WHERE id=?", (internal_id,))
      row = cursor.fetchone()
      # a tag row whose id has no entry in hashes is skipped instead of
      # failing with a TypeError on row[0]
      if row is not None:
        file_hashes.append(row[0])
    return file_hashes
  finally:
    db.close()
| 
 | ||||
def file_is_in_storage(storage_directory, file_hash_or_path, compress):
  """Tell whether file_hash_or_path names an object stored in the container.

  The object is looked for under <storage_directory>/objects, with ".xz"
  appended to the name when compress is true.

  Returns True only if that object file exists and file_hash_or_path is
  not itself a path to an existing file.
  """
  extension = ".xz" if compress else ""
  stored_object = os.path.join(storage_directory, "objects", file_hash_or_path+extension)
  if not os.path.isfile(stored_object):
    return False
  # os.path.join discards everything before an absolute component, so an
  # absolute file_hash_or_path would come back from join unchanged and pass
  # the check above; rule that case out by rejecting existing file paths
  return not os.path.isfile(file_hash_or_path)
| 
 | ||||
| def load_container_settings(storage_directory): | ||||
|   if not os.path.isfile(os.path.join(storage_directory, "container.sqlite")): | ||||
|     return (PATH_ERROR, None, None, None, None) | ||||
|  | @ -115,6 +158,7 @@ def load_container_settings(storage_directory): | |||
|   db.close() | ||||
| 
 | ||||
|   # check storage container against settings | ||||
|   #TODO: check compression ? | ||||
|   if parity and not os.path.isdir(os.path.join(storage_directory, "parity")): | ||||
|     return (GENERAL_ERROR, None, None, checksum_algorithm, compress) | ||||
|   return (0, parity, parity_bytes, checksum_algorithm, compress) | ||||
|  | @ -135,7 +179,7 @@ if __name__ == "__main__": | |||
|   """+sys.argv[0]+""" search <storage directory> <tags or partial tags> - return paths and tags of all found files | ||||
|   """+sys.argv[0]+""" search+first <storage directory> <tags or partial tags> - return hash and tags of first found file | ||||
|   """+sys.argv[0]+""" search+unique <storage directory> <tags or partial tags> - return hash and tags of the found file, return error if not found or multiple found | ||||
|   """+sys.argv[0]+""" lookup <storage directory> <hash|exact tag|set of exact tags> - return paths and tags of all found files | ||||
|   """+sys.argv[0]+""" lookup <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of all found files | ||||
|   """+sys.argv[0]+""" lookup+first <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of first found file | ||||
|   """+sys.argv[0]+""" lookup+unique <storage directory> <hash|exact tag|set of exact tags> - return hash and tags of the found file, return error if not found or multiple found | ||||
|   """+sys.argv[0]+""" link <storage directory> <hash> <location> - add a symlink in <location> that points to the referenced file | ||||
|  | @ -150,6 +194,7 @@ if __name__ == "__main__": | |||
|   """+sys.argv[0]+""" help - display this message | ||||
|   """ | ||||
|   #TODO: +path modifier for things that return a hash to return the path to the stored file instead | ||||
|   #TODO: +hash and +tags modifier for lookup | ||||
|   #TODO: condense modifiers onto the same lines as the main subcommand where possible | ||||
|   #TODO: clarification of <> and [] | ||||
|   #TODO: subcommand to change container settings | ||||
|  | @ -158,7 +203,7 @@ if __name__ == "__main__": | |||
|   try: | ||||
|     command = sys.argv[1].split("+") | ||||
|   except IndexError: | ||||
|     print("No command specified.", file=sys.stderr) | ||||
|     print("No subcommand specified.", file=sys.stderr) | ||||
|     print(USAGE, file=sys.stderr) | ||||
|     sys.exit(USAGE_ERROR) | ||||
|   if not command[0] in VALID_COMMANDS: | ||||
|  | @ -237,7 +282,7 @@ if __name__ == "__main__": | |||
|   #  file - requires a file path; adds a new file (or if file already in storage adds tags to that file), checks for collisions by comparing file size | ||||
|   #  move - requires file modifier; moves the file to the storage dir instead of copying it | ||||
|   if command[0] == "add": | ||||
|     if not len(sys.argv)>5: | ||||
|     if len(sys.argv)<5: | ||||
|       print("Too few arguments!", file=sys.stderr) | ||||
|       print(USAGE, file=sys.stderr) | ||||
|       sys.exit(USAGE_ERROR) | ||||
|  | @ -251,28 +296,24 @@ if __name__ == "__main__": | |||
|       if status==GENERAL_ERROR: | ||||
|         print("Verifying container settings failed.", file=sys.stderr) | ||||
|       sys.exit(status) | ||||
|     suffix="" | ||||
|     if compress: | ||||
|       suffix=".xz" | ||||
| 
 | ||||
|     file_hash_or_path = sys.argv[3] | ||||
|     hash_allowed=True | ||||
|     if 'file' in command: | ||||
|       hash_allowed=False | ||||
|     if not any([hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path)), os.path.isfile(file_hash_or_path)]): | ||||
|     if not any([hash_allowed and file_is_in_storage(storage_directory, file_hash_or_path, compress), os.path.isfile(file_hash_or_path)]): | ||||
|       print("Unknown file!", file=sys.stderr) | ||||
|       print(USAGE, file=sys.stderr) | ||||
|       sys.exit(PATH_ERROR) | ||||
| 
 | ||||
|     tags = sys.argv[4:] | ||||
| 
 | ||||
|     if hash_allowed and os.path.isfile(os.path.join(storage_directory, "objects", file_hash_or_path+suffix)) and not os.path.isfile(file_hash_or_path): | ||||
|       #that last part is needed bc os.path.join automatically discards everything before the last element containing an absolute path so if file_hash_or_path is an absolute path that would be what join gives back | ||||
|     if hash_allowed and file_is_in_storage(storage_directory, file_hash_or_path, compress): | ||||
|       file_hash = file_hash_or_path | ||||
|       print("File already in storage.") | ||||
|     else: | ||||
|       file_hash = calculate_file_hash(checksum_algorithm, file_hash_or_path) | ||||
|       if os.path.isfile(os.path.join(storage_directory, "objects", file_hash+suffix)): | ||||
|       if file_is_in_storage(storage_directory, file_hash_or_path, compress): | ||||
|         print("File already in storage.") | ||||
|         #this assumes that the storage directory has not been tampered with or corrupted, FIXME! | ||||
|         if 'move' in command: | ||||
|  | @ -288,6 +329,7 @@ if __name__ == "__main__": | |||
|         if parity: | ||||
|           gegerate_parity_file(os.path.join(storage_directory, "objects", file_hash), parity_bytes, os.path.join(storage_directory, "parity", file_hash)) | ||||
|         if compress: | ||||
|           print("Compressing...") | ||||
|           xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "objects", file_hash)) | ||||
|           if parity: | ||||
|             xz_process = os.popen("xz --best -T0 "+os.path.join(storage_directory, "parity", file_hash)) | ||||
|  | @ -299,5 +341,51 @@ if __name__ == "__main__": | |||
|       else: | ||||
|         print("Tag already present.") | ||||
| 
 | ||||
|   # lookup subcommand: return hash and tags of found files | ||||
|   # arguments: <storage directory> <hash|exact tag|set of exact tags> | ||||
|   # modifiers: | ||||
|   #  first - only return one file | ||||
|   #  unique - return error if not found or multiple found | ||||
|   #TODO: modifiers | ||||
|   if command[0] == "lookup": | ||||
|     if len(sys.argv)<4: | ||||
|       print("Too few arguments!", file=sys.stderr) | ||||
|       print(USAGE, file=sys.stderr) | ||||
|       sys.exit(USAGE_ERROR) | ||||
| 
 | ||||
|     storage_directory = sys.argv[2] | ||||
|     status, parity, parity_bytes, checksum_algorithm, compress = load_container_settings(storage_directory) | ||||
|     if not status==0: | ||||
|       if status==PATH_ERROR: | ||||
|         print("Invalid storage directory!", file=sys.stderr) | ||||
|         print(USAGE, file=sys.stderr) | ||||
|       if status==GENERAL_ERROR: | ||||
|         print("Verifying container settings failed.", file=sys.stderr) | ||||
|       sys.exit(status) | ||||
| 
 | ||||
|     file_tags_or_hash = sys.argv[3:] | ||||
|     if file_is_in_storage(storage_directory, file_tags_or_hash[0], compress): | ||||
|       tags = get_tags_by_hash(storage_directory, file_tags_or_hash[0]) | ||||
|       print("Tags for file:") | ||||
|       print(tags) | ||||
| 
 | ||||
|     # create a two dimensional array of all the files associated with each individual tag | ||||
|     file_hash_lists = [] | ||||
|     for tag in file_tags_or_hash: | ||||
|       file_hash_lists = file_hash_lists + [get_hashes_by_tag(storage_directory, tag)] | ||||
|     # take the first of the arrays in the two dimensional array | ||||
|     common_file_hashes = file_hash_lists[0] | ||||
|     # iterate over the two dimensional array | ||||
|     for file_hash_list in file_hash_lists: | ||||
|       # check each element in common_file_hashes to ensure it is also in all other arrays in the two dimensional array, remove if it isn’t | ||||
|       for file_hash in common_file_hashes: | ||||
|         if not file_hash in file_hash_list: | ||||
|           common_file_hashes.remove(file_hash) | ||||
| 
 | ||||
|     if not common_file_hashes == []: | ||||
|       print("Files for tag(s):") | ||||
|       print(common_file_hashes) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| # this line is here to work around a bug in Xed | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue