#import os
"""Upload local files to iRODS and tag them with metadata.

Workflow (see ``main``):

1. Load ``config.toml``; if it does not exist an example is written and the
   script exits so the user can edit it.
2. Validate the config structure.
3. If ``upload_files.toml`` does not exist, build it from the config rules
   and stop so the user can review it.
4. Otherwise validate every entry of the file list against iRODS and the
   local file system, skip the broken entries, and upload the rest.
"""
import argparse
import pathlib
import sys
from datetime import datetime

import toml
from irods.column import Criterion
from irods.models import DataObject, Collection
from irods.session import iRODSSession

parser = argparse.ArgumentParser(__file__)
parser.add_argument("--confirm",
                    help="auto continue script when a file entry fails validation",
                    action="store_true")
args = parser.parse_args()

configFile = 'config.toml'
fileList = 'upload_files.toml'
# <script name>.<ISO timestamp to the second>.log, with ':' replaced by '.'.
# ``.stem`` is used instead of ``.name.strip('.py')``: str.strip removes any
# leading/trailing '.', 'p' or 'y' characters, which mangles names such as
# 'copy.py'.
logFile = str(pathlib.Path(__file__).stem + '.'
              + datetime.now().isoformat().split(".")[0] + '.log').replace(':', '.')

# NOTE: the backslash in the Windows path is doubled so that the Python
# literal contains exactly one backslash (a bare "\P" is an invalid escape).
exampleConfig = """\
# required connect
[connect]
iRODS_host = '192.168.150.56'
iRODS_port = '1247'
iRODS_user = 'rods'
iRODS_password = 'password'
iRODS_zone = 'OCF'

# required files
[files]
# required at least one entry
[files.irods]
path = 'C:\\PHE_iRODS'
extension = [".md", ".exe"]
recursive = true
collection = '/OCF/home/rods/test'
# required for windows_create_date
[files.irods.metadata]
windows_create_date = true
# optional additional metadata, U not required
[files.irods.metadata.company]
A = 'company'
V = 'OCF'
U = 'string'
[files.irods.metadata.department]
A = 'department'
V = 'Cloud'\
"""


def findConfig(exampleConfig, configFile):
    """Load the TOML config, or write an example config and exit.

    Args:
        exampleConfig: text written to ``configFile`` when it is missing.
        configFile: path of the TOML configuration file.

    Returns:
        The parsed configuration as a dict.

    Exits the process when the file is missing (after writing the example)
    or when it cannot be loaded.
    """
    if not pathlib.Path(configFile).exists():
        with open(configFile, 'w') as config:
            config.write(exampleConfig)
        print('config missing, wrote example to ' + configFile
              + ' please modify this config and re-run')
        sys.exit()

    try:
        return toml.load(configFile)
    except Exception:
        # Any load failure is reported the same way as before, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        sys.exit('invalid TOML format: ' + configFile)


def _hasKeys(table, keys):
    """Return True when ``table`` is a dict containing every key in ``keys``."""
    return isinstance(table, dict) and all(key in table for key in keys)


def parseConfig(config):
    """Validate the structure of the loaded configuration.

    Checks that ``connect`` and ``files`` exist and are non-empty, that all
    connection keys are present, and that every ``files.<name>`` entry has
    ``path``, ``extension`` (list), ``recursive`` (bool), ``collection`` and a
    ``metadata`` table with a boolean ``windows_create_date`` whose other
    members each provide ``A`` and ``V``.

    Returns nothing; exits the process with a message on the first problem.
    """
    reqEntry = ['connect', 'files']

    # check missing entries
    for name in reqEntry:
        if name not in config:
            sys.exit('missing config entry: ' + name)

    # check empty entries
    for name in reqEntry:
        if not config[name]:
            sys.exit('empty config entry: ' + name)

    # check connect entry
    connectKeys = ('iRODS_host', 'iRODS_port', 'iRODS_user',
                   'iRODS_password', 'iRODS_zone')
    if not _hasKeys(config['connect'], connectKeys):
        sys.exit('missing config entry: connect')

    if not isinstance(config['files'], dict):
        sys.exit('missing config entry: files')

    # check files entries
    for i, entry in config['files'].items():
        # check search and put entries
        if not _hasKeys(entry, ('path', 'extension', 'recursive', 'collection')):
            sys.exit('missing config entry in: files.' + i)

        if not isinstance(entry['extension'], list):
            sys.exit('extension is not a list entry in: files.' + i + '.extension')

        if not isinstance(entry['recursive'], bool):
            sys.exit('recursive is not a boolean entry in: files.' + i + '.recursive')

        # check metadata windows_create_date
        if 'metadata' not in entry:
            sys.exit('missing config entry in: files.' + i + '.metadata')
        metadata = entry['metadata']
        if not metadata:
            sys.exit('empty metadata entry in: files.' + i + '.metadata')
        if not _hasKeys(metadata, ('windows_create_date',)):
            sys.exit('missing config entry: files.' + i
                     + '.metadata.windows_create_date')
        if not isinstance(metadata['windows_create_date'], bool):
            sys.exit('windows_create_date is not a boolean entry in metadata')

        # check metadata contain requisite fields (U is optional)
        for j, item in metadata.items():
            if j != 'windows_create_date' and not _hasKeys(item, ('A', 'V')):
                sys.exit('missing config entry in: files.' + i + '.metadata.' + j)


def findFiles(dir, ext, recursive):
    """Return absolute paths (as str) of files under ``dir`` with a suffix in ``ext``.

    Args:
        dir: directory to scan.
        ext: collection of suffixes including the dot, e.g. ``[".md"]``.
        recursive: when true, sub-directories are scanned as well.
    """
    files = []
    for item in pathlib.Path(dir).iterdir():
        if item.is_dir() and recursive:
            files.extend(findFiles(item.absolute(), ext, recursive))
        elif item.is_file() and item.suffix in ext:
            files.append(str(item.absolute()))
    return files


def _windowsCreateMeta(path):
    """Build the date/time metadata triples derived from ``st_ctime`` of ``path``."""
    ctime = pathlib.Path(path).stat().st_ctime
    stamp = datetime.fromtimestamp(ctime)
    return [['date', stamp.date().isoformat()],
            ['time', stamp.strftime('%H:%M:%S')],
            ['date_epoc', str(int(ctime))],
            ['year', format(stamp.year, '04')],
            ['month', format(stamp.month, '02')],
            ['day', format(stamp.day, '02')]]


def createFileList(config, fileList):
    """Create the TOML file list from the config rules if it does not exist.

    Each record has the shape
    ``{'1': {'file': <path>, 'collection': <irods collection>,
    'metadata': [[A, V, U], [A, V], ...]}}``.

    Args:
        config: validated configuration dict.
        fileList: path of the TOML file list.

    Returns:
        1 when the file list already exists (nothing is written),
        0 when it was created by this call.

    Exits the process when a configured ``path`` does not exist; in that
    case no file list is written.
    """
    if pathlib.Path(fileList).exists():
        print('\nfile list exists: ' + fileList + '\n')
        return 1

    output = {}
    record = 0
    for i, entry in config['files'].items():
        # collect required config params
        filesPath = entry['path']
        filesExtension = entry['extension']
        filesRecursive = entry['recursive']
        filesCollection = entry['collection']

        # check path exists, may want to check collection exists
        if not pathlib.Path(filesPath).exists():
            sys.exit('invalid path entry in: files.' + i + '.path')

        # find all files that match the rules of THIS entry only; sharing one
        # list across entries would re-emit earlier files under this entry's
        # collection and metadata
        files = findFiles(filesPath, filesExtension, filesRecursive)

        metas = []
        for j, item in entry['metadata'].items():
            if j == 'windows_create_date':
                continue
            meta = [item['A'], item['V']]
            if 'U' in item:
                meta.append(item['U'])
            metas.append(meta)

        addCreateDate = entry['metadata']['windows_create_date']
        for k in files:
            record += 1
            newMeta = list(metas)
            if addCreateDate:
                newMeta += _windowsCreateMeta(k)
            output[str(record)] = {'file': k,
                                   'collection': filesCollection,
                                   'metadata': newMeta}

    # write file list as toml to easily be edited manually; written only after
    # every entry was processed so a failed run leaves no empty list behind
    with open(fileList, 'w') as file_object:
        file_object.write(toml.dumps(output))

    print('\nfile list did not exist, created ' + str(record) + ' entries: ' + fileList + '\n'
          + '\ncheck content, add/remove or use as a template for your own file list\n'
          + '\nrerun this script to continue\n')
    return 0


def writeLog(message, entry):
    """Append ``message`` followed by ``entry`` dumped as TOML to the log file.

    The log is opened in append mode because several sections are written
    during one run; truncating would keep only the last section.
    """
    with open(logFile, 'a') as log:
        log.write(message + toml.dumps(entry))


def getConnect(config):
    """Create an iRODS session object from the ``connect`` config section."""
    connect = config['connect']
    sessioniRODS = iRODSSession(host=connect['iRODS_host'],
                                port=connect['iRODS_port'],
                                user=connect['iRODS_user'],
                                password=connect['iRODS_password'],
                                zone=connect['iRODS_zone'])
    sessioniRODS.connection_timeout = 300
    return sessioniRODS


def uploadFiles(fileContent, config):
    """Upload every entry of ``fileContent`` to iRODS and attach its metadata.

    Args:
        fileContent: dict of records as produced by ``createFileList``.
        config: configuration dict used to open the iRODS session.

    A failing entry is reported and recorded, and the remaining entries are
    still processed. Successful and failed entries are written to the log.
    """
    failedUpload = []
    successUpload = []

    with getConnect(config) as session:
        for i, record in fileContent.items():
            filePath = record['file']
            objPath = record['collection'] + '/' + pathlib.Path(filePath).name
            try:
                session.data_objects.put(filePath, objPath)  # upload
                obj = session.data_objects.get(objPath)
                for j in record['metadata']:
                    # j is [A, V] or [A, V, U]
                    obj.metadata.add(*j[:3])
            except Exception as err:
                failedUpload.append(i)
                print('\nfailed to upload file to iRODS :' + objPath)
                print('reason: ' + repr(err))
            else:
                successUpload.append(i)
                print('\nuploaded file to iRODS :' + objPath)

    if successUpload:
        writeLog('\n#### iRODS successful upload / metadata tag ####\n\n',
                 {i: fileContent[i] for i in successUpload})
    if failedUpload:
        writeLog('\n#### iRODS failed upload / metadata tag ####\n\n',
                 {i: fileContent[i] for i in failedUpload})


# (remote problem, local problem) -> (console message, log header), in the
# order the sections are reported. remote is 'col' (collection missing) or
# 'obj' (object already exists); local is 'dir' (directory missing) or
# 'file' (file missing); None means no problem on that side.
_VALIDATION_RULES = [
    (('col', 'dir'),
     '\nmissing iRODS collection AND missing local path, check log\n',
     '\n#### Missing iRODS collection AND missing local path ####\n\n'),
    (('col', 'file'),
     '\nmissing iRODS collection AND missing file in local path, check log\n',
     '\n#### Missing iRODS collection AND missing file in local path ####\n\n'),
    (('obj', 'dir'),
     '\nexisting iRODS object AND missing local path, check log\n',
     '\n#### Existing iRODS object AND missing local path ####\n\n'),
    (('obj', 'file'),
     '\nexisting iRODS object AND missing file in local path, check log\n',
     '\n#### Existing iRODS object AND missing file in local path ####\n\n'),
    (('col', None),
     '\nmissing iRODS collection, check log\n',
     '\n#### Missing iRODS collection ####\n\n'),
    (('obj', None),
     '\nexisting iRODS object, check log\n',
     '\n#### Existing iRODS object ####\n\n'),
    ((None, 'dir'),
     '\nmissing local path, check log\n',
     '\n#### Missing local path ####\n\n'),
    ((None, 'file'),
     '\nmissing file in local path, check log\n',
     '\n#### Missing file in local path ####\n\n'),
]


def _queryHasRows(query):
    """Return True when iterating ``query`` yields at least one row.

    The result set is consumed completely rather than stopping at the first
    row, matching the original loops.
    """
    return sum(1 for _ in query) > 0


def prepUploadFiles(fileList, config):
    """Validate the file list and return only the entries that can be uploaded.

    For every entry this checks that the target iRODS collection exists, that
    no data object with the same name already exists in it, and that the
    local directory and file exist. Broken entries are grouped by failure
    combination, written to the log and removed. Unless ``--confirm`` was
    given, the user is asked whether to continue when anything failed.

    Args:
        fileList: path of the TOML file list.
        config: configuration dict used to open the iRODS session.

    Returns:
        The file-list dict without the entries that failed validation.
    """
    fileContent = toml.load(fileList)

    # entry key -> (remote problem, local problem)
    status = {}
    with getConnect(config) as session:
        for i, record in fileContent.items():
            file_path = pathlib.Path(record['file'])
            obj_collection = record['collection']

            # check collection exists, check object exists
            remote = None
            colQuery = session.query(Collection).filter(
                Criterion('=', Collection.name, obj_collection))
            if not _queryHasRows(colQuery):
                remote = 'col'
            else:
                objQuery = session.query(DataObject).filter(
                    Criterion('=', Collection.name, obj_collection)).filter(
                    Criterion('=', DataObject.name, file_path.name))
                if _queryHasRows(objQuery):
                    remote = 'obj'

            # check local path exists, check local file exists
            local = None
            if not file_path.parent.exists():
                local = 'dir'
            elif not file_path.exists():
                local = 'file'

            status[i] = (remote, local)

    # report each failure category and collect the broken entries
    failedValidation = 0
    removeFileContent = []
    for key, message, header in _VALIDATION_RULES:
        broken = [i for i in fileContent if status[i] == key]
        if not broken:
            continue
        print(message)
        writeLog(header, {i: fileContent[i] for i in broken})
        removeFileContent.extend(broken)
        failedValidation += 1

    # continue
    if failedValidation:
        print('\nfailed validation for entry in ' + fileList
              + ', the entry will be skipped, check log ' + logFile + '\n')
        if not args.confirm:
            print('\n(to avoid this confirmation run the script with the argument \'--confirm\')\n')
            answer = input("\ndo you want to continue? y/n ").strip().lower()
            if answer not in ('y', 'yes'):
                sys.exit()

    # the categories are mutually exclusive, so every key appears at most once
    for i in removeFileContent:
        fileContent.pop(i)

    return fileContent


def main():
    """Run the config -> file list -> validate -> upload workflow."""
    configDict = findConfig(exampleConfig, configFile)
    parseConfig(configDict)
    listExist = createFileList(configDict, fileList)
    if listExist:
        validFileList = prepUploadFiles(fileList, configDict)
        uploadFiles(validFileList, configDict)


if __name__ == "__main__":
    main()