383 lines
17 KiB
Python
Executable File
383 lines
17 KiB
Python
Executable File
#import os
|
|
import argparse
|
|
import pathlib
|
|
import toml
|
|
from datetime import datetime
|
|
from irods.session import iRODSSession
|
|
from irods.column import Criterion
|
|
from irods.models import DataObject, Collection
|
|
|
|
# Command-line interface: a single opt-in flag.
parser = argparse.ArgumentParser(prog=__file__)
parser.add_argument(
    "--confirm",
    action="store_true",
    help="auto continue script when a file entry fails validation",
)
# Parsed once at module load; prepUploadFiles() reads args.confirm.
args = parser.parse_args()
|
|
# Default file names used by main().
configFile = 'config.toml'
fileList = 'upload_files.toml'

# Log file name: "<script stem>.<ISO timestamp without fractional seconds>.log",
# with every ':' replaced by '.' (presumably so the name is valid on Windows).
# Path.stem is used instead of str.strip('.py'): strip() removes any leading or
# trailing '.', 'p' and 'y' characters, so a script called "copy.py" would have
# been logged as "co".
logFile = (pathlib.Path(__file__).stem + '.' + datetime.now().isoformat().split(".")[0] + '.log').replace(':', '.')
|
|
|
|
# Example configuration; main() hands it to findConfig(), which writes it out
# when the config file does not exist yet.
# The backslash in the Windows path is escaped ("\\P") because "\P" is an
# invalid escape sequence in a normal string literal; the resulting text still
# contains a single backslash.
exampleConfig = """\
# required connect
[connect]
iRODS_host = '192.168.150.56'
iRODS_port = '1247'
iRODS_user = 'rods'
iRODS_password = 'password'
iRODS_zone = 'OCF'
# required files
[files]
# required at least one entry
[files.irods]
path = 'C:\\PHE_iRODS'
extension = [".md", ".exe"]
recursive = true
collection = '/OCF/home/rods/test'
# required for windows_create_date
[files.irods.metadata]
windows_create_date = true
# optional additional metadata, U not required
[files.irods.metadata.company]
A = 'company'
V = 'OCF'
U = 'string'
[files.irods.metadata.department]
A = 'department'
V = 'Cloud'\
"""
|
|
|
|
def findConfig(exampleConfig, configFile):
    """Load the TOML configuration, or write an example file and exit.

    Args:
        exampleConfig: text written to ``configFile`` when that file is missing.
        configFile: path of the TOML configuration file.

    Returns:
        The dictionary returned by ``toml.load(configFile)``.

    Raises:
        SystemExit: after the example file has been written (no message), or
            with an ``invalid TOML format`` message when loading fails.
    """
    if not pathlib.Path(configFile).exists():
        with open(configFile, 'w') as config:
            config.write(exampleConfig)
        print('config missing, wrote example to ' + configFile + ' please modify this config and re-run')
        # Raise SystemExit directly: quit() is only injected by the site module.
        raise SystemExit()

    try:
        return toml.load(configFile)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C is not reported as
        # a TOML problem.
        raise SystemExit('invalid TOML format: ' + configFile)
|
|
|
|
def parseConfig(config):
    """Validate the loaded configuration, exiting on the first problem found.

    Checks, in order: the ``connect`` and ``files`` sections exist and are not
    empty; ``connect`` holds all five iRODS settings; every ``files.<name>``
    table has ``path``, ``extension`` (list), ``recursive`` (bool),
    ``collection`` and a non-empty ``metadata`` table with a boolean
    ``windows_create_date``; every other metadata item has ``A`` and ``V``.

    Args:
        config: configuration dictionary (as returned by ``findConfig``).

    Returns:
        None when the configuration passes every check.

    Raises:
        SystemExit: with a message naming the missing or invalid entry.
    """
    requiredSections = ('connect', 'files')

    # check missing entries
    for section in requiredSections:
        if section not in config:
            raise SystemExit('missing config entry: ' + section)

    # check empty entries
    for section in requiredSections:
        if not config[section]:
            raise SystemExit('empty config entry: ' + section)

    # check connect entry
    connect = config['connect']
    connectKeys = ('iRODS_host', 'iRODS_port', 'iRODS_user', 'iRODS_password', 'iRODS_zone')
    if not isinstance(connect, dict) or any(key not in connect for key in connectKeys):
        raise SystemExit('missing config entry: connect')

    # check files entries
    files = config['files']
    if not isinstance(files, dict):
        # a scalar or array where the [files] table is expected
        raise SystemExit('missing config entry: files')

    fileKeys = ('path', 'extension', 'recursive', 'collection')
    for name, entry in files.items():
        where = 'files.' + name

        # check search and put entries
        if not isinstance(entry, dict) or any(key not in entry for key in fileKeys):
            raise SystemExit('missing config entry in: ' + where)

        if not isinstance(entry['extension'], list):
            raise SystemExit('extension is not a list entry in: ' + where + '.extension')

        if not isinstance(entry['recursive'], bool):
            raise SystemExit('recursive is not a boolean entry in: ' + where + '.recursive')

        # check metadata windows_create_date
        if 'metadata' not in entry:
            raise SystemExit('missing config entry in: ' + where + '.metadata')
        metadata = entry['metadata']
        if not metadata:
            raise SystemExit('empty metadata entry in: ' + where + '.metadata')
        if not isinstance(metadata, dict) or 'windows_create_date' not in metadata:
            raise SystemExit('missing config entry: ' + where + '.metadata.windows_create_date')
        if not isinstance(metadata['windows_create_date'], bool):
            raise SystemExit('windows_create_date is not a boolean entry in metadata')

        # check metadata contain requisite fields (U is optional)
        for key, item in metadata.items():
            if key == 'windows_create_date':
                continue
            if not isinstance(item, dict) or 'A' not in item or 'V' not in item:
                raise SystemExit('missing config entry in: ' + where + '.metadata.' + key)
|
|
|
|
def findFiles(dir, ext, recursive):
    """Collect the absolute paths of files under ``dir`` whose suffix is in ``ext``.

    Args:
        dir: directory to scan.
        ext: container of accepted suffixes, compared against ``Path.suffix``.
        recursive: when true, sub-directories are scanned as well.

    Returns:
        A list of absolute paths as strings, in directory-iteration order.
    """
    matches = []
    for entry in pathlib.Path(dir).iterdir():
        if recursive and entry.is_dir():
            # descend and splice the sub-directory's matches in place
            matches.extend(findFiles(entry.absolute(), ext, recursive))
            continue
        if entry.is_file() and entry.suffix in ext:
            matches.append(str(entry.absolute()))
    return matches
|
|
|
|
def _createDateMeta(path):
    """Return the date/time metadata rows derived from ``st_ctime`` of ``path``.

    NOTE(review): ``st_ctime`` is the creation time on Windows; on Unix it is
    the last metadata change, so these rows are only "create" dates on Windows.
    """
    ctime = pathlib.Path(path).stat().st_ctime
    created = datetime.fromtimestamp(ctime)
    return [
        ['date', str(created.date())],
        # drop fractional seconds
        ['time', str(created.time()).split(".")[0]],
        ['date_epoc', str(ctime).split(".")[0]],
        ['year', format(created.year, '04')],
        ['month', format(created.month, '02')],
        ['day', format(created.day, '02')],
    ]


def createFileList(config, fileList):
    """Create the upload file list from the configuration if it does not exist.

    For every ``files.<name>`` table the matching local files are collected
    with ``findFiles`` and recorded under a running record number as
    ``{'file': ..., 'collection': ..., 'metadata': [[A, V(, U)], ...]}``.
    When ``windows_create_date`` is true, the date rows for the file are
    appended to its metadata.

    The file list is written only after every entry has been processed, so an
    invalid ``path`` does not leave an empty list file behind. Each entry
    records only its own files; they are not repeated under later entries.

    Args:
        config: validated configuration dictionary.
        fileList: path of the TOML file list to create.

    Returns:
        0 when the list was created (the user is asked to review and rerun),
        1 when the list already exists.

    Raises:
        SystemExit: when a configured ``path`` does not exist.
    """
    if pathlib.Path(fileList).exists():
        print('\nfile list exists: ' + fileList + '\n')
        return 1

    output = {}
    record = 0
    for name, entry in config['files'].items():
        # collect required config params
        filesPath = entry['path']
        filesCollection = entry['collection']
        metadata = entry['metadata']

        # check path exists, may want to check collection exists
        if not pathlib.Path(filesPath).exists():
            raise SystemExit('invalid path entry in: files.' + name + '.path')

        # find all files that match the rules of this entry only
        matched = findFiles(filesPath, entry['extension'], entry['recursive'])

        # static metadata shared by every file of this entry
        metas = []
        for key, item in metadata.items():
            if key == 'windows_create_date':
                continue
            meta = [item['A'], item['V']]
            if 'U' in item:
                meta.append(item['U'])
            metas.append(meta)

        # build file dict { '1': { 'file': 'C:\\file.py', 'collection': '/OCF/home/rods', 'metadata': [['A','V','U'],['A','V']] }}
        for path in matched:
            record += 1
            fileMeta = list(metas)
            if metadata['windows_create_date']:
                fileMeta += _createDateMeta(path)
            output[str(record)] = {'file': path, 'collection': filesCollection, 'metadata': fileMeta}

    # write file list as toml to easily be edited manually
    with open(fileList, 'w') as file_object:
        file_object.write(toml.dumps(output))

    print('\nfile list did not exist, created ' + str(record) + ' entries: ' + fileList + '\n' + '\ncheck content, add/remove or use as a template for your own file list\n' + '\nrerun this script to continue\n')
    return 0
|
|
# may want continue confirmation here
|
|
|
|
def writeLog(message, entry):
    """Append one section to the log file named by the module-level ``logFile``.

    Args:
        message: header text written before the entries.
        entry: dictionary of file-list entries, serialised with ``toml.dumps``.
    """
    # Append mode: validation and upload each write several sections per run,
    # and opening with 'w' would leave only the last section in the log.
    with open(logFile, 'a') as log:
        log.write(message + toml.dumps(entry))
|
|
|
|
def getConnect(config):
    """Build an iRODS session object from the ``connect`` section of ``config``.

    Args:
        config: configuration dictionary holding the five ``iRODS_*`` settings
            under ``connect``.

    Returns:
        An ``iRODSSession`` with ``connection_timeout`` set to 300.
    """
    settings = config['connect']
    sessioniRODS = iRODSSession(
        host=settings['iRODS_host'],
        port=settings['iRODS_port'],
        user=settings['iRODS_user'],
        password=settings['iRODS_password'],
        zone=settings['iRODS_zone'],
    )
    sessioniRODS.connection_timeout = 300
    return sessioniRODS
|
|
|
|
def uploadFiles(fileContent, config):
    """Upload every entry of ``fileContent`` to iRODS and attach its metadata.

    Each entry is put to ``<collection>/<file name>``, then every metadata row
    (``[A, V]`` or ``[A, V, U]``) is added to the new object. A failure for one
    entry is reported with its reason and the loop continues with the next.
    Successful and failed entries are written to the log afterwards.

    Args:
        fileContent: mapping of record id to
            ``{'file': ..., 'collection': ..., 'metadata': [...]}``.
        config: configuration dictionary passed to ``getConnect``.
    """
    successUpload = {}
    failedUpload = {}

    with getConnect(config) as session:
        for key, entry in fileContent.items():
            filePath = entry['file']
            objPath = entry['collection'] + '/' + pathlib.Path(filePath).name
            metaData = entry['metadata']
            try:
                session.data_objects.put(filePath, objPath)  # upload
                obj = session.data_objects.get(objPath)
                for meta in metaData:
                    if len(meta) > 2:
                        obj.metadata.add(meta[0], meta[1], meta[2])
                    else:
                        obj.metadata.add(meta[0], meta[1])
            except Exception as err:
                # Exception rather than a bare except, so Ctrl-C aborts the
                # run instead of being counted as a failed upload.
                failedUpload[key] = entry
                print('\nfailed to upload file to iRODS :' + objPath + ' (' + str(err) + ')')
            else:
                successUpload[key] = entry
                print('\nuploaded file to iRODS :' + objPath)

    if successUpload:
        writeLog('\n#### iRODS successful upload / metadata tag ####\n\n', successUpload)
    if failedUpload:
        writeLog('\n#### iRODS failed upload / metadata tag ####\n\n', failedUpload)
|
|
|
|
def _lastQueryValue(query, column):
    """Return ``column`` of the last row yielded by ``query``, or "" if it yields none."""
    value = ""
    for row in query:
        value = row[column]
    return value


def prepUploadFiles(fileList, config):
    """Validate the file list against iRODS and the local disk before upload.

    Every entry is checked for one remote problem (target collection missing,
    or an object of the same name already in the collection) and one local
    problem (parent directory missing, or file missing). Entries with any
    problem are grouped by their combination of problems, reported on the
    console, written to the log, and removed from the returned mapping.

    When at least one entry failed and ``--confirm`` was not given, the user
    is asked whether to continue; any answer other than ``y``/``yes`` exits.

    Args:
        fileList: path of the TOML file list.
        config: configuration dictionary passed to ``getConnect``.

    Returns:
        The loaded file list with all failing entries removed.

    Raises:
        SystemExit: when the user declines to continue.
    """
    fileContent = toml.load(fileList)

    # record id -> (remote problem or None, local problem or None)
    problems = {}
    with getConnect(config) as session:
        for key, entry in fileContent.items():
            file_path = pathlib.Path(entry['file'])
            obj_collection = entry['collection']

            # check collection exists, check object exists
            remote = None
            colQuery = session.query(Collection).filter(Criterion('=', Collection.name, obj_collection))
            if not len(_lastQueryValue(colQuery, Collection.name)):
                remote = 'collection'
            else:
                objQuery = session.query(DataObject).filter(Criterion('=', Collection.name, obj_collection)).filter(Criterion('=', DataObject.name, file_path.name))
                if len(_lastQueryValue(objQuery, DataObject.name)):
                    remote = 'object'

            # check local path exists, check local file exists
            local = None
            if not file_path.parent.exists():
                local = 'directory'
            elif not file_path.exists():
                local = 'file'

            if remote or local:
                problems[key] = (remote, local)

    # Report groups in a fixed order: the four remote+local combinations first,
    # then remote-only, then local-only problems.
    remoteLabels = (('collection', 'missing iRODS collection'), ('object', 'existing iRODS object'))
    localLabels = (('directory', 'missing local path'), ('file', 'missing file in local path'))
    reports = [((remote, local), remoteText + ' AND ' + localText)
               for remote, remoteText in remoteLabels
               for local, localText in localLabels]
    reports += [((remote, None), remoteText) for remote, remoteText in remoteLabels]
    reports += [((None, local), localText) for local, localText in localLabels]

    failedValidation = False
    for kind, label in reports:
        broken = {key: fileContent[key] for key, found in problems.items() if found == kind}
        if not broken:
            continue
        print('\n' + label + ', check log\n')
        # the log header is the console label with a capitalised first letter
        writeLog('\n#### ' + label[0].upper() + label[1:] + ' ####\n\n', broken)
        failedValidation = True

    # continue
    if failedValidation:
        print('\nfailed validation for entry in ' + fileList + ', the entry will be skipped, check log ' + logFile + '\n')
        if not args.confirm:
            print('\n(to avoid this confirmation run the script with the argument \'--confirm\')\n')
            answer = input("\ndo you want to continue? y/n ").lower()
            if answer not in ('y', 'yes'):
                raise SystemExit()

    # drop every failing entry from the list that will be uploaded
    for key in problems:
        fileContent.pop(key)
    return fileContent
|
|
|
|
def main():
    """Run the workflow: load and validate the config, then build the file list or upload.

    When the file list had to be created, nothing is uploaded in this run;
    when it already exists, its entries are validated and uploaded.
    """
    configDict = findConfig(exampleConfig, configFile)
    parseConfig(configDict)

    # createFileList returns 0 when it has just written a new list
    if not createFileList(configDict, fileList):
        return

    uploadFiles(prepUploadFiles(fileList, configDict), configDict)
|
|
|
|
# Run the workflow only when this file is executed as a script.
if __name__ == "__main__":
    main()
|