Skip to content

Commit

Permalink
Added a test mode so that the number of new files can be limited
Browse files Browse the repository at this point in the history
  • Loading branch information
plesubc committed Oct 16, 2024
1 parent 8eb3ee8 commit fef7765
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/dryad2dataverse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
'''
import sys

VERSION = (0, 7, 4)
VERSION = (0, 7, 5)
__version__ = '.'.join([str(x) for x in VERSION])
USERAGENT = (f'dryad2dataverse/v{__version__} ({sys.platform.capitalize()}); '
f'Python {sys.version[:sys.version.find("(")-1]}')
77 changes: 48 additions & 29 deletions src/dryad2dataverse/scripts/dryadd.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import logging
import logging.handlers
import os
import pathlib
import shutil
import smtplib
import sys
Expand All @@ -27,7 +28,7 @@
import dryad2dataverse.transfer
from dryad2dataverse.handlers import SSLSMTPHandler

VERSION = (0, 5, 4)
VERSION = (0, 6, 2)
__version__ = '.'.join([str(x) for x in VERSION])

DRY = 'https://datadryad.org/api/v2'
Expand Down Expand Up @@ -372,6 +373,21 @@ def argp():
type=int,
dest='warn',
default=15)
parser.add_argument('--testmode-on',
help=('Turn on test mode. '
'Number of transfers will be limited '
'to the value in --testmode-limit '
'or 5 if you don\'t set --testmode-limit '),
action='store_true',
dest='testmode')
parser.add_argument('--testmode-limit',
help=('Test mode - only transfer first [n] '
'of the total number of (new) records. Old ones will '
'still be updated, though. '
'Default: 5'),
type=int,
default=5,
dest='testlimit')
parser.add_argument('--version', action='version',
version='%(prog)s '+__version__
+'; dryad2dataverse '+
Expand Down Expand Up @@ -416,7 +432,8 @@ def email_log(mailhost, fromaddr, toaddrs, credentials, port=465, secure=(),
'''
#pylint: disable=too-many-arguments
#Because consistency is for suckers and yahoo requires full hostname
subject = 'Dryad to Dataverse transfer error'
#subject = 'Dryad to Dataverse transfer error'
subject = 'Dryad to Dataverse logger message'
elog = logging.getLogger('email_log')
mailer = SSLSMTPHandler(mailhost=(mailhost, port),
fromaddr=fromaddr,
Expand Down Expand Up @@ -485,16 +502,13 @@ def checkwarn(val:int, **kwargs) -> None:
{'warn_too_many': bool}
'''
print(kwargs)
#print(vars(kwargs))
return
if not kwargs.get('warn_too_many'):
return
if val >= kwargs.get('warn',0):
mess = ('Large number of updates detected. '
f'{val} new studies exceeds threshold of {kwargs.get("warn", 0)}. '
'Program execution halted.')
subject = ('Dryad to Dataverse large update warning')
subject = 'Dryad to Dataverse large update warning'
for logme in kwargs.get('loggers'):
logme.warning(mess)
notify(msgtxt=(subject, mess),
Expand Down Expand Up @@ -539,9 +553,18 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
monitor = dryad2dataverse.monitor.Monitor(args.dbase)
#copy the database to make a backup, because paranoia is your friend
if os.path.exists(dryad2dataverse.constants.DBASE):
shutil.copyfile(dryad2dataverse.constants.DBASE,
dryad2dataverse.constants.DBASE+'.'+
datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
bu_db = pathlib.Path(dryad2dataverse.constants.DBASE)
try:
shutil.copyfile( bu_db,
pathlib.Path(bu_db.parent,
bu_db.stem + '_' +
datetime.datetime.now().strftime('%Y-%m-%d-%H%M') +
bu_db.suffix)
)
except FileNotFoundError:
print(dryad2dataverse.constants.DBASE)
print(bu_db)
sys.exit()
#list comprehension includes untimestamped dbase name, hence 2+
fnames = glob.glob(os.path.abspath(dryad2dataverse.constants.DBASE)
+'*')
Expand All @@ -557,15 +580,24 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
logger.info('Total new files: %s', len(updates))
elog.info('Total new files: %s', len(updates))

checkwarn(val=len(updates),
checkwarn(val=len(updates) if not args.testmode else
min(args.testlimit, len(updates)),
loggers=[logger],
**vars(args))
if args.testmode:
logger.warning('Test mode is ON - number of updates limited to %s', args.testlimit)
elog.warning('Test mode is ON - number of updates limited to %s', args.testlimit)

#update all the new files
verbo(args.verbosity, **{'Total to process': len(updates)})

try:
count = 0
testcount = 0
for doi in updates:
if args.testmode and (testcount >= args.testlimit):
logger.info('Test limit of %s reached', args.testlimit)
break
count += 1
logger.info('Start processing %s of %s', count, len(updates))
logger.info('DOI: %s, Dryad URL: https://datadryad.org/stash/dataset/%s',
Expand Down Expand Up @@ -611,6 +643,7 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
transfer.set_correct_date()
notify(new_content(study),
**vars(args))
testcount+=1

elif update_type == 'updated':
logger.info('Updated metadata: %s', doi[0])
Expand Down Expand Up @@ -674,23 +707,9 @@ def main(log='/var/log/dryadd.log', level=logging.WARNING):
print(f'Error: {err}. Exiting. For details see log at {args.log}.')
sys.exit()

def main2(log='/var/log/dryadd.log', level=logging.WARNING):
    '''
    Debugging entry point for exercising checkwarn() by hand.

    Parses the command line with argp(), prints the resulting
    namespace, then calls checkwarn() with a fixed count of 26,
    an empty logger list and every parsed option as a keyword argument.

    log : str
        path to logfile (accepted but not used by this function)
    level : int
        log level (accepted but not used by this function)
    '''
    options = argp().parse_args()
    print(options)
    #Hand every parsed option to checkwarn as a keyword argument
    settings = vars(options)
    checkwarn(val=26, loggers=[], **settings)
if __name__ == '__main__':
main2()
main()
_parser = argp()
_args = _parser.parse_args()
print('This is what you would have done had you actually run this')
print(_args)
7 changes: 5 additions & 2 deletions tests/tests_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def test_06_unchanged_files(self):
def test_07_added_files(self):
self.assertEqual.__self__.maxDiff = None
newdata=copy.copy(self.testCase.fileJson[0]['_embedded']['stash:files'][-1])
newdata['_links']['stash:file-download']['href'] = '/api/v2/files/999999/download'
#newdata['_links']['stash:file-download']['href'] = '/api/v2/files/999999/download'
#let's change API output for no reason!
newdata['_links']['stash:download']['href'] = '/api/v2/files/999999/download'
newdata['path'] = 'ubc_rand1.csv'
newdata['description'] = 'UBC random data 1'
newdata['digestType'] = 'md5'
Expand Down Expand Up @@ -100,7 +102,8 @@ def test_08_deleted_files(self):
'application/x-zip-compressed',
23787587,
'',
'', '')]}
#'', '')]}
'sha-256', 'cb1c4f28de8aaec8c8c4b1eca498eace1c2c8c847dd443bd5314d42a70d6e4ae')]}
self.assertEqual.__self__.maxDiff = None
self.assertEqual(expect, diff)
#and restore
Expand Down
4 changes: 3 additions & 1 deletion tests/tests_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ def test_notes(self):
notes = {'typeName': 'notesText',
'typeClass':'primitive',
'multiple':False,
'value': '<p><b>Dryad version number:</b> 4</p>\n<p><b>Version status:</b> submitted</p>\n<p><b>Dryad curation status:</b> Published</p>\n<p><b>Sharing link:</b> https://datadryad.org/stash/share/anFoRwjUzvjvpH8RA2T0mNipNsVst0s0N5mFzcTTcJE</p>\n<p><b>Storage size:</b> 23874866</p>\n<p><b>Visibility:</b> public</p>\n'}
#'value': '<p><b>Dryad version number:</b> 4</p>\n<p><b>Version status:</b> submitted</p>\n<p><b>Dryad curation status:</b> Published</p>\n<p><b>Sharing link:</b> https://datadryad.org/stash/share/anFoRwjUzvjvpH8RA2T0mNipNsVst0s0N5mFzcTTcJE</p>\n<p><b>Storage size:</b> 23874866</p>\n<p><b>Visibility:</b> public</p>\n'}
#Why keep something the same over time? That's for suckers!
'value': '<p><b>Dryad version number:</b> 4</p>\n<p><b>Version status:</b> submitted</p>\n<p><b>Dryad curation status:</b> Published</p>\n<p><b>Sharing link:</b> http://datadryad.org/stash/dataset/doi:10.5061/dryad.2rbnzs7jp</p>\n<p><b>Storage size:</b> 23874866</p>\n<p><b>Visibility:</b> public</p>\n'}
self.assertEqual(self.testCase._convert_notes(self.testCase.dryadJson), notes )

def test_pub_date(self):
Expand Down

0 comments on commit fef7765

Please sign in to comment.