diff --git a/HISTORY.rst b/HISTORY.rst
index d51d7fac..bfc714df 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -16,6 +16,9 @@ Unreleased
 - advancedsearch.php endpoint now supports IAS3 authorization.
 - ``ia upload`` now has a ``--keep-directories`` option to use the full local
   file paths as the remote name.
+- ``ia upload --status-check`` now checks whether the item exceeds the maximum item size.
+  It now also checks S3 with the specific identifier and access key rather than
+  whether S3 is overloaded in general.
 
 **Bugfixes**
 
diff --git a/internetarchive/cli/ia_upload.py b/internetarchive/cli/ia_upload.py
index 2e6cd31e..077c6ffe 100644
--- a/internetarchive/cli/ia_upload.py
+++ b/internetarchive/cli/ia_upload.py
@@ -80,6 +80,9 @@
                                    is_valid_metadata_key, json,
                                    validate_s3_identifier)
 
+MAX_ITEM_SIZE = 2**40  # 1 TiB
+
+
 def _upload_files(item, files, upload_kwargs, prev_identifier=None, archive_session=None):
     """Helper function for calling :meth:`Item.upload`"""
     responses = []
@@ -160,19 +163,22 @@ def main(argv, session):
             sys.exit(1)
 
     # Status check.
+    if args['<identifier>']:
+        item = session.get_item(args['<identifier>'])
     if args['--status-check']:
-        if session.s3_is_overloaded():
+        if session.s3_is_overloaded(identifier=args['<identifier>'], access_key=session.access_key):
             print(f'warning: {args["<identifier>"]} is over limit, and not accepting requests. '
                   'Expect 503 SlowDown errors.',
                   file=sys.stderr)
             sys.exit(1)
+        elif item.item_size >= MAX_ITEM_SIZE:
+            print(f'warning: {args["<identifier>"]} is exceeding the maximum item size '
+                  'and not accepting uploads.', file=sys.stderr)
+            sys.exit(1)
         else:
             print(f'success: {args["<identifier>"]} is accepting requests.')
         sys.exit()
 
-    elif args['<identifier>']:
-        item = session.get_item(args['<identifier>'])
-
     # Upload keyword arguments.
     if args['--size-hint']:
         args['--header']['x-archive-size-hint'] = args['--size-hint']
diff --git a/tests/cli/test_ia_upload.py b/tests/cli/test_ia_upload.py
index 4af8ac74..6d1810d9 100644
--- a/tests/cli/test_ia_upload.py
+++ b/tests/cli/test_ia_upload.py
@@ -42,6 +42,7 @@ def test_ia_upload_invalid_identifier(capsys, caplog):
 
 def test_ia_upload_status_check(capsys):
     with IaRequestsMock() as rsps:
+        rsps.add_metadata_mock('nasa')
         rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org',
                  body=STATUS_CHECK_RESPONSE,
                  content_type='application/json')
@@ -53,6 +54,7 @@ def test_ia_upload_status_check(capsys):
         j = json.loads(STATUS_CHECK_RESPONSE)
         j['over_limit'] = 1
         rsps.reset()
+        rsps.add_metadata_mock('nasa')
         rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org',
                  body=json.dumps(j),
                  content_type='application/json')
@@ -62,6 +64,21 @@ def test_ia_upload_status_check(capsys):
         assert ('warning: nasa is over limit, and not accepting requests. '
                 'Expect 503 SlowDown errors.') in err
 
+        def fake_big_item(body):
+            body = json.loads(body)
+            body['item_size'] = 2**41  # 2 TiB
+            return json.dumps(body)
+
+        rsps.reset()
+        rsps.add_metadata_mock('nasa', transform_body=fake_big_item)
+        rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org',
+                 body=STATUS_CHECK_RESPONSE,
+                 content_type='application/json')
+
+        ia_call(['ia', 'upload', 'nasa', '--status-check'], expected_exit_code=1)
+        out, err = capsys.readouterr()
+        assert 'warning: nasa is exceeding the maximum item size and not accepting uploads.' in err
+
 
 def test_ia_upload_debug(capsys, tmpdir_ch, nasa_mocker):
     with open('test.txt', 'w') as fh:
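---
Commentary, not part of the patch: a minimal sketch of the status check this
diff implements, using only the public internetarchive API that the changed
code itself calls (get_session, ArchiveSession.s3_is_overloaded,
Item.item_size). The identifier 'nasa' is an arbitrary example, and
MAX_ITEM_SIZE mirrors the constant added to ia_upload.py above.

    import sys

    from internetarchive import get_session

    MAX_ITEM_SIZE = 2**40  # 1 TiB, as defined in ia_upload.py above

    session = get_session()
    identifier = 'nasa'  # arbitrary example item
    item = session.get_item(identifier)

    # New behaviour: ask S3 about this identifier/access key specifically,
    # instead of the old parameterless session.s3_is_overloaded() call.
    if session.s3_is_overloaded(identifier=identifier, access_key=session.access_key):
        print(f'warning: {identifier} is over limit.', file=sys.stderr)
        sys.exit(1)
    elif item.item_size >= MAX_ITEM_SIZE:
        print(f'warning: {identifier} is exceeding the maximum item size.', file=sys.stderr)
        sys.exit(1)
    else:
        print(f'success: {identifier} is accepting requests.')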