Item status check: error on maximum item size exceedance and test with specific identifier/access key #485

Open · wants to merge 1 commit into base: master
HISTORY.rst: 3 changes (3 additions, 0 deletions)
@@ -16,6 +16,9 @@ Unreleased
- advancedsearch.php endpoint now supports IAS3 authorization.
- ``ia upload`` now has a ``--keep-directories`` option to use the full local file paths as the
  remote name.
- ``ia upload <identifier> --status-check`` now checks whether the item exceeds the maximum item size.
  It also checks S3 with the specific identifier and access key rather than only checking whether S3
  is overloaded in general.

**Bugfixes**

internetarchive/cli/ia_upload.py: 14 changes (10 additions, 4 deletions)
@@ -80,6 +80,9 @@
is_valid_metadata_key, json, validate_s3_identifier)


MAX_ITEM_SIZE = 2**40 # 1 TiB


def _upload_files(item, files, upload_kwargs, prev_identifier=None, archive_session=None):
"""Helper function for calling :meth:`Item.upload`"""
responses = []
@@ -160,19 +163,22 @@ def main(argv, session):
        sys.exit(1)

    # Status check.
    if args['<identifier>']:
        item = session.get_item(args['<identifier>'])
Contributor:

Do we really want to get an item that could be 1TB or more before we do a status-check?

Contributor (Author):

Well, we need the Item object for both the size status check and the actual upload. While this structure means we needlessly fetch the item metadata when S3 is overloaded, it avoids more complicated conditions (e.g. first run the S3 overload check if --status-check is present, then fetch the item metadata if the identifier is present, then check the item size if both are present, then exit successfully if --status-check is present), which in my opinion leads to less readable code. The alternatives are two lines with get_item calls, which is just as ugly, or some sort of lazy evaluation, which is somewhat complicated to implement. So I found this to be the least awkward solution.
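For illustration, one possible shape of the "lazy evaluation" alternative mentioned here, sketched against the public internetarchive API. The helper name, the use of functools.lru_cache, and the identifier 'nasa' are assumptions for the sketch, not code from this PR.

import sys
from functools import lru_cache

from internetarchive import get_session

session = get_session()
identifier = 'nasa'  # illustrative identifier, not from the PR


@lru_cache(maxsize=None)
def lazy_item():
    # The metadata request only happens the first time the item is needed;
    # later calls reuse the cached Item object, so the upload path would not
    # fetch it a second time.
    return session.get_item(identifier)


if not session.s3_is_overloaded(identifier=identifier, access_key=session.access_key):
    # Metadata is fetched only on this branch, i.e. only when the S3
    # over-limit check has already passed.
    print(lazy_item().item_size, file=sys.stderr)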

    if args['--status-check']:
        if session.s3_is_overloaded():
        if session.s3_is_overloaded(identifier=args['<identifier>'], access_key=session.access_key):
            print(f'warning: {args["<identifier>"]} is over limit, and not accepting requests. '
                  'Expect 503 SlowDown errors.',
                  file=sys.stderr)
            sys.exit(1)
        elif item.item_size >= MAX_ITEM_SIZE:
Contributor:

Suggested change:
        elif item.item_size >= MAX_ITEM_SIZE:
        elif item.item_size > MAX_ITEM_SIZE:

Contributor (Author):

This would require some testing of whether IA's servers still accept any upload (including an empty file) when the item is exactly 1 TiB. That might be tricky, though, since I think the metadata files, which get modified after every upload, also count towards the item size.
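A trivial illustration of the boundary case being discussed, assuming item_size is a plain byte count (the value is made up):

MAX_ITEM_SIZE = 2**40  # 1 TiB

item_size = 2**40  # an item that is exactly 1 TiB

print(item_size >= MAX_ITEM_SIZE)  # True: the PR as written refuses further uploads
print(item_size > MAX_ITEM_SIZE)   # False: the suggested change would still allow them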

print(f'warning: {args["<identifier>"]} is exceeding the maximum item size '
'and not accepting uploads.', file=sys.stderr)
sys.exit(1)
else:
print(f'success: {args["<identifier>"]} is accepting requests.')
sys.exit()

elif args['<identifier>']:
item = session.get_item(args['<identifier>'])

    # Upload keyword arguments.
    if args['--size-hint']:
        args['--header']['x-archive-size-hint'] = args['--size-hint']
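Taken together, the changelog entry and the changes to main() above amount to roughly the following library-level flow. This is a sketch using the public internetarchive API (get_session, ArchiveSession.get_item, ArchiveSession.s3_is_overloaded, Item.item_size); the identifier 'nasa' and the exact messages are illustrative, not taken from the PR.

import sys

from internetarchive import get_session

MAX_ITEM_SIZE = 2**40  # 1 TiB, mirroring the constant added in ia_upload.py

session = get_session()
identifier = 'nasa'  # illustrative identifier
item = session.get_item(identifier)

# Per-identifier/per-access-key S3 check instead of the previous global
# "is S3 overloaded" check.
if session.s3_is_overloaded(identifier=identifier, access_key=session.access_key):
    print(f'warning: {identifier} is over limit, and not accepting requests.', file=sys.stderr)
    sys.exit(1)
# New check: refuse uploads once the item has reached the maximum item size.
elif item.item_size >= MAX_ITEM_SIZE:
    print(f'warning: {identifier} is exceeding the maximum item size.', file=sys.stderr)
    sys.exit(1)
else:
    print(f'success: {identifier} is accepting requests.')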
tests/cli/test_ia_upload.py: 17 changes (17 additions, 0 deletions)
@@ -42,6 +42,7 @@ def test_ia_upload_invalid_identifier(capsys, caplog):

def test_ia_upload_status_check(capsys):
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org',
                 body=STATUS_CHECK_RESPONSE,
                 content_type='application/json')
@@ -53,6 +54,7 @@ def test_ia_upload_status_check(capsys):
        j = json.loads(STATUS_CHECK_RESPONSE)
        j['over_limit'] = 1
        rsps.reset()
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org',
                 body=json.dumps(j),
                 content_type='application/json')
@@ -62,6 +64,21 @@ def test_ia_upload_status_check(capsys):
        assert ('warning: nasa is over limit, and not accepting requests. '
                'Expect 503 SlowDown errors.') in err

        def fake_big_item(body):
            body = json.loads(body)
            body['item_size'] = 2**41  # 2 TiB
            return json.dumps(body)

        rsps.reset()
        rsps.add_metadata_mock('nasa', transform_body=fake_big_item)
        rsps.add(responses.GET, f'{PROTOCOL}//s3.us.archive.org',
                 body=STATUS_CHECK_RESPONSE,
                 content_type='application/json')

        ia_call(['ia', 'upload', 'nasa', '--status-check'], expected_exit_code=1)
        out, err = capsys.readouterr()
        assert 'warning: nasa is exceeding the maximum item size and not accepting uploads.' in err


def test_ia_upload_debug(capsys, tmpdir_ch, nasa_mocker):
    with open('test.txt', 'w') as fh: