Commit

Avoid unnecessary file uploads
fchollet committed Jan 10, 2024
1 parent 38983ab commit f60aef5
Showing 1 changed file with 50 additions and 4 deletions.
54 changes: 50 additions & 4 deletions scripts/upload.py
@@ -1,17 +1,31 @@
import boto3
from pathlib import Path
import mimetypes
import hashlib
import os
import json
from multiprocessing.pool import ThreadPool

AKEY = os.environ["AWS_S3_ACCESS_KEY"]
SKEY = os.environ["AWS_S3_SECRET_KEY"]

BUCKET = "keras.io"
USE_THREADING = True
HASH_CACHE = "contents_hashes.json"

s3 = boto3.client("s3", aws_access_key_id=AKEY, aws_secret_access_key=SKEY)


def hash_file(fpath):
    h = hashlib.sha256()
    b = bytearray(128 * 1024)
    mv = memoryview(b)
    with open(fpath, "rb", buffering=0) as f:
        while n := f.readinto(mv):
            h.update(mv[:n])
    return h.hexdigest()[:8]


def upload_file(bucket, fpath, key_name, redirect=None):
    print(f"...Upload to {bucket}:{key_name}")
    mime = mimetypes.guess_type(fpath)[0]
@@ -23,6 +37,19 @@ def upload_file(bucket, fpath, key_name, redirect=None):
    )


def load_hash_cache():
    s3.download_file(BUCKET, HASH_CACHE, HASH_CACHE)
    with open(HASH_CACHE) as f:
        contents = f.read()
    return json.loads(contents)


def save_hash_cache(hash_cache):
    with open(HASH_CACHE, "w") as f:
        f.write(json.dumps(hash_cache))
    upload_file(BUCKET, HASH_CACHE, HASH_CACHE)


def wrapped_upload_file(args):
    bucket, fpath, key_name = args
    upload_file(bucket, fpath, key_name)
@@ -44,10 +71,10 @@ def cleanup(site_directory, redirect_directory):
        s3.delete_object(Bucket=BUCKET, Key=key)


def upload_dir(directory, include_img=True):
def upload_dir(directory, include_img=True, hash_cache=None):
    print(f"Uploading files from '{directory}'...")
    all_targets = []
    for dp, dn, fn in os.walk(directory):
    for dp, _, fn in os.walk(directory):
        if fn:
            for f in fn:
                fpath = os.path.join(dp, f)
@@ -60,17 +87,32 @@ def upload_dir(directory, include_img=True):
print("> " + fpath)
print(">>>>>> " + key_name)
all_targets.append((BUCKET, fpath, key_name))

    if hash_cache is not None:
        filtered_targets = []
        new_hash_cache = {}
        for bucket, fpath, key_name in all_targets:
            new_hash = hash_file(fpath)
            old_hash = hash_cache.get(key_name)
            if new_hash != old_hash:
                filtered_targets.append((bucket, fpath, key_name))
            new_hash_cache[key_name] = new_hash
        all_targets = filtered_targets

    if USE_THREADING:
        pool = ThreadPool(processes=8)
        pool.map(wrapped_upload_file, all_targets)
    else:
        for args in all_targets:
            wrapped_upload_file(args)

    if hash_cache is not None:
        return new_hash_cache


def upload_redirects(directory):
    print("Uploading redirects...")
    for dp, dn, fn in os.walk(directory):
    for dp, _, fn in os.walk(directory):
        if fn:
            for f in fn:
                fpath = os.path.join(dp, f)
@@ -87,4 +129,8 @@ def upload_redirects(directory):

if __name__ == "__main__":
    root = Path(__file__).parent.parent.resolve()
    upload_dir(os.path.join(root, "site"), include_img=True)
    hash_cache = load_hash_cache()
    hash_cache = upload_dir(
        os.path.join(root, "site"), include_img=True, hash_cache=hash_cache
    )
    save_hash_cache(hash_cache)
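
For reference, here is a minimal, standalone sketch of the change-detection pattern this commit introduces: hash every file, compare against a cached manifest, and only act on files whose hash changed. It is independent of S3 and boto3; the `plan_uploads` helper, the local `site` directory, and the local cache file are illustrative assumptions, whereas the real script uploads the filtered files to the bucket and stores the manifest there.

import hashlib
import json
import os


def hash_file(fpath, chunk_size=128 * 1024):
    # Stream the file in chunks so large files need not fit in memory.
    h = hashlib.sha256()
    with open(fpath, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    # A short prefix of the digest is enough to detect content changes.
    return h.hexdigest()[:8]


def plan_uploads(directory, old_cache):
    """Return (files_to_upload, new_cache) for an incremental sync (illustrative helper)."""
    to_upload = []
    new_cache = {}
    for dirpath, _, filenames in os.walk(directory):
        for name in filenames:
            fpath = os.path.join(dirpath, name)
            key = os.path.relpath(fpath, directory)
            digest = hash_file(fpath)
            # Record every hash so unchanged files stay in the cache.
            new_cache[key] = digest
            if old_cache.get(key) != digest:
                to_upload.append((fpath, key))
    return to_upload, new_cache


if __name__ == "__main__":
    # Hypothetical local paths; the real script keeps its cache in the S3 bucket.
    cache_path = "contents_hashes.json"
    old_cache = {}
    if os.path.exists(cache_path):
        with open(cache_path) as f:
            old_cache = json.load(f)
    to_upload, new_cache = plan_uploads("site", old_cache)
    print(f"{len(to_upload)} file(s) changed since last run")
    with open(cache_path, "w") as f:
        json.dump(new_cache, f)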
