From c1b97292cbcb62b285a65ccc3793aba150e2768f Mon Sep 17 00:00:00 2001 From: Macneil Shonle Date: Mon, 7 Aug 2023 11:42:29 -0700 Subject: [PATCH 1/3] Support macOS for downloading macOS doesn't have an `md5sum` command, so we need to execute the `md5` command for each file separately. The Linux path should act the same as before. --- download.sh | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) mode change 100644 => 100755 download.sh diff --git a/download.sh b/download.sh old mode 100644 new mode 100755 index 8cfed9935..c9246d4fe --- a/download.sh +++ b/download.sh @@ -3,6 +3,26 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. +md5check () { + if [[ "$OSTYPE" == "darwin"* ]]; then + failed=0 + while read -r hash file; do + if [[ -n "$hash" ]]; then + if [[ "$(md5 -q "$file")" != "$hash" ]]; then + echo "$file: FAILED" + failed=1 + else + echo "$file: OK" + fi + fi + done < "$1" + return $failed + else + md5sum -c "$1" + return $? 
+ fi +} + read -p "Enter the URL from email: " PRESIGNED_URL echo "" read -p "Enter the list of models to download without spaces (7B,13B,70B,7B-chat,13B-chat,70B-chat), or press Enter for all: " MODEL_SIZE @@ -20,7 +40,7 @@ wget ${PRESIGNED_URL/'*'/"USE_POLICY.md"} -O ${TARGET_FOLDER}"/USE_POLICY.md" echo "Downloading tokenizer" wget ${PRESIGNED_URL/'*'/"tokenizer.model"} -O ${TARGET_FOLDER}"/tokenizer.model" wget ${PRESIGNED_URL/'*'/"tokenizer_checklist.chk"} -O ${TARGET_FOLDER}"/tokenizer_checklist.chk" -(cd ${TARGET_FOLDER} && md5sum -c tokenizer_checklist.chk) +(cd ${TARGET_FOLDER} && md5check tokenizer_checklist.chk) for m in ${MODEL_SIZE//,/ } do @@ -55,6 +75,6 @@ do wget ${PRESIGNED_URL/'*'/"${MODEL_PATH}/params.json"} -O ${TARGET_FOLDER}"/${MODEL_PATH}/params.json" wget ${PRESIGNED_URL/'*'/"${MODEL_PATH}/checklist.chk"} -O ${TARGET_FOLDER}"/${MODEL_PATH}/checklist.chk" echo "Checking checksums" - (cd ${TARGET_FOLDER}"/${MODEL_PATH}" && md5sum -c checklist.chk) + (cd ${TARGET_FOLDER}"/${MODEL_PATH}" && md5check checklist.chk) done From b02220c36b36baa07da4ef2c52a2dab040325147 Mon Sep 17 00:00:00 2001 From: Macneil Shonle Date: Tue, 8 Aug 2023 16:23:22 -0700 Subject: [PATCH 2/3] Allow downloads to be continued Add a more obvious place for people to paste in their URL (if they want to edit the script) Sanity check the URL before proceeding Provide earlier feedback when large files are being checksummed --- download.sh | 126 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 46 deletions(-) diff --git a/download.sh b/download.sh index c9246d4fe..88ae83530 100755 --- a/download.sh +++ b/download.sh @@ -3,78 +3,112 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement. -md5check () { +PRESIGNED_URL="" # URL from email +TARGET_FOLDER="." 
# where all files should end up + +# Download a file from the presigned URL, saving it to the target folder. +# Resumes downloading partially downloaded files. +# $1 - partial path of file to download +# $2 - target folder +download () { + local filename="$1" + local target_dir="$2" + [[ -d "$target_dir" ]] || mkdir -p "$target_dir" + wget --continue "${PRESIGNED_URL/'*'/"$filename"}" -O "${target_dir}/${filename}" +} + +# $1 - checklist file +md5check () { + if [[ ! -f "$1" ]]; then + echo "Missing checklist file: $1" >&2 + return 1 + fi if [[ "$OSTYPE" == "darwin"* ]]; then failed=0 while read -r hash file; do if [[ -n "$hash" ]]; then + printf "%s: " "$file" if [[ "$(md5 -q "$file")" != "$hash" ]]; then - echo "$file: FAILED" - failed=1 + echo "FAILED" + (( failed++ )) else - echo "$file: OK" + echo "OK" fi fi done < "$1" - return $failed + if (( failed > 0 )); then + echo "$failed files failed checksum" >&2 + return 1 + else + return 0 + fi else md5sum -c "$1" return $? fi } -read -p "Enter the URL from email: " PRESIGNED_URL -echo "" -read -p "Enter the list of models to download without spaces (7B,13B,70B,7B-chat,13B-chat,70B-chat), or press Enter for all: " MODEL_SIZE -TARGET_FOLDER="." # where all files should end up -mkdir -p ${TARGET_FOLDER} +# If the user didn't modify the script to include the URL, ask for it +if [[ -z "$PRESIGNED_URL" ]]; then + read -p "Enter the URL from email: " PRESIGNED_URL + echo "" +fi + +# Check if the PRESIGNED_URL has the form `https://..*...` (i.e. with one wildcard '*' in the path) +if ! echo "$PRESIGNED_URL" | grep -q -E '^https://[^*]+[*][^*]*'; then + echo "Invalid URL: $PRESIGNED_URL" >&2 + echo "Expected: The URL from the email, containing one asterisk '*'." 
>&2 + exit 1 +fi + +ALL_MODELS="7B,13B,70B,7B-chat,13B-chat,70B-chat" +read -p "Enter the list of models to download without spaces (${ALL_MODELS}), or press Enter for all: " MODEL_SIZE if [[ $MODEL_SIZE == "" ]]; then - MODEL_SIZE="7B,13B,70B,7B-chat,13B-chat,70B-chat" + MODEL_SIZE="$ALL_MODELS" fi +mkdir -p "$TARGET_FOLDER" + echo "Downloading LICENSE and Acceptable Usage Policy" -wget ${PRESIGNED_URL/'*'/"LICENSE"} -O ${TARGET_FOLDER}"/LICENSE" -wget ${PRESIGNED_URL/'*'/"USE_POLICY.md"} -O ${TARGET_FOLDER}"/USE_POLICY.md" +download "LICENSE" "$TARGET_FOLDER" +download "USE_POLICY.md" "$TARGET_FOLDER" echo "Downloading tokenizer" -wget ${PRESIGNED_URL/'*'/"tokenizer.model"} -O ${TARGET_FOLDER}"/tokenizer.model" -wget ${PRESIGNED_URL/'*'/"tokenizer_checklist.chk"} -O ${TARGET_FOLDER}"/tokenizer_checklist.chk" -(cd ${TARGET_FOLDER} && md5check tokenizer_checklist.chk) +download "tokenizer.model" "$TARGET_FOLDER" +download "tokenizer_checklist.chk" "$TARGET_FOLDER" +(cd "$TARGET_FOLDER" && md5check tokenizer_checklist.chk) -for m in ${MODEL_SIZE//,/ } -do - if [[ $m == "7B" ]]; then - SHARD=0 - MODEL_PATH="llama-2-7b" - elif [[ $m == "7B-chat" ]]; then - SHARD=0 - MODEL_PATH="llama-2-7b-chat" - elif [[ $m == "13B" ]]; then - SHARD=1 - MODEL_PATH="llama-2-13b" - elif [[ $m == "13B-chat" ]]; then - SHARD=1 - MODEL_PATH="llama-2-13b-chat" - elif [[ $m == "70B" ]]; then - SHARD=7 - MODEL_PATH="llama-2-70b" - elif [[ $m == "70B-chat" ]]; then - SHARD=7 - MODEL_PATH="llama-2-70b-chat" - fi +for m in ${MODEL_SIZE//,/ }; do + case $m in + "7B") + SHARD=0; MODEL_PATH="llama-2-7b" ;; + "7B-chat") + SHARD=0; MODEL_PATH="llama-2-7b-chat" ;; + "13B") + SHARD=1; MODEL_PATH="llama-2-13b" ;; + "13B-chat") + SHARD=1; MODEL_PATH="llama-2-13b-chat" ;; + "70B") + SHARD=7; MODEL_PATH="llama-2-70b" ;; + "70B-chat") + SHARD=7; MODEL_PATH="llama-2-70b-chat" ;; + *) + echo "Invalid model size: $m" >&2 + echo "Expected: one of $ALL_MODELS" >&2 + continue + ;; + esac - echo "Downloading 
${MODEL_PATH}" - mkdir -p ${TARGET_FOLDER}"/${MODEL_PATH}" + echo "Downloading $MODEL_PATH" + mkdir -p "${TARGET_FOLDER}/${MODEL_PATH}" - for s in $(seq -f "0%g" 0 ${SHARD}) - do - wget ${PRESIGNED_URL/'*'/"${MODEL_PATH}/consolidated.${s}.pth"} -O ${TARGET_FOLDER}"/${MODEL_PATH}/consolidated.${s}.pth" + for s in $(seq -f "0%g" 0 ${SHARD}); do + download "${MODEL_PATH}/consolidated.${s}.pth" "$TARGET_FOLDER" done - wget ${PRESIGNED_URL/'*'/"${MODEL_PATH}/params.json"} -O ${TARGET_FOLDER}"/${MODEL_PATH}/params.json" - wget ${PRESIGNED_URL/'*'/"${MODEL_PATH}/checklist.chk"} -O ${TARGET_FOLDER}"/${MODEL_PATH}/checklist.chk" + download "${MODEL_PATH}/params.json" "$TARGET_FOLDER" + download "${MODEL_PATH}/checklist.chk" "$TARGET_FOLDER" echo "Checking checksums" - (cd ${TARGET_FOLDER}"/${MODEL_PATH}" && md5check checklist.chk) + (cd "${TARGET_FOLDER}/${MODEL_PATH}" && md5check checklist.chk) done - From ef497fd35a7dc38e0e179a5391c2ebbe88b7a0f9 Mon Sep 17 00:00:00 2001 From: Macneil Shonle Date: Tue, 8 Aug 2023 16:24:47 -0700 Subject: [PATCH 3/3] Fix chmod x bits The .py script should be executable, while the README should not --- README.md | 0 example_chat_completion.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 README.md mode change 100644 => 100755 example_chat_completion.py diff --git a/README.md b/README.md old mode 100755 new mode 100644 diff --git a/example_chat_completion.py b/example_chat_completion.py old mode 100644 new mode 100755