Commit

Merge branch 'master' into patch-1

mrmhodak authored Oct 29, 2024
2 parents 333c56e + ec8cbe5 commit a22e5fa
Showing 15 changed files with 117 additions and 70 deletions.
4 changes: 2 additions & 2 deletions .github/scripts/format-cpp.sh
@@ -19,8 +19,8 @@

# Checks all the modified c/c++ files, format them and adds them
# to the commit.
for FILE in $(git diff upstream/$1 --name-only | grep -E '.*\.(cc|cpp|h|hpp)$')
for FILE in $(git diff $1/$2 --name-only | grep -E '.*\.(cc|cpp|h|hpp)$')
do
clang-format -i -style=file $FILE
git add $FILE
done
done
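Together with the `format.yml` change further down, the script's interface moves from one argument (a branch name, always compared against a hard-coded `upstream` remote) to two arguments: a remote name and a branch name. A minimal local sketch of the new invocation, assuming an `upstream` remote pointing at `mlcommons/inference` and `master` as the base branch (both names are illustrative, and `clang-format` must be on the PATH):

```bash
# Illustrative local run of the reworked formatter; remote and branch names are assumptions.
git remote add upstream https://github.com/mlcommons/inference.git   # skip if the remote already exists
git fetch upstream                                                    # makes upstream/master visible to 'git diff'
chmod +x .github/scripts/format-cpp.sh
.github/scripts/format-cpp.sh upstream master                        # formats and stages changed C/C++ files
```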
4 changes: 2 additions & 2 deletions .github/scripts/format-py.sh
@@ -19,8 +19,8 @@

# Checks all the modified c/c++ files, format them and adds them
# to the commit.
for FILE in $(git diff upstream/$1 --name-only | grep -E '.*\.py$')
for FILE in $(git diff $1/$2 --name-only | grep -E '.*\.py$')
do
autopep8 --in-place -a $FILE
git add $FILE
done
done
1 change: 1 addition & 0 deletions .github/workflows/build_wheels.yml
@@ -13,6 +13,7 @@ on:
jobs:
update_version:
name: Update version only on ubuntu but used by windows and macos
if: github.repository_owner == 'mlcommons'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
1 change: 1 addition & 0 deletions .github/workflows/cla.yml
@@ -8,6 +8,7 @@ on:

jobs:
cla-check:
if: github.repository_owner == 'mlcommons'
runs-on: ubuntu-latest
steps:
- name: "MLCommons CLA bot check"
25 changes: 11 additions & 14 deletions .github/workflows/format.yml
@@ -1,4 +1,4 @@

# Automatic code formatting
name: "format"
on:
pull_request:
@@ -8,16 +8,14 @@ on:

env:
python_version: "3.9"
repo: "mlcommons/inference"

jobs:
format-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
ref: ${{ github.head_ref || github.ref_name }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.pull_request.head.ref }}
- name: Set up Python ${{ env.python_version }}
uses: actions/setup-python@v3
with:
@@ -29,27 +27,26 @@ jobs:
- name: Grant permissions
run: |
chmod 777 "${GITHUB_WORKSPACE}/.github/scripts/format-cpp.sh"
chmod 777 "${GITHUB_WORKSPACE}/.github/scripts/format-py.sh"
chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-cpp.sh"
chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-py.sh"
- name: Format Codebase
if: ${{ github.event.pull_request.base.repo.full_name == env.repo }}
run: |
git remote add upstream ${{ github.event.pull_request.base.repo.clone_url }}
git fetch upstream ${{ github.event.pull_request.base.ref }}
".github/scripts/format-cpp.sh" "${{ github.event.pull_request.base.ref }}"
".github/scripts/format-py.sh" "${{ github.event.pull_request.base.ref }}"
git fetch upstream
".github/scripts/format-cpp.sh" "upstream" "${{ github.event.pull_request.base.ref }}"
".github/scripts/format-py.sh" "upstream" "${{ github.event.pull_request.base.ref }}"
- name: Commit
run: |
cd ${GITHUB_WORKSPACE}
HAS_CHANGES=$(git diff --cached --name-only)
HAS_CHANGES=$(git diff --staged --name-only)
if [ ${#HAS_CHANGES} -gt 0 ]; then
git log
git config --global user.email "${GITHUB_ACTOR_ID}+${GITHUB_ACTOR}@users.noreply.github.com"
git config --global user.name "${GITHUB_ACTOR}"
git config --global user.email "${{ github.actor }}@users.noreply.github.com"
git config --global user.name "${{ github.actor }}"
git commit -m '[Automated Commit] Format Codebase'
git push upstream ${{ github.head_ref || github.ref_name }}
git push
fi
7 changes: 7 additions & 0 deletions docs/benchmarks/language/llama2-70b.md
@@ -25,3 +25,10 @@ hide:
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "neuralmagic") }}

{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "neuralmagic") }}

=== "AMD"
## AMD MLPerf Implementation

{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "amd") }}

{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "amd") }}
62 changes: 42 additions & 20 deletions docs/benchmarks/language/reproducibility/indyscc24-bert.md
@@ -9,40 +9,62 @@ hide:

This guide is designed for the [IndySCC 2024](https://sc24.supercomputing.org/students/indyscc/) to walk participants through running and optimizing the [MLPerf Inference Benchmark](https://arxiv.org/abs/1911.02549) using [Bert Large](https://github.com/mlcommons/inference/tree/master/language/bert#supported-models) across various software and hardware configurations. The goal is to maximize system throughput (measured in samples per second) without compromising accuracy.

For a valid MLPerf inference submission, two types of runs are required: a performance run and an accuracy run. In this competition, we focus on the `Offline` scenario, where throughput is the key metric—higher values are better. The official MLPerf inference benchmark for Bert Large requires processing a minimum of 10833 samples in both performance and accuracy modes using the Squad v1.1 dataset. Setting up for Nvidia GPUs may take 2-3 hours but can be done offline. Your final output will be a tarball (`mlperf_submission.tar.gz`) containing MLPerf-compatible results, which you will submit to the SCC organizers for scoring.
For a valid MLPerf inference submission, two types of runs are required: a performance run and an accuracy run. In this competition, we focus on the `Offline` scenario, where throughput is the key metric—higher values are better. The official MLPerf inference benchmark for Bert Large requires processing a minimum of 10833 samples in both performance and accuracy modes using the Squad v1.1 dataset.

## Scoring

In the SCC, your first objective will be to run a reference (unoptimized) Python implementation or a vendor-provided version (such as Nvidia's) of the MLPerf inference benchmark to secure a baseline score.

Once the initial run is successful, you'll have the opportunity to optimize the benchmark further by maximizing system utilization, applying quantization techniques, adjusting ML frameworks, experimenting with batch sizes, and more, all of which can earn you additional points.

Since vendor implementations of the MLPerf inference benchmark vary and are often limited to single-node benchmarking, teams will compete within their respective hardware categories (e.g., Nvidia GPUs, AMD GPUs). Points will be awarded based on the throughput achieved on your system.
In the IndySCC 2024, your objective will be to run a reference (unoptimized) Python implementation of the MLPerf inference benchmark and complete a successful submission that passes the submission checker. Only one of the available frameworks needs to be submitted.


!!! info
Both MLPerf and CM automation are evolving projects.
If you encounter issues or have questions, please submit them [here](https://github.com/mlcommons/cm4mlops/issues)

## Artifacts to submit to the SCC committee

You will need to submit the following files:

* `mlperf_submission_short.tar.gz` - automatically generated file with validated MLPerf results.
* `mlperf_submission_short_summary.json` - automatically generated summary of MLPerf results.
* `mlperf_submission_short.run` - CM commands to run MLPerf BERT inference benchmark saved to this file.
* `mlperf_submission_short.tstamps` - execution timestamps before and after CM command saved to this file.
* `mlperf_submission_short.md` - description of your platform and some highlights of the MLPerf benchmark execution.

All the needed files are automatically pushed to the GitHub repository if you manage to complete the given commands. No additional files need to be submitted.


=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

{{ mlperf_inference_implementation_readme (4, "bert-99", "reference", extra_variation_tags=",_short", scenarios=["Offline"],categories=["Edge"], setup_tips=False) }}
{{ mlperf_inference_implementation_readme (4, "bert-99", "reference", extra_variation_tags="", fixed_scenarios=["Offline"],categories=["Edge"], setup_tips=False) }}

=== "Nvidia"
## Nvidia MLPerf Implementation
{{ mlperf_inference_implementation_readme (4, "bert-99", "nvidia", extra_variation_tags=",_short", scenarios=["Offline"],categories=["Edge"], setup_tips=False, implementation_tips=False) }}


## Submission Commands

### Generate actual submission tree

```bash
cm run script --tags=generate,inference,submission \
--clean \
--preprocess_submission=yes \
--run-checker \
--tar=yes \
--env.CM_TAR_OUTFILE=submission.tar.gz \
--division=open \
--category=edge \
--env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
--run_style=test \
--quiet \
--submitter=<Team Name>
```

* Use `--hw_name="My system name"` to give a meaningful system name.


### Push Results to GitHub

Fork the `mlperf-inference-results-scc24` branch of the repository URL at [https://github.com/mlcommons/cm4mlperf-inference](https://github.com/mlcommons/cm4mlperf-inference).

Run the following command after **replacing `--repo_url` with your GitHub fork URL**.

```bash
cm run script --tags=push,github,mlperf,inference,submission \
--repo_url=https://github.com/<myfork>/cm4mlperf-inference \
--repo_branch=mlperf-inference-results-scc24 \
--commit_message="Results on system <HW Name>" \
--quiet
```

Once uploaded, open a Pull Request against the origin repository. A GitHub Action will run there, and once it finishes you can see your submitted results at [https://docs.mlcommons.org/cm4mlperf-inference](https://docs.mlcommons.org/cm4mlperf-inference).
10 changes: 5 additions & 5 deletions docs/benchmarks/text_to_image/reproducibility/scc24.md
@@ -46,11 +46,11 @@ or supporting multi-node execution) useful for the community and [MLCommons](htt
=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

{{ mlperf_inference_implementation_readme (4, "sdxl", "reference", extra_variation_tags=",_short,_scc24-base", devices=["ROCm", "CUDA"],scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, skip_test_query_count=True, extra_input_string="--precision=float16") }}
{{ mlperf_inference_implementation_readme (4, "sdxl", "reference", extra_variation_tags=",_short,_scc24-base", devices=["ROCm", "CUDA"],fixed_scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, skip_test_query_count=True, extra_input_string="--precision=float16") }}

=== "Nvidia"
## Nvidia MLPerf Implementation
{{ mlperf_inference_implementation_readme (4, "sdxl", "nvidia", extra_variation_tags=",_short,_scc24-base", scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }}
{{ mlperf_inference_implementation_readme (4, "sdxl", "nvidia", extra_variation_tags=",_short,_scc24-base", fixed_scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }}

!!! info
Once the above run is successful, you can change `_scc24-base` to `_scc24-main` to run the main variant.
@@ -80,17 +80,17 @@ cm run script --tags=generate,inference,submission \

### Push Results to GitHub

Fork the repository URL at [https://github.com/gateoverflow/cm4mlperf-inference](https://github.com/gateoverflow/cm4mlperf-inference).
Fork the `mlperf-inference-results-scc24` branch of the repository URL at [https://github.com/mlcommons/cm4mlperf-inference](https://github.com/mlcommons/cm4mlperf-inference).

Run the following command after **replacing `--repo_url` with your GitHub fork URL**.

```bash
cm run script --tags=push,github,mlperf,inference,submission \
--repo_url=https://github.com/gateoverflow/cm4mlperf-inference \
--repo_url=https://github.com/<myfork>/cm4mlperf-inference \
--repo_branch=mlperf-inference-results-scc24 \
--commit_message="Results on system <HW Name>" \
--quiet
```

Once uploaded give a Pull Request to the origin repository. Github action will be running there and once
finished you can see your submitted results at [https://gateoverflow.github.io/cm4mlperf-inference](https://gateoverflow.github.io/cm4mlperf-inference).
finished you can see your submitted results at [https://docs.mlcommons.org/cm4mlperf-inference](https://docs.mlcommons.org/cm4mlperf-inference).
3 changes: 3 additions & 0 deletions docs/submission/index.md
@@ -2,6 +2,7 @@
hide:
- toc
---
[![Streamline your MLPerf results using CM Framework](https://img.youtube.com/vi/eI1Hoecc3ho/0.jpg)](https://youtu.be/eI1Hoecc3ho)

=== "CM based benchmark"
If you have followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, all the valid results will get aggregated to the `cm cache` folder. The following command could be used to browse the structure of inference results folder generated by CM.
@@ -125,6 +126,8 @@ Once all the results across all the models are ready you can use the following c

* Use `--hw_notes_extra` option to add additional notes like `--hw_notes_extra="Result taken by NAME" `

* Use the `--results_dir` option to specify the results folder for non-CM-based benchmarks

The above command should generate "submission.tar.gz" if there are no submission checker issues and you can upload it to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).

## Aggregate Results in GitHub
3 changes: 1 addition & 2 deletions language/bert/accuracy-squad.py
@@ -82,8 +82,7 @@
"int64": np.int64,
"float16": np.float16,
"float32": np.float32,
"float64": np.float64,
}
"float64": np.float64}


def get_final_text(pred_text, orig_text, do_lower_case):
2 changes: 1 addition & 1 deletion language/gpt-j/evaluation.py
@@ -108,7 +108,7 @@ def main():
result = metric.compute(
predictions=preds, references=targets, use_stemmer=True, use_aggregator=False
)
result = {k: round(np.mean(v) * 100, 4) for k, v in result.items()}
result = {k: f"{round(np.mean(v) * 100, 4)}" for k, v in result.items()}
prediction_lens = [len(pred) for pred in preds]
result["gen_len"] = np.sum(prediction_lens)
result["gen_num"] = len(preds)
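The single-line change above switches the reported ROUGE values from rounded floats to their string representations. A small self-contained sketch of the difference, using made-up score lists rather than real benchmark output:

```python
import numpy as np

# Toy per-sample ROUGE scores standing in for the metric.compute() output (values are made up).
result = {"rouge1": [0.42951, 0.43102], "rouge2": [0.20331, 0.19874]}

old_style = {k: round(np.mean(v) * 100, 4) for k, v in result.items()}       # numeric values
new_style = {k: f"{round(np.mean(v) * 100, 4)}" for k, v in result.items()}  # string values

print(old_style)  # values are floats, e.g. 43.0265
print(new_style)  # values are strings, e.g. '43.0265'
```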
5 changes: 2 additions & 3 deletions language/mixtral-8x7b/README.md
@@ -104,16 +104,15 @@ sudo -v ; curl https://rclone.org/install.sh | sudo bash
```
Once Rclone is installed, cd into the folder where you want to place the dataset and run:
```bash
rclone copyurl https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl ./ -a -P
rclone copyurl https://inference.mlcommons-storage.org/mixtral_8x7b/09292024_mixtral_15k_mintoken2_v1.pkl ./ -a -P
```
#### Using wget

Alternatively, you can simply cd into the folder where you want to place the dataset and run

TBD: The dataset is being replaced in v5.0 due to https://github.com/mlcommons/inference/issues/1777

```bash
wget https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl
wget https://inference.mlcommons-storage.org/mixtral_8x7b/09292024_mixtral_15k_mintoken2_v1.pkl
```

### Calibration dataset
37 changes: 22 additions & 15 deletions main.py
@@ -10,7 +10,7 @@ def mlperf_inference_implementation_readme(
setup_tips=True,
run_tips=True,
skip_test_query_count=False,
scenarios=[],
fixed_scenarios=[],
devices=[],
frameworks=[],
categories=[],
@@ -59,6 +59,11 @@ def mlperf_inference_implementation_readme(
devices = ["CUDA"]
frameworks = ["TensorRT"]

elif implementation == "amd":
devices = ["cuda"]
frameworks = ["pytorch"]
execution_envs.remove("Docker")

elif implementation == "neuralmagic":
devices = ["CUDA"]
frameworks = ["pytorch"]
@@ -107,7 +112,7 @@
frameworks = ["Onnxruntime"]

elif implementation == "ctuning-cpp":
scenarios = ["SingleStream"]
fixed_scenarios = ["SingleStream"]
devices = ["CPU"]
if model.lower() == "resnet50":
frameworks = ["TFLite"]
@@ -132,16 +137,16 @@
final_run_mode = "valid" if "short" not in extra_variation_tags else "test"

for category in categories:
if not scenarios:
if category == "Edge" and not scenarios:
scenarios = ["Offline", "SingleStream"]
if (
model.lower() in ["resnet50", "retinanet"]
and not "MultiStream" in scenarios
): # MultiStream was duplicating
scenarios.append("MultiStream")
elif category == "Datacenter":
scenarios = ["Offline", "Server"]
if category == "Edge":
scenarios = ["Offline", "SingleStream"]
if model.lower() in [
"resnet50", "retinanet"] and not "MultiStream" in scenarios: # MultiStream was duplicating
scenarios.append("MultiStream")
elif category == "Datacenter":
scenarios = ["Offline", "Server"]
if fixed_scenarios:
scenarios = [
scenario for scenario in scenarios if scenario in fixed_scenarios]

content += f'{pre_space}=== "{category.lower()}"\n\n'

@@ -163,7 +168,7 @@
cur_space3 = cur_space2 + " "
cur_space4 = cur_space3 + " "

content += f'{cur_space1}=== "{device}"\n'
content += f"{cur_space1}=== \"{device}\"\n"
content += f"{cur_space2}##### {device} device\n\n"

# minimum system requirements
@@ -185,6 +190,7 @@
# ref to cm installation
content += f"{cur_space3}Please refer to the [installation page](site:inference/install/) to install CM for running the automated benchmark commands.\n\n"
test_query_count = get_test_query_count(

model, implementation, device.lower()
)
if (
@@ -262,6 +268,7 @@
content += f"{cur_space3}<details>\n"
content += f"{cur_space3}<summary> Please click here to see more options for the docker launch </summary>\n\n"
content += f"{cur_space3}* `--docker_cm_repo=<Custom CM GitHub repo URL in username@repo format>`: to use a custom fork of cm4mlops repository inside the docker image\n\n"
content += f"{cur_space3}* `--docker_cm_repo_branch=<Custom CM GitHub repo Branch>`: to checkout a custom branch of the cloned cm4mlops repository inside the docker image\n\n"
content += f"{cur_space3}* `--docker_cache=no`: to not use docker cache during the image build\n"

if implementation.lower() == "nvidia":
@@ -325,7 +332,7 @@
run_suffix += f"{cur_space3}</details>\n"

for scenario in scenarios:
content += f'{cur_space3}=== "{scenario}"\n{cur_space4}###### {scenario}\n\n'
content += f"{cur_space3}=== \"{scenario}\"\n{cur_space4}###### {scenario}\n\n"
run_cmd = mlperf_inference_run_command(
spaces + 21,
model,
Expand All @@ -347,7 +354,7 @@ def mlperf_inference_implementation_readme(
# content += run_suffix

if len(scenarios) > 1:
content += f'{cur_space3}=== "All Scenarios"\n{cur_space4}###### All Scenarios\n\n'
content += f"{cur_space3}=== \"All Scenarios\"\n{cur_space4}###### All Scenarios\n\n"
run_cmd = mlperf_inference_run_command(
spaces + 21,
model,
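In `main.py`, the `scenarios` parameter becomes `fixed_scenarios` and now acts as a filter rather than an override: each category still receives its default scenario list, which is then restricted to the scenarios the caller fixed. A standalone sketch of that selection logic, assuming the behaviour shown in the diff (the function and argument names here are illustrative, not the repository's actual API):

```python
def select_scenarios(model, category, fixed_scenarios=None):
    """Pick per-category default scenarios, then restrict them if fixed_scenarios is given (sketch)."""
    if category == "Edge":
        scenarios = ["Offline", "SingleStream"]
        # MultiStream is only added for the vision models, mirroring the diff above.
        if model.lower() in ["resnet50", "retinanet"] and "MultiStream" not in scenarios:
            scenarios.append("MultiStream")
    elif category == "Datacenter":
        scenarios = ["Offline", "Server"]
    else:  # fallback not present in the original code, added so the sketch handles any input
        scenarios = ["Offline"]
    if fixed_scenarios:
        scenarios = [s for s in scenarios if s in fixed_scenarios]
    return scenarios


print(select_scenarios("resnet50", "Edge"))                              # ['Offline', 'SingleStream', 'MultiStream']
print(select_scenarios("bert-99", "Edge", fixed_scenarios=["Offline"]))  # ['Offline']
```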