From 82101bbf15b83a1b6e6d369245dd719b9256effe Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Tue, 29 Oct 2024 11:43:00 -0500 Subject: [PATCH 1/5] Update Moe dataset download instructions (#1888) --- language/mixtral-8x7b/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/language/mixtral-8x7b/README.md b/language/mixtral-8x7b/README.md index 7b20b6ba0..853bf1e75 100644 --- a/language/mixtral-8x7b/README.md +++ b/language/mixtral-8x7b/README.md @@ -104,16 +104,15 @@ sudo -v ; curl https://rclone.org/install.sh | sudo bash ``` Once Rclone is installed, cd into the folder where you want to place the dataset and run: ```bash -rclone copyurl https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl ./ -a -P +rclone copyurl https://inference.mlcommons-storage.org/mixtral_8x7b/09292024_mixtral_15k_mintoken2_v1.pkl ./ -a -P ``` #### Using wget Alternatively, you can simply cd into the folder where you want to place the dataset and run -TBD: The dataset is being replaced in v5.0 due to https://github.com/mlcommons/inference/issues/1777 ```bash -wget https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl +wget https://inference.mlcommons-storage.org/mixtral_8x7b/09292024_mixtral_15k_mintoken2_v1.pkl ``` ### Calibration dataset From c72a59f37815429f47d831d1dba90f969771c37a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 29 Oct 2024 16:45:34 +0000 Subject: [PATCH 2/5] Update evaluation.py | Output GPTJ accuracy metric as string (#1886) * Update evaluation.py | Output GPTJ accuracy metric as string Fixes https://github.com/mlcommons/inference/issues/1885 * [Automated Commit] Format Codebase --- language/gpt-j/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/gpt-j/evaluation.py b/language/gpt-j/evaluation.py index 3203359d0..a95c8cd5c 100644 --- a/language/gpt-j/evaluation.py +++ b/language/gpt-j/evaluation.py @@ -108,7 +108,7 @@ def main(): result = metric.compute( predictions=preds, references=targets, use_stemmer=True, use_aggregator=False ) - result = {k: round(np.mean(v) * 100, 4) for k, v in result.items()} + result = {k: f"{round(np.mean(v) * 100, 4)}" for k, v in result.items()} prediction_lens = [len(pred) for pred in preds] result["gen_len"] = np.sum(prediction_lens) result["gen_num"] = len(preds) From de6c550acc783bbead4ce255ebc5c9b3ab283d29 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 29 Oct 2024 16:47:27 +0000 Subject: [PATCH 3/5] Update format.yml | Improve the github action (#1875) * Update format.yml | Improve the github action * Update format-py.sh * Update format-cpp.sh * Update format.yml * Update accuracy-squad.py | testing * Update format.yml * [Automated Commit] Format Codebase --------- Co-authored-by: arjunsuresh --- .github/scripts/format-cpp.sh | 4 ++-- .github/scripts/format-py.sh | 4 ++-- .github/workflows/format.yml | 25 +++++++++++-------------- language/bert/accuracy-squad.py | 3 +-- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/.github/scripts/format-cpp.sh b/.github/scripts/format-cpp.sh index aa36c7564..c0a237489 100755 --- a/.github/scripts/format-cpp.sh +++ b/.github/scripts/format-cpp.sh @@ -19,8 +19,8 @@ # Checks all the modified c/c++ files, format them and adds them # to the commit. 
-for FILE in $(git diff upstream/$1 --name-only | grep -E '.*\.(cc|cpp|h|hpp)$') +for FILE in $(git diff $1/$2 --name-only | grep -E '.*\.(cc|cpp|h|hpp)$') do clang-format -i -style=file $FILE git add $FILE -done \ No newline at end of file +done diff --git a/.github/scripts/format-py.sh b/.github/scripts/format-py.sh index 0a85078b7..ca3ca5671 100755 --- a/.github/scripts/format-py.sh +++ b/.github/scripts/format-py.sh @@ -19,8 +19,8 @@ # Checks all the modified c/c++ files, format them and adds them # to the commit. -for FILE in $(git diff upstream/$1 --name-only | grep -E '.*\.py$') +for FILE in $(git diff $1/$2 --name-only | grep -E '.*\.py$') do autopep8 --in-place -a $FILE git add $FILE -done \ No newline at end of file +done diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 9e4675d2c..b21674c4c 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -1,4 +1,4 @@ - +# Automatic code formatting name: "format" on: pull_request: @@ -8,7 +8,6 @@ on: env: python_version: "3.9" - repo: "mlcommons/inference" jobs: format-code: @@ -16,8 +15,7 @@ jobs: steps: - uses: actions/checkout@v3 with: - ref: ${{ github.head_ref || github.ref_name }} - repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} - name: Set up Python ${{ env.python_version }} uses: actions/setup-python@v3 with: @@ -29,27 +27,26 @@ jobs: - name: Grant permissions run: | - chmod 777 "${GITHUB_WORKSPACE}/.github/scripts/format-cpp.sh" - chmod 777 "${GITHUB_WORKSPACE}/.github/scripts/format-py.sh" + chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-cpp.sh" + chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-py.sh" - name: Format Codebase - if: ${{ github.event.pull_request.base.repo.full_name == env.repo }} run: | git remote add upstream ${{ github.event.pull_request.base.repo.clone_url }} - git fetch upstream ${{ github.event.pull_request.base.ref }} - ".github/scripts/format-cpp.sh" "${{ github.event.pull_request.base.ref }}" - ".github/scripts/format-py.sh" "${{ github.event.pull_request.base.ref }}" + git fetch upstream + ".github/scripts/format-cpp.sh" "upstream" "${{ github.event.pull_request.base.ref }}" + ".github/scripts/format-py.sh" "upstream" "${{ github.event.pull_request.base.ref }}" - name: Commit run: | cd ${GITHUB_WORKSPACE} - HAS_CHANGES=$(git diff --cached --name-only) + HAS_CHANGES=$(git diff --staged --name-only) if [ ${#HAS_CHANGES} -gt 0 ]; then git log - git config --global user.email "${GITHUB_ACTOR_ID}+${GITHUB_ACTOR}@users.noreply.github.com" - git config --global user.name "${GITHUB_ACTOR}" + git config --global user.email "${{ github.actor }}@users.noreply.github.com" + git config --global user.name "${{ github.actor }}" git commit -m '[Automated Commit] Format Codebase' - git push upstream ${{ github.head_ref || github.ref_name }} + git push fi diff --git a/language/bert/accuracy-squad.py b/language/bert/accuracy-squad.py index 557f360fc..514258de3 100644 --- a/language/bert/accuracy-squad.py +++ b/language/bert/accuracy-squad.py @@ -82,8 +82,7 @@ "int64": np.int64, "float16": np.float16, "float32": np.float32, - "float64": np.float64, -} + "float64": np.float64} def get_final_text(pred_text, orig_text, do_lower_case): From 064c7fd0fb864a56014a41d1ff007acff92843c0 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 29 Oct 2024 16:49:24 +0000 Subject: [PATCH 4/5] =?UTF-8?q?Update=20submission=5Fchecker.py=20|=20mixt?= 
=?UTF-8?q?ral=20is=20not=20having=20singlestream=20sce=E2=80=A6=20(#1881)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update submission_checker.py | mixtral is not having singlestream scenario * [Automated Commit] Format Codebase --- tools/submission/submission_checker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index e6218aa2e..bd06eeec8 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -298,7 +298,7 @@ "llama2-70b-99": ["Server", "Offline"], "llama2-70b-99.9": ["Server", "Offline"], "stable-diffusion-xl": ["SingleStream", "Offline", "Server"], - "mixtral-8x7b": ["SingleStream", "Server", "Offline"], + "mixtral-8x7b": ["Server", "Offline"] }, "optional-scenarios-datacenter-edge": {}, "accuracy-target": { @@ -2743,6 +2743,7 @@ def check_measurement_dir( val.replace(" ", "") conf_model, conf_scenario, conf_key = key.split(".") if ( + (conf_key == "sample_concatenate_permutation") and ((conf_model == conf_ref_model) or conf_model == "*") and ((conf_scenario == scenario) or conf_scenario == "*") From ec8cbe54ac2fd1a8b3585ca6b9b7493a99cd023f Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 29 Oct 2024 16:51:29 +0000 Subject: [PATCH 5/5] Docs update for IndySCC24, Improve the final report generation, Make some GH actions run only on MLCommons (#1876) * Support batch-size in llama2 run * Add Rclone-Cloudflare download instructions to README.md * Add Rclone-Cloudflare download instructiosn to README.md * Minor wording edit to README.md * Add Rclone-Cloudflare download instructions to README.md * Add Rclone-GDrive download instructions to README.md * Add new and old instructions to README.md * Tweak language in README.md * Language tweak in README.md * Minor language tweak in README.md * Fix typo in README.md * Count error when logging errors: submission_checker.py * Fixes #1648, restrict loadgen uncommitted error message to within the loadgen directory * Update test-rnnt.yml (#1688) Stopping the github action for rnnt * Added docs init Added github action for website publish Update benchmark documentation Update publish.yaml Update publish.yaml Update benchmark documentation Improved the submission documentation Fix taskname Removed unused images * Fix benchmark URLs * Fix links * Add _full variation to run commands * Added script flow diagram * Added docker setup command for CM, extra run options * Added support for docker options in the docs * Added --quiet to the CM run_cmds in docs * Fix the test query count for cm commands * Support ctuning-cpp implementation * Added commands for mobilenet models * Docs cleanup * Docs cleanup * Added separate files for dataset and models in the docs * Remove redundant tab in the docs * Fixes some WIP models in the docs * Use the official docs page for CM installation * Fix the deadlink in docs * Fix indendation issue in docs * Added dockerinfo for nvidia implementation * Added run options for gptj * Added execution environment tabs * Cleanup of the docs * Cleanup of the docs * Reordered the sections of the docs page * Removed an unnecessary heading in the docs * Fixes the commands for datacenter * Fix the build --sdist for loadgen * Fixes #1761, llama2 and mixtral runtime error on CPU systems * Added mixtral to the benchmark list, improved benchmark docs * Update docs for MLPerf inference v4.1 * Update docs for MLPerf inference v4.1 * Fix typo * 
Gave direct link to implementation readmes * Added tables detailing implementations * Update vision README.md, split the frameworks into separate rows * Update README.md * pointed links to specific frameworks * pointed links to specific frameworks * Update Submission_Guidelines.md * Update Submission_Guidelines.md * Update Submission_Guidelines.md * api support llama2 * Added request module and reduced max token len * Fix for llama2 api server * Update SUT_API offline to work for OpenAI * Update SUT_API.py * Minor fixes * Fix json import in SUT_API.py * Fix llama2 token length * Added model name verification with server * clean temp files * support num_workers in LLAMA2 SUTs * Remove batching from Offline SUT_API.py * Update SUT_API.py * Minor fixes for llama2 API * Fix for llama2 API * removed table of contents * enabled llama2-nvidia + vllm-NM : WIP * enabled dlrm for intel * lower cased implementation * added raw data input * corrected data download commands * renamed filename * changes for bert and vllm * documentation to work on custom repo and branch * benchmark index page update * enabled sdxl for nvidia and intel * updated vllm server run cmd * benchmark page information addition * fix indendation issue * Added submission categories * update submission page - generate submission with or w/o using CM for benchmarking * Updated kits dataset documentation * Updated model parameters * updation of information * updated non cm based benchmark * added info about hf password * added links to model and access tokens * Updated reference results structuree tree * submission docs cleanup * Some cleanups for benchmark info * Some cleanups for benchmark info * Some cleanups for benchmark info * added generic stubs deepsparse * Some cleanups for benchmark info * Some cleanups for benchmark info * Some cleanups for benchmark info * Some cleanups for benchmark info (FID and CLIP data added) * typo fix for bert deepsparse framework * added min system requirements for models * fixed code version * changes for displaying reference and intel implementation tip * added reference to installation page * updated neural magic documentation * Added links to the install page, redirect benchmarks page * added tips about batch size and dataset for nvidia llama2 * fix conditions logic * modified tips and additional run cmds * sentence corrections * Minor fix for the documentation * fixed bug in deepsparse generic model stubs + styling * added more information to stubs * Added SCC24 readme, support reproducibility in the docs * Made clear the custom CM repo URL format * Support conditional implementation, setup and run tips * Support rocm for sdxl * Fix _short tag support * Fix install URL * Expose bfloat16 and float16 options for sdxl * Expose download model to host option for sdxl * IndySCC24 documentation added * Improve the SCC24 docs * Improve the support of short variation * Improved the indyscc24 documentation * Updated scc run commands * removed test_query_count option for scc * Remove scc24 in the main docs * Remove scc24 in the main docs * Fix docs: indendation issue on the submission page * generalised code for skipping test query count * Fixes for SCC24 docs * Fix scenario text in main.py * Fix links for scc24 * Fix links for scc24 * Improve the general docs * Fix links for scc24 * Use float16 in scc24 doc * Improve scc24 docs * Improve scc24 docs * Use float16 in scc24 doc * fixed command bug * Fix typo in docs * Fix typo in docs * Remove unnecessary indendation in docs * initial commit for tip - 
native run CUDA * Updated tip * added docker_cm_repo_branch to more run option - docker * Update docs for IndySCC24 * Support custom repo branch and owner for final report generation * enabled amd implementation for llama2 * updations for amd - docs * Fix scenarios in docs page * formatted the files to pass the gh action * scenarios -> fixed_scenarios in docs * Update docs for specifying custom results dir * Increment version to 4.1.8 * Increment version to 4.1.9 * Merge * Increment version to 4.1.8 * Let the CLA and build wheels test run only on the mlcommons repo * Update index.md --------- Co-authored-by: Nathan Wasson Co-authored-by: anandhu-eng Co-authored-by: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Co-authored-by: Michael Goin Co-authored-by: arjunsuresh --- .github/workflows/build_wheels.yml | 1 + .github/workflows/cla.yml | 1 + docs/benchmarks/language/llama2-70b.md | 7 +++ .../reproducibility/indyscc24-bert.md | 62 +++++++++++++------ .../text_to_image/reproducibility/scc24.md | 10 +-- docs/submission/index.md | 3 + main.py | 37 ++++++----- tools/submission/generate_final_report.py | 20 ++++-- 8 files changed, 96 insertions(+), 45 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index d7f98764e..c35870b42 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -13,6 +13,7 @@ on: jobs: update_version: name: Update version only on ubuntu but used by windows and macos + if: github.repository_owner == 'mlcommons' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index c0e1544d4..5793f13c2 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -8,6 +8,7 @@ on: jobs: cla-check: + if: github.repository_owner == 'mlcommons' runs-on: ubuntu-latest steps: - name: "MLCommons CLA bot check" diff --git a/docs/benchmarks/language/llama2-70b.md b/docs/benchmarks/language/llama2-70b.md index e68693716..40c62cf71 100644 --- a/docs/benchmarks/language/llama2-70b.md +++ b/docs/benchmarks/language/llama2-70b.md @@ -25,3 +25,10 @@ hide: {{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "neuralmagic") }} {{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "neuralmagic") }} + +=== "AMD" + ## AMD MLPerf Implementation + +{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "amd") }} + +{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "amd") }} diff --git a/docs/benchmarks/language/reproducibility/indyscc24-bert.md b/docs/benchmarks/language/reproducibility/indyscc24-bert.md index 68215c5e1..86f4f285d 100644 --- a/docs/benchmarks/language/reproducibility/indyscc24-bert.md +++ b/docs/benchmarks/language/reproducibility/indyscc24-bert.md @@ -9,15 +9,11 @@ hide: This guide is designed for the [IndySCC 2024](https://sc24.supercomputing.org/students/indyscc/) to walk participants through running and optimizing the [MLPerf Inference Benchmark](https://arxiv.org/abs/1911.02549) using [Bert Large](https://github.com/mlcommons/inference/tree/master/language/bert#supported-models) across various software and hardware configurations. The goal is to maximize system throughput (measured in samples per second) without compromising accuracy. -For a valid MLPerf inference submission, two types of runs are required: a performance run and an accuracy run. In this competition, we focus on the `Offline` scenario, where throughput is the key metric—higher values are better. 
The official MLPerf inference benchmark for Bert Large requires processing a minimum of 10833 samples in both performance and accuracy modes using the Squad v1.1 dataset. Setting up for Nvidia GPUs may take 2-3 hours but can be done offline. Your final output will be a tarball (`mlperf_submission.tar.gz`) containing MLPerf-compatible results, which you will submit to the SCC organizers for scoring. +For a valid MLPerf inference submission, two types of runs are required: a performance run and an accuracy run. In this competition, we focus on the `Offline` scenario, where throughput is the key metric—higher values are better. The official MLPerf inference benchmark for Bert Large requires processing a minimum of 10833 samples in both performance and accuracy modes using the Squad v1.1 dataset. ## Scoring -In the SCC, your first objective will be to run a reference (unoptimized) Python implementation or a vendor-provided version (such as Nvidia's) of the MLPerf inference benchmark to secure a baseline score. - -Once the initial run is successful, you'll have the opportunity to optimize the benchmark further by maximizing system utilization, applying quantization techniques, adjusting ML frameworks, experimenting with batch sizes, and more, all of which can earn you additional points. - -Since vendor implementations of the MLPerf inference benchmark vary and are often limited to single-node benchmarking, teams will compete within their respective hardware categories (e.g., Nvidia GPUs, AMD GPUs). Points will be awarded based on the throughput achieved on your system. +In the IndySCC 2024, your objective will be to run a reference (unoptimized) Python implementation of the MLPerf inference benchmark to complete a successful submission passing the submission checker. Only one of the available framework needs to be submitted. !!! info @@ -25,24 +21,50 @@ Since vendor implementations of the MLPerf inference benchmark vary and are ofte If you encounter issues or have questions, please submit them [here](https://github.com/mlcommons/cm4mlops/issues) ## Artifacts to submit to the SCC committee - -You will need to submit the following files: - -* `mlperf_submission_short.tar.gz` - automatically generated file with validated MLPerf results. -* `mlperf_submission_short_summary.json` - automatically generated summary of MLPerf results. -* `mlperf_submission_short.run` - CM commands to run MLPerf BERT inference benchmark saved to this file. -* `mlperf_submission_short.tstamps` - execution timestamps before and after CM command saved to this file. -* `mlperf_submission_short.md` - description of your platform and some highlights of the MLPerf benchmark execution. - +All the needed files are automatically pushed to the GitHub repository if you manage to complete the given commands. No additional files need to be submitted. 
=== "MLCommons-Python" ## MLPerf Reference Implementation in Python -{{ mlperf_inference_implementation_readme (4, "bert-99", "reference", extra_variation_tags=",_short", scenarios=["Offline"],categories=["Edge"], setup_tips=False) }} +{{ mlperf_inference_implementation_readme (4, "bert-99", "reference", extra_variation_tags="", fixed_scenarios=["Offline"],categories=["Edge"], setup_tips=False) }} -=== "Nvidia" - ## Nvidia MLPerf Implementation -{{ mlperf_inference_implementation_readme (4, "bert-99", "nvidia", extra_variation_tags=",_short", scenarios=["Offline"],categories=["Edge"], setup_tips=False, implementation_tips=False) }} - +## Submission Commands + +### Generate actual submission tree + +```bash +cm run script --tags=generate,inference,submission \ + --clean \ + --preprocess_submission=yes \ + --run-checker \ + --tar=yes \ + --env.CM_TAR_OUTFILE=submission.tar.gz \ + --division=open \ + --category=edge \ + --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ + --run_style=test \ + --quiet \ + --submitter= +``` + +* Use `--hw_name="My system name"` to give a meaningful system name. + + +### Push Results to GitHub + +Fork the `mlperf-inference-results-scc24` branch of the repository URL at [https://github.com/mlcommons/cm4mlperf-inference](https://github.com/mlcommons/cm4mlperf-inference). + +Run the following command after **replacing `--repo_url` with your GitHub fork URL**. + +```bash +cm run script --tags=push,github,mlperf,inference,submission \ + --repo_url=https://github.com//cm4mlperf-inference \ + --repo_branch=mlperf-inference-results-scc24 \ + --commit_message="Results on system " \ + --quiet +``` + +Once uploaded give a Pull Request to the origin repository. Github action will be running there and once +finished you can see your submitted results at [https://docs.mlcommons.org/cm4mlperf-inference](https://docs.mlcommons.org/cm4mlperf-inference). diff --git a/docs/benchmarks/text_to_image/reproducibility/scc24.md b/docs/benchmarks/text_to_image/reproducibility/scc24.md index bae4eceb3..6fe119b53 100644 --- a/docs/benchmarks/text_to_image/reproducibility/scc24.md +++ b/docs/benchmarks/text_to_image/reproducibility/scc24.md @@ -46,11 +46,11 @@ or supporting multi-node execution) useful for the community and [MLCommons](htt === "MLCommons-Python" ## MLPerf Reference Implementation in Python -{{ mlperf_inference_implementation_readme (4, "sdxl", "reference", extra_variation_tags=",_short,_scc24-base", devices=["ROCm", "CUDA"],scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, skip_test_query_count=True, extra_input_string="--precision=float16") }} +{{ mlperf_inference_implementation_readme (4, "sdxl", "reference", extra_variation_tags=",_short,_scc24-base", devices=["ROCm", "CUDA"],fixed_scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, skip_test_query_count=True, extra_input_string="--precision=float16") }} === "Nvidia" ## Nvidia MLPerf Implementation -{{ mlperf_inference_implementation_readme (4, "sdxl", "nvidia", extra_variation_tags=",_short,_scc24-base", scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }} +{{ mlperf_inference_implementation_readme (4, "sdxl", "nvidia", extra_variation_tags=",_short,_scc24-base", fixed_scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }} !!! info Once the above run is successful, you can change `_scc24-base` to `_scc24-main` to run the main variant. 
@@ -80,17 +80,17 @@ cm run script --tags=generate,inference,submission \ ### Push Results to GitHub -Fork the repository URL at [https://github.com/gateoverflow/cm4mlperf-inference](https://github.com/gateoverflow/cm4mlperf-inference). +Fork the `mlperf-inference-results-scc24` branch of the repository URL at [https://github.com/mlcommons/cm4mlperf-inference](https://github.com/mlcommons/cm4mlperf-inference). Run the following command after **replacing `--repo_url` with your GitHub fork URL**. ```bash cm run script --tags=push,github,mlperf,inference,submission \ - --repo_url=https://github.com/gateoverflow/cm4mlperf-inference \ + --repo_url=https://github.com//cm4mlperf-inference \ --repo_branch=mlperf-inference-results-scc24 \ --commit_message="Results on system " \ --quiet ``` Once uploaded give a Pull Request to the origin repository. Github action will be running there and once -finished you can see your submitted results at [https://gateoverflow.github.io/cm4mlperf-inference](https://gateoverflow.github.io/cm4mlperf-inference). +finished you can see your submitted results at [https://docs.mlcommons.org/cm4mlperf-inference](https://docs.mlcommons.org/cm4mlperf-inference). diff --git a/docs/submission/index.md b/docs/submission/index.md index 4f6e05c25..adcd3df53 100644 --- a/docs/submission/index.md +++ b/docs/submission/index.md @@ -2,6 +2,7 @@ hide: - toc --- +[![Streamline your MLPerf results using CM Framework](https://img.youtube.com/vi/eI1Hoecc3ho/0.jpg)](https://youtu.be/eI1Hoecc3ho) === "CM based benchmark" If you have followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, all the valid results will get aggregated to the `cm cache` folder. The following command could be used to browse the structure of inference results folder generated by CM. @@ -125,6 +126,8 @@ Once all the results across all the models are ready you can use the following c * Use `--hw_notes_extra` option to add additional notes like `--hw_notes_extra="Result taken by NAME" ` +* Use `--results_dir` option to specify the results folder for Non CM based benchmarks + The above command should generate "submission.tar.gz" if there are no submission checker issues and you can upload it to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission). 
## Aggregate Results in GitHub diff --git a/main.py b/main.py index 8fc8f202b..a0a0ec99e 100755 --- a/main.py +++ b/main.py @@ -10,7 +10,7 @@ def mlperf_inference_implementation_readme( setup_tips=True, run_tips=True, skip_test_query_count=False, - scenarios=[], + fixed_scenarios=[], devices=[], frameworks=[], categories=[], @@ -59,6 +59,11 @@ def mlperf_inference_implementation_readme( devices = ["CUDA"] frameworks = ["TensorRT"] + elif implementation == "amd": + devices = ["cuda"] + frameworks = ["pytorch"] + execution_envs.remove("Docker") + elif implementation == "neuralmagic": devices = ["CUDA"] frameworks = ["pytorch"] @@ -107,7 +112,7 @@ def mlperf_inference_implementation_readme( frameworks = ["Onnxruntime"] elif implementation == "ctuning-cpp": - scenarios = ["SingleStream"] + fixed_scenarios = ["SingleStream"] devices = ["CPU"] if model.lower() == "resnet50": frameworks = ["TFLite"] @@ -132,16 +137,16 @@ def mlperf_inference_implementation_readme( final_run_mode = "valid" if "short" not in extra_variation_tags else "test" for category in categories: - if not scenarios: - if category == "Edge" and not scenarios: - scenarios = ["Offline", "SingleStream"] - if ( - model.lower() in ["resnet50", "retinanet"] - and not "MultiStream" in scenarios - ): # MultiStream was duplicating - scenarios.append("MultiStream") - elif category == "Datacenter": - scenarios = ["Offline", "Server"] + if category == "Edge": + scenarios = ["Offline", "SingleStream"] + if model.lower() in [ + "resnet50", "retinanet"] and not "MultiStream" in scenarios: # MultiStream was duplicating + scenarios.append("MultiStream") + elif category == "Datacenter": + scenarios = ["Offline", "Server"] + if fixed_scenarios: + scenarios = [ + scenario for scenario in scenarios if scenario in fixed_scenarios] content += f'{pre_space}=== "{category.lower()}"\n\n' @@ -163,7 +168,7 @@ def mlperf_inference_implementation_readme( cur_space3 = cur_space2 + " " cur_space4 = cur_space3 + " " - content += f'{cur_space1}=== "{device}"\n' + content += f"{cur_space1}=== \"{device}\"\n" content += f"{cur_space2}##### {device} device\n\n" # minimum system requirements @@ -185,6 +190,7 @@ def mlperf_inference_implementation_readme( # ref to cm installation content += f"{cur_space3}Please refer to the [installation page](site:inference/install/) to install CM for running the automated benchmark commands.\n\n" test_query_count = get_test_query_count( + model, implementation, device.lower() ) if ( @@ -262,6 +268,7 @@ def mlperf_inference_implementation_readme( content += f"{cur_space3}
\n" content += f"{cur_space3} Please click here to see more options for the docker launch \n\n" content += f"{cur_space3}* `--docker_cm_repo=`: to use a custom fork of cm4mlops repository inside the docker image\n\n" + content += f"{cur_space3}* `--docker_cm_repo_branch=`: to checkout a custom branch of the cloned cm4mlops repository inside the docker image\n\n" content += f"{cur_space3}* `--docker_cache=no`: to not use docker cache during the image build\n" if implementation.lower() == "nvidia": @@ -325,7 +332,7 @@ def mlperf_inference_implementation_readme( run_suffix += f"{cur_space3}
\n" for scenario in scenarios: - content += f'{cur_space3}=== "{scenario}"\n{cur_space4}###### {scenario}\n\n' + content += f"{cur_space3}=== \"{scenario}\"\n{cur_space4}###### {scenario}\n\n" run_cmd = mlperf_inference_run_command( spaces + 21, model, @@ -347,7 +354,7 @@ def mlperf_inference_implementation_readme( # content += run_suffix if len(scenarios) > 1: - content += f'{cur_space3}=== "All Scenarios"\n{cur_space4}###### All Scenarios\n\n' + content += f"{cur_space3}=== \"All Scenarios\"\n{cur_space4}###### All Scenarios\n\n" run_cmd = mlperf_inference_run_command( spaces + 21, model, diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py index a590cacc6..ba2c368cd 100644 --- a/tools/submission/generate_final_report.py +++ b/tools/submission/generate_final_report.py @@ -16,13 +16,22 @@ def get_args(): """Parse commandline.""" parser = argparse.ArgumentParser() parser.add_argument( - "--input", + '--input', required=True, - help="results csv from checker") - parser.add_argument("--version", default="4.0", help="mlperf version") + help='results csv from checker') + parser.add_argument('--version', default='4.1', help='mlperf version') parser.add_argument( - "--repository", default="submissions_inference_4.0", help="mlperf repository" - ) + '--repository', + default='submissions_inference_4.1', + help='mlperf repository') + parser.add_argument( + '--repository-owner', + default='mlcommons', + help='mlperf repository owner in GitHub') + parser.add_argument( + '--repository-branch', + default='main', + help='mlperf repository branch') args = parser.parse_args() return args @@ -343,6 +352,7 @@ def reformatlink(data, key): outjsondata = [i for i in outjsondata if i != {}] with open(f"{output}_results.json", "w") as f: f.write(json.dumps(outjsondata, indent=2)) + score_format = writer.book.add_format({"num_format": "#,##0.00"}) bg_format = writer.book.add_format({"bg_color": "#efefef"}) for ws in writer.book.worksheets():