Merge from ctuning (#772)

mlcommons · May 26, 2023 · 90376a2 · 90376a2
2 parents 240287e + 903ea3c
commit 90376a2
Show file tree

Hide file tree

Showing 14 changed files with 866 additions and 88 deletions.
diff --git a/cm-mlops/script/download-and-extract/_cm.json b/cm-mlops/script/download-and-extract/_cm.json
@@ -22,7 +22,8 @@
     "file",
     "download-and-extract"
   ],
-  "default_env": {
+  "env": {
+    "CM_DAE_REMOVE_EXTRACTED": "yes"
   },
   "uid": "c67e81a4ce2649f5",
   "variations": {
@@ -52,9 +53,9 @@
         "CM_DAE_EXTRACT_DOWNLOADED": "yes"
       }
     },
-    "remove-extracted": {
+    "no-remove-extracted": {
       "env": {
-        "CM_DAE_REMOVE_EXTRACTED": "yes"
+        "CM_DAE_REMOVE_EXTRACTED": "no"
       }
     }
   },

diff --git a/cm-mlops/script/download-and-extract/customize.py b/cm-mlops/script/download-and-extract/customize.py
@@ -26,13 +26,15 @@ def preprocess(i):
                 env['CM_DAE_FILENAME'] = "index.html"
 
         filename = env['CM_DAE_FILENAME']
+        env['CM_DAE_DOWNLOADED_FILENAME'] = filename
 
         url = env['CM_DAE_URL']
+        extra_download_options = env.get('CM_DAE_EXTRA_DOWNLOAD_OPTIONS', '')
 
         if env['CM_DAE_DOWNLOAD_TOOL'] == "wget":
-            env['CM_DAE_DOWNLOAD_CMD'] = f"wget -nc {url}"
+            env['CM_DAE_DOWNLOAD_CMD'] = f"wget -nc {extra_download_options} {url}"
         if env['CM_DAE_DOWNLOAD_TOOL'] == "curl":
-            env['CM_DAE_DOWNLOAD_CMD'] = f"curl {url}"
+            env['CM_DAE_DOWNLOAD_CMD'] = f"curl {extra_download_options} {url}"
 
     else:
         env['CM_DAE_DOWNLOAD_CMD'] = ""
@@ -58,25 +60,31 @@ def preprocess(i):
         if filename.endswith(".zip"):
             env['CM_DAE_EXTRACT_TOOL'] = "unzip"
         elif filename.endswith(".tar.gz"):
-            env['CM_DAE_EXTRACT_TOOL'] = "tar -xvzf"
+            env['CM_DAE_EXTRACT_TOOL_OPTIONS'] = ' -xvzf'
+            env['CM_DAE_EXTRACT_TOOL'] = 'tar '
         elif filename.endswith(".tar"):
-            env['CM_DAE_EXTRACT_TOOL'] = "tar -xvf"
+            env['CM_DAE_EXTRACT_TOOL_OPTIONS'] = ' -xvf'
+            env['CM_DAE_EXTRACT_TOOL'] = 'tar '
         elif filename.endswith(".gz"):
             env['CM_DAE_EXTRACT_TOOL'] = 'gzip -d '+ ('-k ' if not remove_extracted else '')
             env['CM_DAE_GZIP'] = "gzip -d"
         elif env.get('CM_DAE_UNZIP','') == 'yes':
             env['CM_DAE_EXTRACT_TOOL'] = 'unzip '
         elif env.get('CM_DAE_UNTAR','') == 'yes':
-            env['CM_DAE_EXTRACT_TOOL'] = 'tar -xvf '
+            env['CM_DAE_EXTRACT_TOOL_OPTIONS'] = ' -xvf'
+            env['CM_DAE_EXTRACT_TOOL'] = 'tar '
         elif env.get('CM_DAE_GZIP','') == 'yes':
-            env['CM_DAE_EXTRACT_CMD'] = 'gzip -d '+ ('-k ' if not remove_extracted else '')
+            env['CM_DAE_EXTRACT_CMD'] = 'gzip '
+            env['CM_DAE_EXTRACT_TOOL_OPTIONS'] = ' -d '+ ('-k ' if not remove_extracted else '')
         else:
             return {'return': 1, 'error': 'CM_DAE_EXTRACT_DOWNLOADED is yes but neither CM_DAE_UNZIP nor CM_DAE_UNTAR is yes'}
 
-        env['CM_DAE_EXTRACT_CMD'] = env['CM_DAE_EXTRACT_TOOL'] + ' ' + filename
+        if 'tar ' in env['CM_DAE_EXTRACT_TOOL'] and env.get('CM_DAE_EXTRACT_TO_FOLDER', '') != '':
+            env['CM_DAE_EXTRACT_TOOL_OPTIONS'] = ' --one-top-level='+ env['CM_DAE_EXTRACT_TO_FOLDER'] + env.get('CM_DAE_EXTRACT_TOOL_OPTIONS', '')
+            env['CM_DAE_EXTRACTED_FILENAME'] = env['CM_DAE_EXTRACT_TO_FOLDER']
+
 
-        if env.get('CM_DAE_EXTRACTED_FILENAME'):
-            env['CM_DAE_FILENAME'] = env['CM_DAE_EXTRACTED_FILENAME']
+        env['CM_DAE_EXTRACT_CMD'] = env['CM_DAE_EXTRACT_TOOL'] + ' ' + env.get('CM_DAE_EXTRACT_TOOL_EXTRA_OPTIONS', '') + ' ' + env.get('CM_DAE_EXTRACT_TOOL_OPTIONS', '')+ ' '+ filename
 
         if env.get('CM_DAE_EXTRACTED_CHECKSUM'):
             env['CM_DAE_EXTRACTED_CHECKSUM_CMD'] = "echo {} {} | md5sum -c".format(env.get('CM_DAE_EXTRACTED_CHECKSUM'), env['CM_DAE_EXTRACTED_FILENAME'])
@@ -97,4 +105,12 @@ def postprocess(i):
     else:
         return {'return':1, 'error': 'CM_DAE_FILENAME is not set and CM_DAE_URL given is not pointing to a file'}
 
+    if env.get('CM_DAE_EXTRACTED_FILENAME'):
+        extracted_name = os.path.basename(env['CM_DAE_EXTRACTED_FILENAME'])
+        extracted_path = os.path.join(os.getcwd(), extracted_name)
+        env['CM_DAE_FILE_EXTRACTED_PATH'] = extracted_path
+
+    if env.get('CM_DAE_FINAL_ENV_NAME'):
+        env['CM_DAE_FINAL_ENV_NAME'] = filename
+
     return {'return':0}
diff --git a/cm-mlops/script/get-dataset-imagenet-train/_cm.json b/cm-mlops/script/get-dataset-imagenet-train/_cm.json
@@ -18,7 +18,10 @@
       "tags": "download,torrent",
       "names": [
         "download-torrent"
-      ]
+      ],
+      "enable_if_env": {
+        "CM_DATASET_IMAGENET_TRAIN_REQUIRE_TORRENT": [ "yes" ]
+      }
     },
     {
       "tags": "download,extract,file,_extract",

diff --git a/cm-mlops/script/get-dataset-imagenet-val/_cm.json b/cm-mlops/script/get-dataset-imagenet-val/_cm.json
@@ -5,13 +5,13 @@
   "cache": true,
   "category": "ML/AI datasets",
   "category_sort":8500,
-  "default_variation": "2012-500",
   "env": {
     "CM_DATASET": "IMAGENET"
   },
   "new_env_keys": [
     "CM_DATASET_PATH",
     "CM_DATASET_IMAGENET_PATH",
+    "CM_DATASET_IMAGENET_VAL_PATH",
     "CM_DATASET_SIZE",
     "CM_DATASET_VER"
   ],
@@ -26,47 +26,69 @@
     "original"
   ],
   "uid": "7afd58d287fe4f11",
+  "deps": [
+    {
+      "tags": "detect,os"
+    }
+  ],
+  "prehook_deps": [
+    {
+      "tags": "download,torrent",
+      "names": [
+        "download-torrent"
+      ],
+      "enable_if_env": {
+        "CM_DATASET_IMAGENET_VAL_REQUIRE_TORRENT": [ "yes" ]
+      }
+    },
+    {
+      "tags": "download,extract,file,_extract",
+      "env": {
+        "CM_DAE_EXTRACT_TO_FOLDER": "imagenet-2012-val"
+      },
+      "enable_if_env": {
+        "CM_DATASET_IMAGENET_VAL_REQUIRE_DAE": [ "yes" ]
+      }
+    }
+  ],
   "variations": {
     "2012": {
+      "group": "dataset-version",
+      "default": true,
       "env": {
         "CM_DATASET_VER": "2012"
       }
     },
-    "2012-1": {
-      "base": [
-	"size.1",
-	"2012"
-      ],
-      "env": {
-        "CM_DATASET_SIZE": "1"
-      }
-    },
     "2012-500": {
       "base": [
-	"size.500",
-	"2012"
-      ],
-      "env": {
-        "CM_DATASET_SIZE": "500"
-      }
+        "size.500",
+        "2012"
+      ]
     },
     "2012-full": {
       "base": [
-	"full",
-	"2012"
-      ],
+        "full",
+        "2012"
+      ]
+    },
+    "full": {
       "env": {
         "CM_DATASET_SIZE": "50000",
-        "CM_IMAGENET_FULL": "yes"
+        "CM_IMAGENET_FULL": "yes",
+        "CM_DAE_DOWNLOADED_FILENAME": "ILSVRC2012_img_val.tar",
+        "CM_DAE_DOWNLOADED_CHECKSUM": "29b22e2961454d5413ddabcf34fc5622"
       }
     },
-    "full": {
+    "size.500": {
+      "group": "count",
+      "default": true,
       "env": {
-        "CM_DATASET_SIZE": "50000",
-        "CM_IMAGENET_FULL": "yes"
+        "CM_DATASET_SIZE": "500",
+        "CM_DAE_URL": "https://www.dropbox.com/s/57s11df6pts3z69/ILSVRC2012_img_val_500.tar"
       }
     },
     "size.#": {
+      "group": "count",
       "env": {
         "CM_DATASET_SIZE": "#"
       }

diff --git a/cm-mlops/script/get-dataset-imagenet-val/customize.py b/cm-mlops/script/get-dataset-imagenet-val/customize.py
@@ -6,36 +6,72 @@ def preprocess(i):
     os_info = i['os_info']
 
     env = i['env']
+    os_info = i['os_info']
+    if os_info['platform'] == 'windows':
+        return {'return':0}
 
     full = env.get('CM_IMAGENET_FULL', '').strip() == 'yes'
 
-    path = env.get('CM_INPUT', '').strip()
+    path = env.get('CM_INPUT', env.get('IMAGENET_PATH', '')).strip()
 
-    if full:
-        if path == '':
-            # If full dataset but path to imagenet is not specified,
-            # try IMAGENET_PATH
+    if path == '':
+        if full:
 
-            path = env.get('IMAGENET_PATH', '')
+            if env.get('CM_DATASET_IMAGENET_VAL_TORRENT_PATH'):
+                path = env['CM_DATASET_IMAGENET_VAL_TORRENT_PATH']
+                env['CM_DATASET_IMAGENET_VAL_REQUIRE_TORRENT'] = "yes"
 
-        if path == '':
-            return {'return':1, 'error':'Please rerun the last CM command with --env.IMAGENET_PATH={path the folder containing full ImageNet images} or envoke cm run script "get val dataset imagenet" --input={path to the folder containing ImageNet images}'}
+                r = automation.update_deps({'deps':meta['prehook_deps'],
+                    'update_deps':{
+                        'download-torrent':{
+                        'tags':"_torrent."+path
+                        }
+                    }
+                })
 
-        if not os.path.isdir(path):
-            return {'return':1, 'error':'Path {} doesn\'t exist'.format(path)}
+                if r['return'] > 0: return r
+                env['CM_DATASET_IMAGENET_VAL_REQUIRE_DAE'] = 'yes'
+                env['CM_DAE_ONLY_EXTRACT'] = 'yes'
+
+                return {'return':0}
+
+            else:
+                return {'return':1, 'error':'Please rerun the last CM command with --env.IMAGENET_PATH={path the folder containing full ImageNet images} or envoke cm run script "get val dataset imagenet" --input={path to the folder containing ImageNet images}'}
 
-        path_image = os.path.join(path, 'ILSVRC2012_val_00000001.JPEG')
+        else:
+            env['CM_DATASET_IMAGENET_VAL_REQUIRE_DAE'] = 'yes'
 
-        if not os.path.isfile(path_image):
-            return {'return':1, 'error':'ImageNet file {} not found'.format(path_image)}
 
-        env['CM_DATASET_PATH'] = path
-        env['CM_DATASET_IMAGENET_PATH'] = path
-    elif path!='':
-        if not os.path.isdir(path):
+    elif not os.path.isdir(path):
+        if path.endswith(".tar"):
+            env['CM_DAE_FILEPATH'] = path
+            env['CM_DATASET_IMAGENET_VAL_REQUIRE_DAE'] = 'yes'
+            env['CM_DAE_ONLY_EXTRACT'] = 'yes'
+            return {'return':0}
+        else:
             return {'return':1, 'error':'Path {} doesn\'t exist'.format(path)}
+    else:
+        env['CM_DAE_FILE_EXTRACTED_PATH'] = path
+
+    return {'return':0}
+
+def postprocess(i):
 
-        env['CM_DATASET_PATH'] = path
-        env['CM_DATASET_IMAGENET_PATH'] = path
+    os_info = i['os_info']
+    if os_info['platform'] == 'windows':
+        return {'return':0}
+
+    env = i['env']
+    path = env['CM_DAE_FILE_EXTRACTED_PATH']
+
+    path_image = os.path.join(path, 'ILSVRC2012_val_00000001.JPEG')
+
+    if not os.path.isfile(path_image):
+        return {'return':1, 'error':'ImageNet file {} not found'.format(path_image)}
+
+    env['CM_DATASET_PATH'] = path
+    env['CM_DATASET_IMAGENET_PATH'] = path
+    env['CM_DATASET_IMAGENET_VAL_PATH'] = path
 
     return {'return':0}
+
diff --git a/cm-mlops/script/get-dataset-imagenet-val/run.bat b/cm-mlops/script/get-dataset-imagenet-val/run.bat
@@ -11,4 +11,5 @@ if "%CM_DATASET_PATH%" == "" (
 
   echo CM_DATASET_PATH=%CD%\images > tmp-run-env.out
   echo CM_DATASET_IMAGENET_PATH=%CD%\images >> tmp-run-env.out
+  echo CM_DATASET_IMAGENET_VAL_PATH=%CD%\images >> tmp-run-env.out
 )
diff --git a/cm-mlops/script/get-dataset-imagenet-val/run.sh b/cm-mlops/script/get-dataset-imagenet-val/run.sh
diff --git a/cm-mlops/script/gui/playground_experiments_graph.py b/cm-mlops/script/gui/playground_experiments_graph.py
@@ -67,7 +67,8 @@ def page(st, params, parent, experiment):
 
     x = 'experiment set(s)' if result_uid=='' else 'result'
 
-    show_optional = True if result_uid=='' and len(results)>1 else False
+#    show_optional = True if result_uid=='' and len(results)>1 else False
+    show_optional = True if result_uid=='' else False
 
     st.write('''
              <center>

diff --git a/cm-mlops/script/import-mlperf-inference-to-experiment/README-extra.md b/cm-mlops/script/import-mlperf-inference-to-experiment/README-extra.md
@@ -0,0 +1,49 @@
+# About
+
+This portable script converts raw results from the [MLPerf™ Inference benchmark](https://github.com/mlcommons/inference)
+to the [MLCommons CM format](https://github.com/mlcommons/ck) for the [Collective Knowledge Playground](https://x.cKnowledge.org).
+
+The goal is to make it easier for the community to analyze MLPerf inference results, 
+add derived metrics such as performance/Watt and constraints,
+and link reproducibility reports as shown in these examples:
+* [Power efficiency to compare Qualcomm, Nvidia and Sima.ai devices](https://cKnowledge.org/mlcommons-mlperf-inference-gui-derived-metrics-and-conditions)
+* [Reproducibility report for Nvidia Orin](https://access.cknowledge.org/playground/?action=experiments&name=mlperf-inference--v3.0--edge--closed--image-classification--offline&result_uid=3751b230c800434a)
+
+Aggregated results are available in [this MLCommons repository](https://github.com/mlcommons/cm_inference_results).
+
+You can see these results at [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,all).
+
+## Usage
+
+We have tested this portable CM script on Ubuntu and Windows.
+
+Install [MLCommons CM framework](https://github.com/mlcommons/ck/blob/master/docs/installation.md).
+
+Pull the MLCommons CK repository with automation recipes for interoperable MLOps:
+```bash
+cm pull repo mlcommons@ck
+```
+
+Install repositories with raw MLPerf inference benchmark results:
+```bash
+cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.0" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.0
+cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v2.1" --env.CM_GIT_CHECKOUT=master --extra_cache_tags=mlperf-inference-results,version-2.1
+cm run script "get git repo _repo.https://github.com/mlcommons/inference_results_v3.0" --env.CM_GIT_CHECKOUT=main --extra_cache_tags=mlperf-inference-results,version-3.0
+```
+
+Convert raw MLPerf results into CM experiment entries:
+```bash
+cm run script "import mlperf inference to-experiment" 
+```
+
+Visualize results on your local machine via CK playground GUI:
+```bash
+cm run script "gui _playground"
+```
+
+These results are also available in the [public CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,all).
+
+# Contact us
+
+This project is maintained by the [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce).
+Join our [Discord server](https://discord.gg/JjWNWXKxwT) to ask questions, provide your feedback and participate in further developments.