Convert directory fbcode/pytorch to use the Ruff Formatter
Summary:
Converts the directory specified to use the Ruff formatter in pyfmt


If this diff causes merge conflicts when rebasing, please run
`hg status -n -0 --change . -I '**/*.{py,pyi}' | xargs -0 arc pyfmt`
on your diff, and amend any changes before rebasing onto latest.
That should help reduce or eliminate any merge conflicts.

allow-large-files

Reviewed By: yhcharles

Differential Revision: D66543433

fbshipit-source-id: 1e24ef6c1e230a3bceef28c73f9bb9118c3dab27
Thomas Polasek authored and facebook-github-bot committed Dec 4, 2024
1 parent 3d68691 commit e5986f8
Showing 119 changed files with 160 additions and 321 deletions.
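Most of the 119 files follow a few mechanical patterns rather than hand edits. The most common, visible in the first hunks below, is collapsing the Black-era parenthesized assert back onto a single line when the statement fits (or is allowed to overflow) Ruff's line length. A minimal sketch of the before/after, using illustrative values rather than repo code:

    # Black-era formatting (the removed lines in the hunks below):
    commits = ["e5986f8"]
    base, repo = "main", "pytorch"
    assert (
        commits
    ), f"Can't find git commit {base} in repo {repo}"

    # Ruff formatting (the added lines):
    assert commits, f"Can't find git commit {base} in repo {repo}"

When the condition itself is long, Ruff instead keeps the condition parenthesized and puts the message inline after the closing paren, as in the analyze-bisection-result.py hunk below.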
4 changes: 1 addition & 3 deletions .github/scripts/abtest.py
@@ -199,9 +199,7 @@ def validate_results(a, b) -> bool:
         args.pytorch_repo
     ).is_dir(), f"Specified PyTorch repo dir {args.pytorch_repo} doesn't exist."
     commits = gitutils.get_git_commits(args.pytorch_repo, args.base, args.head)
-    assert (
-        commits
-    ), f"Can't find git commit {args.base} or {args.head} in repo {args.pytorch_repo}"
+    assert commits, f"Can't find git commit {args.base} or {args.head} in repo {args.pytorch_repo}"
     # setup cuda environment
     cuda_env = prepare_cuda_env(cuda_version=DEFAULT_CUDA_VERSION)
     result_a = run_commit(
24 changes: 9 additions & 15 deletions .github/scripts/bmutils/analyze-bisection-result.py
@@ -17,21 +17,15 @@ def check_env(bisection_root: str):
     assert (
         bisection_path.is_dir()
     ), f"Specified bisection root {bisection_path} is not a directory."
-    assert bisection_path.joinpath(
-        "gh-issue.md"
-    ).exists(), (
-        f"Bisection directory {bisection_path} doesn't contain file gh-issue.md."
-    )
-    assert bisection_path.joinpath(
-        "result.json"
-    ).exists(), (
-        f"Bisection directory {bisection_path} doesn't contain file result.json."
-    )
-    assert bisection_path.joinpath(
-        "config.yaml"
-    ).exists(), (
-        f"Bisection directory {bisection_path} doesn't contain file config.yaml."
-    )
+    assert (
+        bisection_path.joinpath("gh-issue.md").exists()
+    ), f"Bisection directory {bisection_path} doesn't contain file gh-issue.md."
+    assert (
+        bisection_path.joinpath("result.json").exists()
+    ), f"Bisection directory {bisection_path} doesn't contain file result.json."
+    assert (
+        bisection_path.joinpath("config.yaml").exists()
+    ), f"Bisection directory {bisection_path} doesn't contain file config.yaml."


 def setup_gh_issue(bisection_root: str, gh_workflow_id: str):
4 changes: 1 addition & 3 deletions .github/scripts/run-config.py
@@ -59,9 +59,7 @@ def get_models(config) -> Optional[str]:
         r = re.compile(model_pattern)
         matched_models = list(filter(lambda x: r.match(x), models))
         enabled_models.extend(matched_models)
-    assert (
-        enabled_models
-    ), f"The model patterns you specified {config['models']} does not match any model. Please double check."
+    assert enabled_models, f"The model patterns you specified {config['models']} does not match any model. Please double check."
     return enabled_models

4 changes: 1 addition & 3 deletions .github/scripts/userbenchmark/aicluster.py
@@ -236,9 +236,7 @@ def run_aicluster_benchmark(
     index = get_metrics_index(s3, benchmark_name, work_dir)
     # if the previous run is not successful, exit immediately
     if check_success and not determine_success_today(index):
-        assert (
-            False
-        ), f"Don't find the last successful run in index: { index }. Please report a bug."
+        assert False, f"Don't find the last successful run in index: { index }. Please report a bug."
     # upload to scribe by the index
     if upload_scribe:
         upload_metrics_to_scribe(s3, benchmark_name, index, work_dir)
4 changes: 2 additions & 2 deletions bisection.py
@@ -632,8 +632,8 @@ def main() -> None:
     if args.skip_update:
         skip_update_repos = list(map(lambda x: x.strip(), args.skip_update.split(",")))
         for repo in skip_update_repos:
-            assert repo in list(
-                TORCHBENCH_BISECTION_TARGETS.keys()
+            assert (
+                repo in list(TORCHBENCH_BISECTION_TARGETS.keys())
             ), f"User specified skip update repo {repo} not in list: {TORCHBENCH_BISECTION_TARGETS.keys()}"
     else:
         skip_update_repos = None
4 changes: 1 addition & 3 deletions regression_detector.py
@@ -257,9 +257,7 @@ def get_metrics_by_date(
         if metric_datetime.date() == pick_date.date():
             pick_metrics_json_key = metrics_json_key
             break
-    assert (
-        pick_metrics_json_key
-    ), f"Selected date {pick_date} is not found in the latest_metrics_jsons: {latest_metrics_jsons}"
+    assert pick_metrics_json_key, f"Selected date {pick_date} is not found in the latest_metrics_jsons: {latest_metrics_jsons}"
     s3 = S3Client(USERBENCHMARK_S3_BUCKET, USERBENCHMARK_S3_OBJECT)
     metrics_json = s3.get_file_as_json(pick_metrics_json_key)
     return (metrics_json, pick_metrics_json_key)
2 changes: 0 additions & 2 deletions run.py
@@ -44,12 +44,10 @@


 def run_one_step_with_cudastreams(func, streamcount):
-
     print("Running Utilization Scaling Using Cuda Streams")

     streamlist = []
     for i in range(1, streamcount + 1, 1):
-
         # create additional streams and prime with load
         while len(streamlist) < i:
             s = torch.cuda.Stream()
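The run.py hunk shows a second recurring pattern: Ruff deletes blank lines that sit immediately after a def or class line or at the top of a block, which accounts for most of the pure-deletion files below. A sketch using the function named above:

    # Before:
    #
    # def run_one_step_with_cudastreams(func, streamcount):
    #
    #     print("Running Utilization Scaling Using Cuda Streams")

    # After: no blank line between the signature and the first statement.
    def run_one_step_with_cudastreams(func, streamcount):
        print("Running Utilization Scaling Using Cuda Streams")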
4 changes: 1 addition & 3 deletions scripts/proper_bs.py
@@ -121,9 +121,7 @@ def _run_model_test_proper_bs(
     except NotImplementedError as e:
         status = "NotImplemented"
         error_message = str(e)
-    except (
-        TypeError
-    ) as e:  # TypeError is raised when the model doesn't support variable batch sizes
+    except TypeError as e:  # TypeError is raised when the model doesn't support variable batch sizes
         status = "TypeError"
         error_message = str(e)
     except KeyboardInterrupt as e:
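The proper_bs.py hunk is the same idea applied to except clauses: the parentheses Black added around a single exception type (so the trailing comment would fit) are redundant, and Ruff removes them even though the comment now overflows the line. A runnable sketch, with a hypothetical failing operation standing in for the model call:

    try:
        batch = len(5)  # hypothetical operation; len() on an int raises TypeError
    except TypeError as e:  # a single exception type needs no parentheses
        error_message = str(e)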
1 change: 0 additions & 1 deletion scripts/upload_scribe.py
@@ -31,7 +31,6 @@ def format_message(self, field_dict):
         elif field in self.schema["float"]:
             message["float"][field] = float(value)
         else:
-
             raise ValueError(
                 "Field {} is not currently used, "
                 "be intentional about adding new fields".format(field)
1 change: 0 additions & 1 deletion scripts/upload_scribe_v2.py
@@ -64,7 +64,6 @@ def format_message(self, field_dict):
         elif field in self.schema["float"]:
             message["float"][field] = float(value)
         else:
-
             raise ValueError(
                 "Field {} is not currently used, "
                 "be intentional about adding new fields".format(field)
1 change: 0 additions & 1 deletion test.py
@@ -55,7 +55,6 @@ def _create_example_model_instance(task: ModelTask, device: str):


 def _load_test(path, device):
-
     model_name = os.path.basename(path)

     def _skip_cuda_memory_check_p(metadata):
1 change: 0 additions & 1 deletion test_bench.py
@@ -60,7 +60,6 @@ def pytest_generate_tests(metafunc):
     group="hub",
 )
 class TestBenchNetwork:
-
     def test_train(self, model_path, device, benchmark):
         try:
             model_name = os.path.basename(model_path)
1 change: 0 additions & 1 deletion torchbenchmark/__init__.py
@@ -304,7 +304,6 @@ def args(self) -> List[str]:


 class ModelTask(base_task.TaskBase):
-
     # The worker may (and often does) consume significant system resources.
     # In order to ensure that runs do not interfere with each other, we only
     # allow a single ModelTask to exist at a time.
4 changes: 3 additions & 1 deletion torchbenchmark/_components/_impl/workers/subprocess_rpc.py
@@ -274,8 +274,10 @@ def write(self, msg: bytes) -> None:
     def get_writer_pid(self) -> int:
         assert (
             self._writer_pid is not None
-        ), "Writer pid is not specified. Maybe calling from child process or input pipe.\
+        ), (
+            "Writer pid is not specified. Maybe calling from child process or input pipe.\
             Please report a bug."
+        )
         return self._writer_pid

     def set_writer_pid(self, writer_pid: int) -> None:
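The subprocess_rpc.py hunk shows the complementary assert case: when the message itself is too long, Ruff wraps the message in its own parentheses instead of letting it trail the condition's closing paren. A sketch of the resulting shape (hypothetical class; the real code continues the string with a backslash rather than implicit concatenation):

    class Writer:
        def __init__(self) -> None:
            self._writer_pid = None

        def get_writer_pid(self) -> int:
            # Ruff parenthesizes the long message on its own lines:
            assert (
                self._writer_pid is not None
            ), (
                "Writer pid is not specified. Maybe calling from child process "
                "or input pipe. Please report a bug."
            )
            return self._writer_pid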
@@ -258,9 +258,9 @@ def export_all_records_to_csv(self):
         ]
         cluster_records.sort(key=lambda x: x.timestamp())
         for record in cluster_records:
-            csv_records[gpu_uuid][record_type][
-                record.timestamp()
-            ] = record.value()
+            csv_records[gpu_uuid][record_type][record.timestamp()] = (
+                record.value()
+            )
         with open(self.export_csv_name, "w") as fout:
             for gpu_uuid in csv_records:
                 # timestamp record in DCGM is microsecond
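This hunk (and the _ProcessValues and groupby_wo_aggregate hunks further down) shows Ruff's treatment of over-long subscript assignments: keep the chain of keys intact and parenthesize the right-hand side, rather than splitting a key across lines. A runnable sketch with made-up records:

    csv_records = {"gpu-0": {"power": {}}}
    gpu_uuid, record_type = "gpu-0", "power"
    timestamp, value = 1733300000, 41.5  # hypothetical sample

    # Before:
    # csv_records[gpu_uuid][record_type][
    #     timestamp
    # ] = value

    # After: the subscripts stay on one line and the value is parenthesized.
    csv_records[gpu_uuid][record_type][timestamp] = (
        value
    )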
@@ -147,7 +147,6 @@ def default(self, obj): # pylint: disable=E0202


 def py_helper_dcgm_field_values_since_callback(gpuId, values, numValues, userData):
-
     userData = ctypes.cast(userData, ctypes.py_object).value
     userData._ProcessValues(gpuId, values[0:numValues])
     return 0

@@ -363,7 +362,6 @@ def GetAllSinceLastCall(self):
 def py_helper_dcgm_field_values_since_entity_callback(
     entityGroupId, entityId, values, numValues, userData
 ):
-
     userData = ctypes.cast(userData, ctypes.py_object).value
     userData._ProcessValues(entityGroupId, entityId, values[0:numValues])
     return 0

@@ -382,9 +380,7 @@ def py_helper_dcgm_field_values_since_entity_callback(

 class DcgmFieldValueEntityCollection:
     def __init__(self, handle, groupId):
-        self.values = (
-            {}
-        )  # 3D dictionary of [entityGroupId][entityId][fieldId](DcgmFieldValueTimeSeries)
+        self.values = {}  # 3D dictionary of [entityGroupId][entityId][fieldId](DcgmFieldValueTimeSeries)
         self._handle = handle
         self._groupId = groupId
         self._numValuesSeen = 0

@@ -408,9 +404,9 @@ def _ProcessValues(self, entityGroupId, entityId, values):
             value = DcgmFieldValue(rawValue)

             if value.fieldId not in self.values[entityGroupId][entityId]:
-                self.values[entityGroupId][entityId][
-                    value.fieldId
-                ] = DcgmFieldValueTimeSeries()
+                self.values[entityGroupId][entityId][value.fieldId] = (
+                    DcgmFieldValueTimeSeries()
+                )

             self.values[entityGroupId][entityId][value.fieldId].InsertValue(value)

@@ -123,7 +123,6 @@ def _collect_records(self):
         for metric_type in self._metrics:
             dcgm_field = self.model_analyzer_to_dcgm_field[metric_type]
             for measurement in metrics[dcgm_field].values:
-
                 if measurement.value is not None:
                     # DCGM timestamp is in nanoseconds
                     records.append(
12 changes: 3 additions & 9 deletions torchbenchmark/_components/model_analyzer/dcgm/dcgm_structs.py
@@ -90,9 +90,7 @@
     -21
 )  # Connection to the host engine is not valid any longer
 DCGM_ST_GPU_NOT_SUPPORTED = -22  # This GPU is not supported by DCGM
-DCGM_ST_GROUP_INCOMPATIBLE = (
-    -23
-)  # The GPUs of the provided group are not compatible with each other for the requested operation
+DCGM_ST_GROUP_INCOMPATIBLE = -23  # The GPUs of the provided group are not compatible with each other for the requested operation
 DCGM_ST_MAX_LIMIT = -24
 DCGM_ST_LIBRARY_NOT_FOUND = -25  # DCGM library could not be found
 DCGM_ST_DUPLICATE_KEY = -26  # Duplicate key passed to the function

@@ -111,9 +109,7 @@
 DCGM_ST_MODULE_NOT_LOADED = (
     -33
 )  # This request is serviced by a module of DCGM that is not currently loaded
-DCGM_ST_IN_USE = (
-    -34
-)  # The requested operation could not be completed because the affected resource is in use
+DCGM_ST_IN_USE = -34  # The requested operation could not be completed because the affected resource is in use
 DCGM_ST_GROUP_IS_EMPTY = (
     -35
 )  # The specified group is empty and this operation is not valid with an empty group

@@ -126,9 +122,7 @@
 DCGM_ST_PROFILING_MULTI_PASS = (
     -38
 )  # The requested profiling metrics cannot be collected in a single pass
-DCGM_ST_DIAG_ALREADY_RUNNING = (
-    -39
-)  # A diag instance is already running, cannot run a new diag until the current one finishes.
+DCGM_ST_DIAG_ALREADY_RUNNING = -39  # A diag instance is already running, cannot run a new diag until the current one finishes.
 DCGM_ST_DIAG_BAD_JSON = (
     -40
 )  # The DCGM GPU Diagnostic returned JSON that cannot be parsed
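In dcgm_structs.py the old style wrapped each negative constant in parentheses purely so its trailing comment stayed under the line limit; Ruff collapses value and comment back onto one line and tolerates the overflow. Sketch:

    # Before:
    # DCGM_ST_IN_USE = (
    #     -34
    # )  # The requested operation could not be completed because the affected resource is in use

    # After: the value and its comment share one line, even past the limit.
    DCGM_ST_IN_USE = -34  # The requested operation could not be completed because the affected resource is in use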
@@ -124,7 +124,6 @@ def __str__(self):

 ###############################################################################
 def self_test():
-
     v = DcgmValue(1.0)
     assert not v.IsBlank()
     assert v.value == 1.0
@@ -193,9 +193,9 @@ def groupby_wo_aggregate(self, record_types, groupby_criterion):
                 record_types=[record_type],
                 filters=[lambda r: groupby_criterion(r) == field_value],
             )
-            groupby_result[record_type][
-                field_value
-            ] = temp_records_aggregator.get_records()
+            groupby_result[record_type][field_value] = (
+                temp_records_aggregator.get_records()
+            )
         return groupby_result

     def record_types(self):
9 changes: 0 additions & 9 deletions torchbenchmark/_components/test/test_subprocess.py
@@ -27,7 +27,6 @@


 class TestParseFunction(TestCase):
-
     @staticmethod
     def _indent(s: str) -> str:
         return textwrap.indent(s, " " * 12)

@@ -115,7 +114,6 @@ def f(

     def test_parse_method(self) -> None:
         class MyClass:
-
             @staticmethod
             def f(x: int) -> int:
                 """Identity, but with more steps"""

@@ -228,7 +226,6 @@ def g(x: int):

     def test_no_functor(self) -> None:
         class F:
-
             def __call__(self) -> None:
                 pass

@@ -255,9 +252,7 @@ def g(**kwargs) -> None:
             task_base.parse_f(g)

     def test_no_decorator(self) -> None:
-
         def my_decorator(f: typing.Callable) -> typing.Callable:
-
             @functools.wraps(f)
             def g(*args, **kwargs) -> typing.Any:
                 return f(*args, **kwargs)

@@ -275,7 +270,6 @@ def f() -> None:


 class TestSubprocessRPC(TestCase):
-
     def test_pipe_basic_read_write(self) -> None:
         pipe = subprocess_rpc.Pipe()

@@ -378,7 +372,6 @@ def test_pipe_cleanup(self) -> None:
         del_audit = {"count": 0}

         class OwnCheckingPipe(subprocess_rpc.Pipe):
-
             def __init__(self):
                 super().__init__()
                 self._cleanup_was_run = False

@@ -394,7 +387,6 @@ def __del__(self) -> None:
                 del_audit["count"] += 1

         class NonOwnCheckingPipe(subprocess_rpc.Pipe):
-
             def __init__(self, *args, **kwargs):
                 super().__init__(*args, **kwargs)
                 assertFalse(self._owns_pipe)

@@ -424,7 +416,6 @@ def __del__(self) -> None:


 class TestSubprocessExceptions(TestCase):
-
     def _test_raise(
         self,
         raise_type: typing.Type[Exception],
12 changes: 3 additions & 9 deletions torchbenchmark/canary_models/fambench_dlrm/config.py
@@ -32,9 +32,7 @@ class FAMBenchTrainConfig:
arch_mlp_bot: str = "2000-1500-1500-1500-192"
arch_mlp_top: str = "4000-4000-4000-4000-4000-4000-4000-4000-4000-1"
arch_sparse_feature_size: int = 192
arch_embedding_size: str = (
"965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965"
)
arch_embedding_size: str = "965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965-965"
num_indices_per_lookup: int = 55
num_indices_per_lookup_fixed: int = 1
numpy_rand_seed: int = 727
@@ -59,13 +57,9 @@ class FAMBenchEvalConfig:
test_num_workers: int = 0
data_generation: str = "random"
arch_mlp_bot: str = "1414-1750-1750-1750-1750-1750-1750-1750-1750-96"
arch_mlp_top: str = (
"1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1"
)
arch_mlp_top: str = "1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1450-1"
arch_sparse_feature_size: int = 96
arch_embedding_size: str = (
"555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693"
)
arch_embedding_size: str = "555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693-555693"
num_indices_per_lookup: int = 8
num_indices_per_lookup_fixed: int = 1
numpy_rand_seed: int = 727
2 changes: 0 additions & 2 deletions torchbenchmark/canary_models/fambench_dlrm/dlrmnet.py
@@ -399,7 +399,6 @@ def apply_emb(self, lS_o, lS_i):

     # using quantizing functions from caffe2/aten/src/ATen/native/quantized/cpu
     def quantize_embedding(self, bits):
-
         n = len(self.emb_l)
         self.emb_l_q = [None] * n
         for k in range(n):

@@ -420,7 +419,6 @@ def quantize_embedding(self, bits):
         self.quantize_bits = bits

     def interact_features(self, x, ly):
-
         if self.arch_interaction_op == "dot":
             # concatenate dense and sparse features
             (batch_size, d) = x.shape
(The remaining changed files are truncated from this view.)
