Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files (browse the repository at this point in the history)
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Oct 10, 2022
1 parent 6cafdd0 commit 8c9136a
Show file tree
Hide file tree
Showing 14 changed files with 16 additions and 25 deletions.
3 changes: 1 addition & 2 deletions ac_dc/deduplicate/self_deduplicate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2022-01-08 22:39:29
# @Author : Chenghao Mou ([email protected])
# @Description: Self-deduplication with `datasets`
Expand Down Expand Up @@ -28,7 +27,7 @@

def main(conf: str) -> None:

with open(conf, "r") as f:
with open(conf) as f:
conf = yaml.safe_load(f.read())

if conf["load_from_disk"]["path"]:
Expand Down
1 change: 0 additions & 1 deletion bertin/evaluation/run_glue.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
1 change: 0 additions & 1 deletion bertin/evaluation/run_ner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2020 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
2 changes: 1 addition & 1 deletion bertin/mc4/mc4.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def _generate_examples(self, filepaths):
for filepath in filepaths:
logger.info("generating examples from = %s", filepath)
if filepath.endswith("jsonl"):
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for line in f:
if line:
example = json.loads(line)
Expand Down
1 change: 0 additions & 1 deletion bertin/run_mlm_flax.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
3 changes: 1 addition & 2 deletions bertin/run_mlm_flax_stream.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -446,7 +445,7 @@ def restore_checkpoint(save_dir, state):
args = joblib.load(os.path.join(save_dir, "training_args.joblib"))
data_collator = joblib.load(os.path.join(save_dir, "data_collator.joblib"))

with open(os.path.join(save_dir, "training_state.json"), "r") as f:
with open(os.path.join(save_dir, "training_state.json")) as f:
training_state = json.load(f)
step = training_state["step"]

Expand Down
2 changes: 1 addition & 1 deletion bertin/utils/dataset_perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_perplexity(doc):


with open("mc4-es-train-50M-stats.csv", "w") as csv:
with open("mc4-es-train-50M-steps.jsonl", "r") as data:
with open("mc4-es-train-50M-steps.jsonl") as data:
for line in tqdm(data):
text = json.loads(line)["text"]
csv.write(f"{len(text.split())},{get_perplexity(text)}\n")
4 changes: 2 additions & 2 deletions cc_pseudo_crawl/python_scripts/download_warc.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ def get_warcs(batch):
existing_compressed_warcs,
)

batch["compressed_warc"], batch["download_exception"] = [
batch["compressed_warc"], batch["download_exception"] = (
list(l) for l in zip(*warcs_or_exceptions)
]
)
return batch


Expand Down
2 changes: 1 addition & 1 deletion cc_pseudo_crawl/python_scripts/load_all_seed_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def main():

seed_ids = []
for seed_path in args.seed_paths:
with open(seed_path, "r") as fi:
with open(seed_path) as fi:
data = csv.reader(fi)
# First line is all the headers that we remove.
seed_ids += [row[0] for row_id, row in enumerate(data) if row_id > 0]
Expand Down
5 changes: 2 additions & 3 deletions kenlm_training/cc_net/jsonql.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,8 +880,7 @@ def describe(source, columns=None, weights=None, **kwargs):
continue
if "." in k or k == ALL_DOCUMENTS:
continue
for line in display_stats(stats, k, weights=weights, **kwargs):
yield line
yield from display_stats(stats, k, weights=weights, **kwargs)


def shard(lines):
Expand Down Expand Up @@ -961,7 +960,7 @@ def open_read(filename: ReadableFileLike) -> Iterable[str]:
if filename.suffix == ".gz":
file: TextIO = gzip.open(filename, "rt") # type: ignore
else:
file = open(filename, "rt")
file = open(filename)

return _close_when_exhausted(file)

Expand Down
4 changes: 2 additions & 2 deletions pii-manager/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@

def requirements(filename="requirements.txt"):
"""Read the requirements file"""
with io.open(filename, "r") as f:
with open(filename) as f:
return [line.strip() for line in f if line and line[0] != "#"]


def long_description():
"""
Take the README and remove markdown hyperlinks
"""
with open("README.md", "rt", encoding="utf-8") as f:
with open("README.md", encoding="utf-8") as f:
desc = f.read()
desc = re.sub(r"^\[ ([^\]]+) \]: \s+ \S.*\n", r"", desc, flags=re.X | re.M)
return re.sub(r"\[ ([^\]]+) \]", r"\1", desc, flags=re.X)
Expand Down
9 changes: 3 additions & 6 deletions pii-manager/src/pii_manager/api/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,19 @@ def fetch_all_tasks(
"""
taskdict = get_taskdict(debug=debug)
# Language-independent
for task in taskdict[LANG_ANY].values():
yield task
yield from taskdict[LANG_ANY].values()

langdict = taskdict.get(lang, {})
# Country-independent
for task in langdict.get(COUNTRY_ANY, {}).values():
yield task
yield from langdict.get(COUNTRY_ANY, {}).values()
# Country-specific
if country:
if country[0] in (COUNTRY_ANY, "all"):
country = country_list(lang)
for c in country:
if c == COUNTRY_ANY: # already included above
continue
for task in langdict.get(c, {}).values():
yield task
yield from langdict.get(c, {}).values()


def fetch_task(
Expand Down
2 changes: 1 addition & 1 deletion pii-manager/test/unit/api/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def datafile(name: str) -> str:


def readfile(name: str) -> str:
with open(name, "rt", encoding="utf-8") as f:
with open(name, encoding="utf-8") as f:
return f.read().strip()


Expand Down
2 changes: 1 addition & 1 deletion pii-manager/test/unit/api/test_file_taskfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def datafile(name: str) -> str:


def readfile(name: str) -> str:
with open(name, "rt", encoding="utf-8") as f:
with open(name, encoding="utf-8") as f:
return f.read().strip()


Expand Down

0 comments on commit 8c9136a

Please sign in to comment.