diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 1ab1c6a..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,70 +0,0 @@ -version: 2.1 - -orbs: - codecov: codecov/codecov@3.1.1 - -workflows: - version: 1 - install_and_test: - jobs: - - python_lint - - test_ubuntu - -commands: - install_deps_ubuntu: - steps: - - checkout - - restore_cache: - key: conda-ubuntu-{{ checksum ".circleci/config.yml" }} - - run: - name: Install conda and environment - command: | - if [ ! -d "/home/circleci/miniconda" ]; then - wget https://repo.anaconda.com/miniconda/Miniconda3-py39_22.11.1-1-Linux-x86_64.sh -O miniconda.sh - bash miniconda.sh -b -p "$HOME"/miniconda - source /home/circleci/miniconda/etc/profile.d/conda.sh - conda activate base - # Conda configuration - conda config --set always_yes yes --set auto_update_conda false - # Update conda - conda create -n ocp python=3.9 - # Install ocp conda env - source /home/circleci/miniconda/etc/profile.d/conda.sh - conda activate ocp - pip install ase==3.22.1 black==22.3.0 pymatgen==2023.5.10 - pip install pytest-cov==4.0.0 pre-commit==2.10.* - fi - - save_cache: - paths: - - /home/circleci/miniconda - key: conda-ubuntu-{{ checksum ".circleci/config.yml" }} - -jobs: - python_lint: - docker: - - image: cimg/python:3.9.13 - steps: - - checkout - - run: - name: setup - command: pip install black==22.3.0 - - run: - name: run black - command: black . --check - - test_ubuntu: - docker: - - image: cimg/python:3.9.13 - resource_class: large - steps: - - install_deps_ubuntu - - run: - name: install ocdata and run tests - command: | - source /home/circleci/miniconda/etc/profile.d/conda.sh - conda activate ocp - pip install -e . 
- pre-commit install - pytest --cov-report=xml --cov=ocdata/core /home/circleci/project/tests - - codecov/upload: - file: coverage.xml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..3aef4ea --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,25 @@ +name: lint + +on: + push: + workflow_dispatch: + +jobs: + lint: + runs-on: ubuntu-latest + strategy: + max-parallel: 6 + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + - name: black + run: | + black --check --color ocdata diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..79c01ef --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,50 @@ +name: test +on: + push: + branches: [main] + pull_request: + workflow_call: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + max-parallel: 10 + matrix: + python_version: ['3.9', '3.10', '3.11'] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python_version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + + - name: Install dependencies and package + # this can be added along with a dependabot config to run tests with latest versions + # pip install -r requirements.txt + # pip install -r requirements-optional.txt + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + - name: Test with pytest + run: | + pytest tests -vv --cov-report=xml --cov=ocdata + + - if: ${{ matrix.python_version == '3.11' }} + name: codecov-report + uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: false # optional 
(default = false) + files: ./coverage.xml + token: ${{ secrets.CODECOV_TOKEN }} # required + verbose: true # optional (default = false) diff --git a/README.md b/README.md index 0f162ae..e16b230 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +# THIS REPOSITORY HAS BEEN MOVED TO [http://github.com/fair-chem/fairchem/tree/main/src/fairchem/data/oc](http://github.com/fair-chem/fairchem/tree/main/src/fairchem/data/oc) + [![CircleCI](https://dl.circleci.com/status-badge/img/gh/Open-Catalyst-Project/Open-Catalyst-Dataset/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/Open-Catalyst-Project/Open-Catalyst-Dataset/tree/main) [![codecov](https://codecov.io/gh/Open-Catalyst-Project/Open-Catalyst-Dataset/branch/main/graph/badge.svg?token=IZ7J729L6S)](https://codecov.io/gh/Open-Catalyst-Project/Open-Catalyst-Dataset/tree/main) diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..ea9b23a --- /dev/null +++ b/codecov.yml @@ -0,0 +1,9 @@ +coverage: + status: + project: + default: + informational: true + patch: + default: + informational: true +github_checks: false diff --git a/ocdata/configs/__init__.py b/ocdata/configs/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ocdata/configs/paths.py b/ocdata/configs/paths.py deleted file mode 100644 index 6649271..0000000 --- a/ocdata/configs/paths.py +++ /dev/null @@ -1,14 +0,0 @@ -# Path to a database of bulks, organized as a list of dictionaries with -# each dict containing atoms objects, mpid, and other metadata. -BULK_PKL_PATH = "ocdata/databases/pkls/bulks.pkl" - -# Path to a folder of pickle files, each containing a list of precomputed -# slabs. The filename of each pickle is where `bulk_index` -# is the index of the corresponding bulk in BULK_PKL_PATH. 
-PRECOMPUTED_SLABS_DIR_PATH = ( - "/checkpoint/janlan/ocp/input_dbs/precomputed_surfaces_2021Sep20/" -) - -# Path to a database of adsorbates, organized as a dictionary with a unique -# integer as key and corresponding adsorbate tuple as value. -ADSORBATES_PKL_PATH = "ocdata/databases/pkls/adsorbates.pkl" diff --git a/ocdata/core/adsorbate.py b/ocdata/core/adsorbate.py index 22ef1d6..f6f59d8 100644 --- a/ocdata/core/adsorbate.py +++ b/ocdata/core/adsorbate.py @@ -1,11 +1,11 @@ import pickle -from typing import Any, Dict, Tuple import warnings +from typing import Any, Dict, Tuple import ase import numpy as np -from ocdata.configs.paths import ADSORBATES_PKL_PATH +from ocdata.databases.pkls import ADSORBATES_PKL_PATH class Adsorbate: diff --git a/ocdata/core/bulk.py b/ocdata/core/bulk.py index 188c7f1..3e4cb81 100644 --- a/ocdata/core/bulk.py +++ b/ocdata/core/bulk.py @@ -1,13 +1,13 @@ import os import pickle -from typing import Any, Dict, List import warnings +from typing import Any, Dict, List import ase import numpy as np -from ocdata.configs.paths import BULK_PKL_PATH from ocdata.core.slab import Slab +from ocdata.databases.pkls import BULK_PKL_PATH class Bulk: diff --git a/ocdata/core/slab.py b/ocdata/core/slab.py index c35fba8..913ec4b 100644 --- a/ocdata/core/slab.py +++ b/ocdata/core/slab.py @@ -21,8 +21,8 @@ class Slab: """ Initializes a slab object, i.e. a particular slab tiled along xyz, in one of 2 ways: - - Pass in a Bulk object and a slab 4-tuple containing - (atoms, miller, shift, top). + - Pass in a Bulk object and a slab 5-tuple containing + (atoms, miller, shift, top, oriented bulk). - Pass in a Bulk object and randomly sample a slab. 
Arguments @@ -48,6 +48,7 @@ def __init__( millers: tuple = None, shift: float = None, top: bool = None, + oriented_bulk: Structure = None, min_ab: float = 0.8, ): assert bulk is not None @@ -57,6 +58,7 @@ def __init__( self.millers = millers self.shift = shift self.top = top + self.oriented_bulk = oriented_bulk assert ( Composition(self.atoms.get_chemical_formula()).reduced_formula @@ -87,10 +89,12 @@ def from_bulk_get_random_slab( max_miller=max_miller, ) slab_idx = np.random.randint(len(untiled_slabs)) - unit_slab_struct, millers, shift, top = untiled_slabs[slab_idx] + unit_slab_struct, millers, shift, top, oriented_bulk = untiled_slabs[ + slab_idx + ] slab_atoms = tile_and_tag_atoms(unit_slab_struct, bulk.atoms, min_ab=min_ab) - return cls(bulk, slab_atoms, millers, shift, top) + return cls(bulk, slab_atoms, millers, shift, top, oriented_bulk) @classmethod def from_bulk_get_specific_millers( @@ -123,10 +127,11 @@ def from_bulk_get_specific_millers( s[1], s[2], s[3], + s[4], ) ) - return [cls(bulk, s[0], s[1], s[2], s[3]) for s in slabs] + return [cls(bulk, s[0], s[1], s[2], s[3], s[4]) for s in slabs] @classmethod def from_bulk_get_all_slabs( @@ -141,7 +146,13 @@ def from_bulk_get_all_slabs( slabs = [] for s in untiled_slabs: slabs.append( - (tile_and_tag_atoms(s[0], bulk.atoms, min_ab=min_ab), s[1], s[2], s[3]) + ( + tile_and_tag_atoms(s[0], bulk.atoms, min_ab=min_ab), + s[1], + s[2], + s[3], + s[4], + ) ) # if path is provided, save out the pkl @@ -150,7 +161,7 @@ def from_bulk_get_all_slabs( with open(save_path, "wb") as f: pickle.dump(slabs, f) - return [cls(bulk, s[0], s[1], s[2], s[3]) for s in slabs] + return [cls(bulk, s[0], s[1], s[2], s[3], s[4]) for s in slabs] @classmethod def from_precomputed_slabs_pkl( @@ -173,7 +184,7 @@ def from_precomputed_slabs_pkl( return slabs else: assert np.all(np.array([s[1] for s in slabs]) <= max_miller) - return [cls(bulk, s[0], s[1], s[2], s[3]) for s in slabs] + return [cls(bulk, *s[:5]) for s in 
slabs] @classmethod def from_atoms(cls, atoms: ase.Atoms = None, bulk=None, **kwargs): @@ -193,6 +204,7 @@ def get_metadata_dict(self): "millers": self.millers, "shift": self.shift, "top": self.top, + "oriented_bulk": self.oriented_bulk, }, } @@ -211,6 +223,7 @@ def __eq__(self, other): and self.millers == other.millers and self.shift == other.shift and self.top == other.top + and self.oriented_bulk == other.oriented_bulk ) @@ -496,8 +509,9 @@ def compute_slabs( Returns ------- all_slabs_info: list - A list of 4-tuples containing pymatgen structure objects for enumerated - slabs, the Miller indices, floats for the shifts, and booleans for top. + A list of 5-tuples containing pymatgen structure objects for enumerated + slabs, the Miller indices, floats for the shifts, booleans for top, and + the oriented bulk structure. """ assert bulk_atoms is not None bulk_struct = standardize_bulk(bulk_atoms) @@ -527,13 +541,16 @@ def compute_slabs( # want to consider them too. if len(slabs) != 0: flipped_slabs_info = [ - (flip_struct(slab), millers, slab.shift, False) + (flip_struct(slab), millers, slab.shift, False, slab.oriented_unit_cell) for slab in slabs if is_structure_invertible(slab) is False ] # Concatenate all the results together - slabs_info = [(slab, millers, slab.shift, True) for slab in slabs] + slabs_info = [ + (slab, millers, slab.shift, True, slab.oriented_unit_cell) + for slab in slabs + ] all_slabs_info.extend(slabs_info + flipped_slabs_info) return all_slabs_info diff --git a/ocdata/databases/pkls/__init__.py b/ocdata/databases/pkls/__init__.py index 811d8d4..9a32604 100644 --- a/ocdata/databases/pkls/__init__.py +++ b/ocdata/databases/pkls/__init__.py @@ -1,4 +1,4 @@ import os -BULK_PKL = os.path.join(__path__[0], "bulks.pkl") -ADSORBATE_PKL = os.path.join(__path__[0], "adsorbates.pkl") +BULK_PKL_PATH = os.path.join(__path__[0], "bulks.pkl") +ADSORBATES_PKL_PATH = os.path.join(__path__[0], "adsorbates.pkl") diff --git a/ocdata/databases/update.py 
b/ocdata/databases/update.py index a596374..ed2622d 100644 --- a/ocdata/databases/update.py +++ b/ocdata/databases/update.py @@ -2,10 +2,7 @@ Script for updating ase pkl and db files from v3.19 to v3.21. Run it with ase v3.19. """ - - import pickle -from collections import defaultdict import ase.io from ase.atoms import Atoms diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b165102 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,54 @@ +[build-system] +requires = ["setuptools>=69"] + +[project] +name = "oc-data" +description = "Code for generating adsorbate-catalyst input configurations" +readme = "README.md" +license = {text = "MIT License"} +version = "0.2.0" +requires-python = ">=3.9, <3.13" +dependencies = [ + "scipy", + "numpy", + "ase==3.22.1", + "pymatgen", + "tqdm" +] + +[tool.setuptools.packages] +find = {namespaces = false} # Disable implicit namespaces + +[tool.setuptools_scm] # for version introspection based on tags + commit + +[project.urls] +repository = "http://github.com/Open-Catalyst-Project/Open-Catalyst-Dataset" + +# include package data +[tool.setuptools.package-data] +"ocdata.databases.pkls" = ["*pkl"] + +[project.optional-dependencies] +dev = ["pre-commit", "pytest", "pytest-cov", "coverage", "black"] + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-p no:warnings --import-mode importlib -x --quiet -rxXs --color yes" +filterwarnings = [ + 'ignore::UserWarning', + 'ignore::FutureWarning', + 'ignore::RuntimeWarning' + ] +testpaths = ["tests"] + +[tool.coverage.run] +source = ["ocdata"] + +[tool.isort] +profile = 'black' +skip_gitignore = true +multi_line_output=3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +line_length = 88 diff --git a/setup.py b/setup.py deleted file mode 100644 index 065fe06..0000000 --- a/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Copyright (c) Facebook, Inc. and its affiliates. 
-This source code is licensed under the MIT license found in the -LICENSE file in the root directory of this source tree. -""" - -from setuptools import find_packages, setup - -setup( - name="ocdata", - version="0.2.0", - description="Code for generating adsorbate-catalyst input configurations", - url="http://github.com/Open-Catalyst-Project/Open-Catalyst-Dataset", - packages=find_packages(), - include_package_data=True, -) diff --git a/tests/test_adsorbate_slab_config.py b/tests/test_adsorbate_slab_config.py index b0b9a7b..20f93da 100644 --- a/tests/test_adsorbate_slab_config.py +++ b/tests/test_adsorbate_slab_config.py @@ -6,9 +6,9 @@ from pymatgen.analysis.adsorption import AdsorbateSiteFinder from pymatgen.io.ase import AseAtomsAdaptor -from ocdata.configs.paths import ADSORBATES_PKL_PATH, BULK_PKL_PATH from ocdata.core import Adsorbate, AdsorbateSlabConfig, Bulk, Slab from ocdata.core.adsorbate_slab_config import get_interstitial_distances +from ocdata.databases.pkls import ADSORBATES_PKL_PATH, BULK_PKL_PATH @pytest.fixture(scope="class") diff --git a/tests/test_bulk.py b/tests/test_bulk.py index b8de05d..c641a53 100644 --- a/tests/test_bulk.py +++ b/tests/test_bulk.py @@ -74,7 +74,11 @@ def test_unique_slab_enumeration(self): assert slab not in seen seen.append(slab) - # pymatgen-2023.5.10 + ase 3.22.1 + # pymatgen bug see https://github.com/materialsproject/pymatgen/issues/3747 + if len(slabs) == 15: + pytest.xfail( + f"Number of generated slabs {len(slabs)} is off due to pymatgen bug!" + ) assert len(slabs) == 14 with open(self.precomputed_path, "wb") as f: @@ -86,7 +90,12 @@ def test_precomputed_slab(self): precomputed_slabs = self.bulk.get_slabs( precomputed_slabs_dir=precomputed_slabs_dir ) - # pymatgen-2023.5.10 + ase 3.22.1 + + if len(precomputed_slabs) == 15: + pytest.xfail( + f"Number of generated slabs {len(precomputed_slabs)} is off due to pymatgen bug!" 
+ ) + assert len(precomputed_slabs) == 14 slabs = self.bulk.get_slabs() diff --git a/tests/test_multi_adsorbate_slab_config.py b/tests/test_multi_adsorbate_slab_config.py index a756db5..b7ed9cd 100644 --- a/tests/test_multi_adsorbate_slab_config.py +++ b/tests/test_multi_adsorbate_slab_config.py @@ -7,9 +7,9 @@ from pymatgen.analysis.adsorption import AdsorbateSiteFinder from pymatgen.io.ase import AseAtomsAdaptor -from ocdata.configs.paths import ADSORBATES_PKL_PATH, BULK_PKL_PATH from ocdata.core import Adsorbate, Bulk, MultipleAdsorbateSlabConfig, Slab from ocdata.core.adsorbate_slab_config import get_interstitial_distances +from ocdata.databases.pkls import ADSORBATES_PKL_PATH, BULK_PKL_PATH @pytest.fixture(scope="class") diff --git a/tests/test_slab.py b/tests/test_slab.py index 9c97b5a..41b0567 100644 --- a/tests/test_slab.py +++ b/tests/test_slab.py @@ -1,6 +1,7 @@ import random import numpy as np +import pytest from ocdata.core import Bulk, Slab @@ -32,4 +33,4 @@ def test_slab_init_random(self): assert slab.atoms.get_chemical_formula() == "Sn48" assert slab.millers == (2, 1, 0) - assert slab.shift == 0.0833333333333334 + assert slab.shift == pytest.approx(0.0833333333333334)