From 8a7cf816c99ee3a3f6ac183070e7b883c039a2f9 Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Tue, 1 Aug 2023 17:10:10 +0200 Subject: [PATCH 1/5] Restructure project --- src/CMakeLists.txt | 21 +++++--- src/hictkpy.cpp | 8 +-- ...{hictkpy_cooler.hpp => hictkpy_cooler.cpp} | 50 +++++++++--------- src/{hictkpy_file.hpp => hictkpy_file.cpp} | 39 +++++++------- src/hictkpy_hic.cpp | 52 +++++++++++++++++++ src/hictkpy_hic.hpp | 51 ------------------ src/{ => include/hictkpy}/common.hpp | 0 src/include/hictkpy/cooler.hpp | 46 ++++++++++++++++ src/include/hictkpy/file.hpp | 37 +++++++++++++ src/include/hictkpy/hic.hpp | 37 +++++++++++++ 10 files changed, 235 insertions(+), 106 deletions(-) rename src/{hictkpy_cooler.hpp => hictkpy_cooler.cpp} (62%) rename src/{hictkpy_file.hpp => hictkpy_file.cpp} (54%) create mode 100644 src/hictkpy_hic.cpp delete mode 100644 src/hictkpy_hic.hpp rename src/{ => include/hictkpy}/common.hpp (100%) create mode 100644 src/include/hictkpy/cooler.hpp create mode 100644 src/include/hictkpy/file.hpp create mode 100644 src/include/hictkpy/hic.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3c3cada..7fb6638 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,16 +7,23 @@ find_package( COMPONENTS Interpreter Development.Module REQUIRED) -# For some reason linking to std::filesystem breaks cibw builds for Apple Silicon -# find_package(Filesystem REQUIRED) +# For some reason linking to std::filesystem breaks cibw builds for Apple Silicon find_package(Filesystem REQUIRED) find_package(pybind11 CONFIG REQUIRED) -pybind11_add_module(hictkpy MODULE hictkpy.cpp) +pybind11_add_module( + hictkpy + MODULE + hictkpy.cpp + hictkpy_cooler.cpp + hictkpy_file.cpp + hictkpy_hic.cpp) +target_include_directories(hictkpy PRIVATE include) target_link_libraries( hictkpy - PRIVATE hictkpy_project_options hictkpy_project_warnings - PUBLIC hictk::cooler - hictk::file - hictk::hic) + PRIVATE hictkpy_project_options + hictkpy_project_warnings + hictk::cooler + hictk::file + hictk::hic) diff --git a/src/hictkpy.cpp b/src/hictkpy.cpp index 54229b5..eb50b69 100644 --- a/src/hictkpy.cpp +++ b/src/hictkpy.cpp @@ -4,14 +4,14 @@ #include -#include "./common.hpp" -#include "./hictkpy_cooler.hpp" -#include "./hictkpy_file.hpp" -#include "./hictkpy_hic.hpp" #include "hictk/cooler/cooler.hpp" #include "hictk/file.hpp" #include "hictk/hic.hpp" #include "hictk/hic/utils.hpp" +#include "hictkpy/common.hpp" +#include "hictkpy/cooler.hpp" +#include "hictkpy/file.hpp" +#include "hictkpy/hic.hpp" namespace hictkpy { diff --git a/src/hictkpy_cooler.hpp b/src/hictkpy_cooler.cpp similarity index 62% rename from src/hictkpy_cooler.hpp rename to src/hictkpy_cooler.cpp index 993becf..8e313cb 100644 --- a/src/hictkpy_cooler.hpp +++ b/src/hictkpy_cooler.cpp @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: MIT -#pragma once - #include #include @@ -11,16 +9,18 @@ #include #include "hictk/cooler/cooler.hpp" +#include "hictk/reference.hpp" +#include "hictkpy/common.hpp" +#include "hictkpy/cooler.hpp" -// This is fine, this header is only supposed to be included in hictkpy.cpp namespace py = pybind11; namespace hictkpy::cooler { -inline hictk::cooler::File file_ctor(std::string_view uri) { return hictk::cooler::File(uri); } +hictk::cooler::File file_ctor(std::string_view uri) { return hictk::cooler::File(uri); } -inline hictk::cooler::File file_ctor(std::string_view uri, const py::dict &py_chroms, - std::uint32_t bin_size, bool overwrite_if_exists = false) { +hictk::cooler::File file_ctor(std::string_view uri, const py::dict &py_chroms, + std::uint32_t bin_size, bool overwrite_if_exists) { std::vector chrom_names{}; std::vector chrom_sizes{}; @@ -32,11 +32,11 @@ inline hictk::cooler::File file_ctor(std::string_view uri, const py::dict &py_ch return hictk::cooler::File::create(uri, chroms, bin_size, overwrite_if_exists); } -inline bool is_cooler(std::string_view uri) { return bool(hictk::cooler::utils::is_cooler(uri)); } +bool is_cooler(std::string_view uri) { return bool(hictk::cooler::utils::is_cooler(uri)); } -inline hictk::cooler::File cooler_ctor(std::string_view uri, const py::dict &py_chroms, - std::uint32_t bin_size, bool overwrite_if_exists = false, - bool float_pixels = false) { +hictk::cooler::File cooler_ctor(std::string_view uri, const py::dict &py_chroms, + std::uint32_t bin_size, bool overwrite_if_exists, + bool float_pixels) { std::vector chrom_names{}; std::vector chrom_sizes{}; @@ -51,7 +51,7 @@ inline hictk::cooler::File cooler_ctor(std::string_view uri, const py::dict &py_ return hictk::cooler::File::create(uri, chroms, bin_size, overwrite_if_exists); } -[[nodiscard]] inline py::dict get_cooler_attrs(const hictk::cooler::File &clr) { +[[nodiscard]] py::dict get_cooler_attrs(const hictk::cooler::File &clr) { py::dict py_attrs; const auto &attrs = clr.attributes(); @@ -103,32 +103,32 @@ inline hictk::cooler::File cooler_ctor(std::string_view uri, const py::dict &py_ return py_attrs; } -inline py::object fetch(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, bool join, std::string_view query_type) { +py::object fetch(const hictk::cooler::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, bool join, + std::string_view query_type) { return file_fetch(f, range1, range2, normalization, count_type, join, query_type); } -inline py::object fetch_sparse(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { +py::object fetch_sparse(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type) { return file_fetch_sparse(f, range1, range2, normalization, count_type, query_type); } -inline py::object fetch_dense(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { +py::object fetch_dense(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type) { return file_fetch_dense(f, range1, range2, normalization, count_type, query_type); } -inline py::object fetch_sum(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { +py::object fetch_sum(const hictk::cooler::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type) { return file_fetch_sum(f, range1, range2, normalization, count_type, query_type); } -inline std::int64_t fetch_nnz(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view query_type) { +std::int64_t fetch_nnz(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view query_type) { return file_fetch_nnz(f, range1, range2, query_type); } } // namespace hictkpy::cooler diff --git a/src/hictkpy_file.hpp b/src/hictkpy_file.cpp similarity index 54% rename from src/hictkpy_file.hpp rename to src/hictkpy_file.cpp index c61b500..e05e0fe 100644 --- a/src/hictkpy_file.hpp +++ b/src/hictkpy_file.cpp @@ -2,26 +2,27 @@ // // SPDX-License-Identifier: MIT -#pragma once - #include #include #include +#include -#include "./common.hpp" #include "hictk/file.hpp" +#include "hictkpy/file.hpp" + +namespace py = pybind11; namespace hictkpy::file { -[[nodiscard]] hictk::File ctor(std::string_view path, std::int32_t resolution, - std::string_view matrix_type, std::string_view matrix_unit) { +hictk::File ctor(std::string_view path, std::int32_t resolution, std::string_view matrix_type, + std::string_view matrix_unit) { return hictk::File{std::string{path}, static_cast(resolution), hictk::hic::ParseMatrixTypeStr(std::string{matrix_type}), hictk::hic::ParseUnitStr(std::string{matrix_unit})}; } -inline py::object fetch(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, bool join, - std::string_view query_type) { +py::object fetch(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, bool join, + std::string_view query_type) { return std::visit( [&](const auto &ff) -> py::object { return file_fetch(ff, range1, range2, normalization, count_type, join, query_type); @@ -29,9 +30,9 @@ inline py::object fetch(const hictk::File &f, std::string_view range1, std::stri f.get()); } -inline py::object fetch_sparse(const hictk::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { +py::object fetch_sparse(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type) { return std::visit( [&](const auto &ff) -> py::object { return file_fetch_sparse(ff, range1, range2, normalization, count_type, query_type); @@ -39,9 +40,9 @@ inline py::object fetch_sparse(const hictk::File &f, std::string_view range1, f.get()); } -inline py::object fetch_dense(const hictk::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { +py::object fetch_dense(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type) { return std::visit( [&](const auto &ff) -> py::object { return file_fetch_dense(ff, range1, range2, normalization, count_type, query_type); @@ -49,9 +50,9 @@ inline py::object fetch_dense(const hictk::File &f, std::string_view range1, f.get()); } -inline py::object fetch_sum(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { +py::object fetch_sum(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type) { return std::visit( [&](const auto &ff) -> py::object { return file_fetch_sum(ff, range1, range2, normalization, count_type, query_type); @@ -59,8 +60,8 @@ inline py::object fetch_sum(const hictk::File &f, std::string_view range1, std:: f.get()); } -inline std::int64_t fetch_nnz(const hictk::File &f, std::string_view range1, - std::string_view range2, std::string_view query_type) { +std::int64_t fetch_nnz(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view query_type) { return std::visit([&](const auto &ff) { return file_fetch_nnz(ff, range1, range2, query_type); }, f.get()); } diff --git a/src/hictkpy_hic.cpp b/src/hictkpy_hic.cpp new file mode 100644 index 0000000..50d4ebb --- /dev/null +++ b/src/hictkpy_hic.cpp @@ -0,0 +1,52 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include "hictkpy/hic.hpp" + +#include +#include +#include + +#include "hictk/hic.hpp" +#include "hictkpy/common.hpp" + +namespace py = pybind11; + +namespace hictkpy::hic { +hictk::hic::File file_ctor(std::string_view path, std::int32_t resolution, + std::string_view matrix_type, std::string_view matrix_unit) { + return hictk::hic::File{std::string{path}, static_cast(resolution), + hictk::hic::ParseMatrixTypeStr(std::string{matrix_type}), + hictk::hic::ParseUnitStr(std::string{matrix_unit})}; +} + +py::object fetch(const hictk::hic::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, bool join, + std::string_view query_type) { + return file_fetch(f, range1, range2, normalization, count_type, join, query_type); +} + +py::object fetch_sparse(const hictk::hic::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type) { + return file_fetch_sparse(f, range1, range2, normalization, count_type, query_type); +} + +py::object fetch_dense(const hictk::hic::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type) { + return file_fetch_dense(f, range1, range2, normalization, count_type, query_type); +} + +py::object fetch_sum(const hictk::hic::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type) { + return file_fetch_sum(f, range1, range2, normalization, count_type, query_type); +} + +std::int64_t fetch_nnz(const hictk::hic::File &f, std::string_view range1, std::string_view range2, + std::string_view query_type) { + return file_fetch_nnz(f, range1, range2, query_type); +} +} // namespace hictkpy::hic diff --git a/src/hictkpy_hic.hpp b/src/hictkpy_hic.hpp deleted file mode 100644 index 5857da7..0000000 --- a/src/hictkpy_hic.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2023 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include - -#include "./common.hpp" -#include "hictk/hic.hpp" - -namespace hictkpy::hic { -[[nodiscard]] hictk::hic::File file_ctor(std::string_view path, std::int32_t resolution, - std::string_view matrix_type, - std::string_view matrix_unit) { - return hictk::hic::File{std::string{path}, static_cast(resolution), - hictk::hic::ParseMatrixTypeStr(std::string{matrix_type}), - hictk::hic::ParseUnitStr(std::string{matrix_unit})}; -} - -inline py::object fetch(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, bool join, - std::string_view query_type) { - return file_fetch(f, range1, range2, normalization, count_type, join, query_type); -} - -inline py::object fetch_sparse(const hictk::hic::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { - return file_fetch_sparse(f, range1, range2, normalization, count_type, query_type); -} - -inline py::object fetch_dense(const hictk::hic::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { - return file_fetch_dense(f, range1, range2, normalization, count_type, query_type); -} - -inline py::object fetch_sum(const hictk::hic::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { - return file_fetch_sum(f, range1, range2, normalization, count_type, query_type); -} - -inline std::int64_t fetch_nnz(const hictk::hic::File &f, std::string_view range1, - std::string_view range2, std::string_view query_type) { - return file_fetch_nnz(f, range1, range2, query_type); -} -} // namespace hictkpy::hic diff --git a/src/common.hpp b/src/include/hictkpy/common.hpp similarity index 100% rename from src/common.hpp rename to src/include/hictkpy/common.hpp diff --git a/src/include/hictkpy/cooler.hpp b/src/include/hictkpy/cooler.hpp new file mode 100644 index 0000000..43acced --- /dev/null +++ b/src/include/hictkpy/cooler.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include +#include +#include +#include + +#include "hictk/cooler/cooler.hpp" + +namespace hictkpy::cooler { + +hictk::cooler::File file_ctor(std::string_view uri); + +hictk::cooler::File file_ctor(std::string_view uri, const pybind11::dict &py_chroms, + std::uint32_t bin_size, bool overwrite_if_exists = false); +bool is_cooler(std::string_view uri); + +hictk::cooler::File cooler_ctor(std::string_view uri, const pybind11::dict &py_chroms, + std::uint32_t bin_size, bool overwrite_if_exists = false, + bool float_pixels = false); +[[nodiscard]] pybind11::dict get_cooler_attrs(const hictk::cooler::File &clr); + +pybind11::object fetch(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, bool join, std::string_view query_type); +pybind11::object fetch_sparse(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type); + +pybind11::object fetch_dense(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type); + +pybind11::object fetch_sum(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type); +std::int64_t fetch_nnz(const hictk::cooler::File &f, std::string_view range1, + std::string_view range2, std::string_view query_type); + +} // namespace hictkpy::cooler diff --git a/src/include/hictkpy/file.hpp b/src/include/hictkpy/file.hpp new file mode 100644 index 0000000..ec9ffa5 --- /dev/null +++ b/src/include/hictkpy/file.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#include "hictk/file.hpp" +#include "hictkpy/common.hpp" + +namespace hictkpy::file { +[[nodiscard]] hictk::File ctor(std::string_view path, std::int32_t resolution, + std::string_view matrix_type, std::string_view matrix_unit); + +pybind11::object fetch(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, bool join, + std::string_view query_type); + +pybind11::object fetch_sparse(const hictk::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type); + +pybind11::object fetch_dense(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type); + +pybind11::object fetch_sum(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, + std::string_view query_type); + +std::int64_t fetch_nnz(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view query_type); + +} // namespace hictkpy::file diff --git a/src/include/hictkpy/hic.hpp b/src/include/hictkpy/hic.hpp new file mode 100644 index 0000000..c654ba5 --- /dev/null +++ b/src/include/hictkpy/hic.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include + +#include "hictk/hic.hpp" +#include "hictkpy/common.hpp" + +namespace hictkpy::hic { +[[nodiscard]] hictk::hic::File file_ctor(std::string_view path, std::int32_t resolution, + std::string_view matrix_type, + std::string_view matrix_unit); + +pybind11::object fetch(const hictk::hic::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, bool join, + std::string_view query_type); + +pybind11::object fetch_sparse(const hictk::hic::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type); + +pybind11::object fetch_dense(const hictk::hic::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type); + +pybind11::object fetch_sum(const hictk::hic::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, std::string_view query_type); + +std::int64_t fetch_nnz(const hictk::hic::File &f, std::string_view range1, std::string_view range2, + std::string_view query_type); +} // namespace hictkpy::hic From b66fd61fffc185b1547abc8bf1d8613d5060ef14 Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Thu, 3 Aug 2023 16:15:27 +0200 Subject: [PATCH 2/5] Refactor --- src/CMakeLists.txt | 3 +- src/hictkpy.cpp | 171 ++++++++---------- src/hictkpy_cooler.cpp | 134 -------------- src/hictkpy_file.cpp | 73 ++++---- src/hictkpy_hic.cpp | 52 ------ src/hictkpy_pixel_selector.cpp | 231 +++++++++++++++++++++++++ src/include/hictkpy/cooler.hpp | 46 ----- src/include/hictkpy/file.hpp | 23 +-- src/include/hictkpy/hic.hpp | 37 ---- src/include/hictkpy/pixel_selector.hpp | 64 +++++++ test/test_fetch.py | 91 ---------- test/test_fetch_accessors.py | 43 ++--- test/test_fetch_dense.py | 94 ++++------ test/test_fetch_df.py | 79 +++++++++ test/test_fetch_nnz.py | 37 ++-- test/test_fetch_sparse.py | 94 +++++----- test/test_fetch_sum.py | 36 ++-- 17 files changed, 598 insertions(+), 710 deletions(-) delete mode 100644 src/hictkpy_cooler.cpp delete mode 100644 src/hictkpy_hic.cpp create mode 100644 src/hictkpy_pixel_selector.cpp delete mode 100644 src/include/hictkpy/cooler.hpp delete mode 100644 src/include/hictkpy/hic.hpp create mode 100644 src/include/hictkpy/pixel_selector.hpp delete mode 100644 test/test_fetch.py create mode 100644 test/test_fetch_df.py diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7fb6638..a6cf62c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,9 +15,8 @@ pybind11_add_module( hictkpy MODULE hictkpy.cpp - hictkpy_cooler.cpp hictkpy_file.cpp - hictkpy_hic.cpp) + hictkpy_pixel_selector.cpp) target_include_directories(hictkpy PRIVATE include) target_link_libraries( diff --git a/src/hictkpy.cpp b/src/hictkpy.cpp index eb50b69..93947d6 100644 --- a/src/hictkpy.cpp +++ b/src/hictkpy.cpp @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: MIT +#include #include #include "hictk/cooler/cooler.hpp" @@ -9,96 +10,77 @@ #include "hictk/hic.hpp" #include "hictk/hic/utils.hpp" #include "hictkpy/common.hpp" -#include "hictkpy/cooler.hpp" #include "hictkpy/file.hpp" -#include "hictkpy/hic.hpp" +#include "hictkpy/pixel_selector.hpp" +namespace py = pybind11; namespace hictkpy { -static pybind11::module_ declare_cooler_submodule(pybind11::module_ &m) { - auto cooler = m.def_submodule("cooler"); - auto cooler_utils = cooler.def_submodule("utils"); - - cooler_utils.def("is_cooler", &cooler::is_cooler, "test whether path points to a cooler file"); - - auto cooler_file = - py::class_(cooler, "File") - .def(py::init(py::overload_cast(cooler::file_ctor)), py::arg("uri")) - .def(py::init(py::overload_cast( - cooler::file_ctor)), - py::arg("uri"), py::arg("chromosomes"), py::arg("bin_size"), - py::arg("overwrite_if_exists")); - - cooler_file.def("uri", &hictk::cooler::File::uri); - cooler_file.def("hdf5_path", &hictk::cooler::File::hdf5_path); - cooler_file.def("path", &hictk::cooler::File::path); - - cooler_file.def("bin_size", &hictk::cooler::File::bin_size); - cooler_file.def("nbins", &hictk::cooler::File::nbins); - cooler_file.def("nchroms", &hictk::cooler::File::nchroms); - cooler_file.def("nnz", &hictk::cooler::File::nnz); - - cooler_file.def("chromosomes", &get_chromosomes_from_file, - py::arg("include_all") = false); - cooler_file.def("bins", &get_bins_from_file); - cooler_file.def("attributes", &cooler::get_cooler_attrs); - - cooler_file.def("fetch", &cooler::fetch, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("join") = false, py::arg("query_type") = "UCSC"); - cooler_file.def("fetch_sparse", &cooler::fetch_sparse, py::arg("range1") = "", - py::arg("range2") = "", py::arg("normalization") = "NONE", - py::arg("count_type") = "int", py::arg("query_type") = "UCSC"); - cooler_file.def("fetch_dense", &cooler::fetch_dense, py::arg("range1") = "", - py::arg("range2") = "", py::arg("normalization") = "NONE", - py::arg("count_type") = "int", py::arg("query_type") = "UCSC"); - cooler_file.def("fetch_sum", &cooler::fetch_sum, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("query_type") = "UCSC"); - cooler_file.def("fetch_nnz", &cooler::fetch_nnz, py::arg("range1") = "", py::arg("range2") = "", - py::arg("query_type") = "UCSC"); - - return cooler; +template +static void declare_thin_pixel_class(pybind11::module_ &m, const std::string &suffix) { + const auto type_name = std::string{"ThinPixel"} + suffix; + py::class_>(m, type_name.c_str()) + .def_property_readonly("bin1_id", [](const hictk::ThinPixel &tp) { return tp.bin1_id; }) + .def_property_readonly("bin2_id", [](const hictk::ThinPixel &tp) { return tp.bin2_id; }) + .def_property_readonly("count", [](const hictk::ThinPixel &tp) { return tp.count; }) + .def("__repr__", + [](const hictk::ThinPixel &tp) { + return fmt::format(FMT_COMPILE("bin1_id={}; bin2_id={}; count={};"), tp.bin1_id, + tp.bin2_id, tp.count); + }) + .def("__str__", [](const hictk::ThinPixel &tp) { + return fmt::format(FMT_COMPILE("{}\t{}\t{}"), tp.bin1_id, tp.bin2_id, tp.count); + }); +} + +template +static void declare_pixel_class(pybind11::module_ &m, const std::string &suffix) { + const auto type_name = std::string{"Pixel"} + suffix; + py::class_>(m, type_name.c_str()) + .def_property_readonly("bin1_id", [](const hictk::Pixel &p) { return p.coords.bin1.id(); }) + .def_property_readonly("bin2_id", [](const hictk::Pixel &p) { return p.coords.bin2.id(); }) + .def_property_readonly("rel_bin1_id", + [](const hictk::Pixel &p) { return p.coords.bin1.rel_id(); }) + .def_property_readonly("rel_bin2_id", + [](const hictk::Pixel &p) { return p.coords.bin2.rel_id(); }) + .def_property_readonly("count", [](const hictk::Pixel &p) { return p.count; }) + .def("__repr__", + [](const hictk::Pixel &p) { + return fmt::format( + FMT_COMPILE("chrom1={}; start1={}; end1={}; chrom2={}; start2={}; end2={};"), + p.coords.bin1.chrom().name(), p.coords.bin1.start(), p.coords.bin1.end(), + p.coords.bin2.chrom().name(), p.coords.bin2.start(), p.coords.bin2.end(), p.count); + }) + .def("__str__", [](const hictk::Pixel &p) { + return fmt::format(FMT_COMPILE("{}\t{}\t{}\t{}\t{}\t{}"), p.coords.bin1.chrom().name(), + p.coords.bin1.start(), p.coords.bin1.end(), p.coords.bin2.chrom().name(), + p.coords.bin2.start(), p.coords.bin2.end(), p.count); + }); } -static pybind11::module_ declare_hic_submodule(pybind11::module_ &m) { - auto hic = m.def_submodule("hic"); - auto hic_utils = hic.def_submodule("utils"); - - hic_utils.def("is_hic_file", &hictk::hic::utils::is_hic_file, - "test whether path points to a .hic file"); - - auto hic_file = py::class_(hic, "File") - .def(py::init(&hic::file_ctor), py::arg("path"), py::arg("resolution"), - py::arg("matrix_type") = "observed", py::arg("matrix_unit") = "BP"); - - hic_file.def("path", &hictk::hic::File::url); - hic_file.def("name", &hictk::hic::File::name); - hic_file.def("version", &hictk::hic::File::version); - - hic_file.def("bin_size", &hictk::hic::File::resolution); - hic_file.def("nbins", &hictk::hic::File::nbins); - hic_file.def("nchroms", &hictk::hic::File::nchroms); - - hic_file.def("chromosomes", &get_chromosomes_from_file, - py::arg("include_all") = false); - hic_file.def("bins", &get_bins_from_file); - - hic_file.def("fetch", &hic::fetch, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("join") = false, py::arg("query_type") = "UCSC"); - hic_file.def("fetch_sparse", &hic::fetch_sparse, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("query_type") = "UCSC"); - hic_file.def("fetch_dense", &hic::fetch_dense, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("query_type") = "UCSC"); - hic_file.def("fetch_sum", &hic::fetch_sum, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("query_type") = "UCSC"); - hic_file.def("fetch_nnz", &hic::fetch_nnz, py::arg("range1") = "", py::arg("range2") = "", - py::arg("query_type") = "UCSC"); - return hic; +static void declare_pixel_selector_class(pybind11::module_ &m) { + auto sel = + py::class_(m, "PixelSelector") + .def(py::init, std::string_view, + bool>(), + py::arg("selector"), py::arg("type"), py::arg("join")) + .def(py::init, std::string_view, bool>(), + py::arg("selector"), py::arg("type"), py::arg("join")) + .def(py::init, std::string_view, + bool>(), + py::arg("selector"), py::arg("type"), py::arg("join")); + + sel.def("coord1", &PixelSelector::get_coord1); + sel.def("coord2", &PixelSelector::get_coord2); + + sel.def("__iter__", &PixelSelector::make_iterable, py::keep_alive<0, 1>()); + + sel.def("to_df", &PixelSelector::to_df); + sel.def("to_numpy", &PixelSelector::to_numpy); + sel.def("to_coo", &PixelSelector::to_coo); + + sel.def("nnz", &PixelSelector::nnz); + sel.def("sum", &PixelSelector::sum); } static void declare_file_class(pybind11::module_ &m) { @@ -122,17 +104,6 @@ static void declare_file_class(pybind11::module_ &m) { file.def("fetch", &file::fetch, py::arg("range1") = "", py::arg("range2") = "", py::arg("normalization") = "NONE", py::arg("count_type") = "int", py::arg("join") = false, py::arg("query_type") = "UCSC"); - file.def("fetch_sparse", &file::fetch_sparse, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("query_type") = "UCSC"); - file.def("fetch_dense", &file::fetch_dense, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("query_type") = "UCSC"); - file.def("fetch_sum", &file::fetch_sum, py::arg("range1") = "", py::arg("range2") = "", - py::arg("normalization") = "NONE", py::arg("count_type") = "int", - py::arg("query_type") = "UCSC"); - file.def("fetch_nnz", &file::fetch_nnz, py::arg("range1") = "", py::arg("range2") = "", - py::arg("query_type") = "UCSC"); } namespace py = pybind11; @@ -146,9 +117,15 @@ PYBIND11_MODULE(hictkpy, m) { m.doc() = "Blazing fast toolkit to work with .hic and .cool files"; - declare_cooler_submodule(m); - declare_hic_submodule(m); + m.def("is_cooler", &file::is_cooler, "test whether path points to a cooler file"); + m.def("is_hic_file", &hictk::hic::utils::is_hic_file, "test whether path points to a .hic file"); + declare_file_class(m); + declare_pixel_selector_class(m); + declare_thin_pixel_class(m, "Int"); + declare_thin_pixel_class(m, "FP"); + declare_pixel_class(m, "Int"); + declare_pixel_class(m, "FP"); } } // namespace hictkpy diff --git a/src/hictkpy_cooler.cpp b/src/hictkpy_cooler.cpp deleted file mode 100644 index 8e313cb..0000000 --- a/src/hictkpy_cooler.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (C) 2023 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#include - -#include -#include -#include - -#include "hictk/cooler/cooler.hpp" -#include "hictk/reference.hpp" -#include "hictkpy/common.hpp" -#include "hictkpy/cooler.hpp" - -namespace py = pybind11; - -namespace hictkpy::cooler { - -hictk::cooler::File file_ctor(std::string_view uri) { return hictk::cooler::File(uri); } - -hictk::cooler::File file_ctor(std::string_view uri, const py::dict &py_chroms, - std::uint32_t bin_size, bool overwrite_if_exists) { - std::vector chrom_names{}; - std::vector chrom_sizes{}; - - for (auto it : py_chroms) { - chrom_names.push_back(py::cast(it.first)); - chrom_sizes.push_back(py::cast(it.second)); - } - const hictk::Reference chroms(chrom_names.begin(), chrom_names.end(), chrom_sizes.begin()); - return hictk::cooler::File::create(uri, chroms, bin_size, overwrite_if_exists); -} - -bool is_cooler(std::string_view uri) { return bool(hictk::cooler::utils::is_cooler(uri)); } - -hictk::cooler::File cooler_ctor(std::string_view uri, const py::dict &py_chroms, - std::uint32_t bin_size, bool overwrite_if_exists, - bool float_pixels) { - std::vector chrom_names{}; - std::vector chrom_sizes{}; - - for (auto it : py_chroms) { - chrom_names.push_back(py::cast(it.first)); - chrom_sizes.push_back(py::cast(it.second)); - } - const hictk::Reference chroms(chrom_names.begin(), chrom_names.end(), chrom_sizes.begin()); - if (float_pixels) { - return hictk::cooler::File::create(uri, chroms, bin_size, overwrite_if_exists); - } - return hictk::cooler::File::create(uri, chroms, bin_size, overwrite_if_exists); -} - -[[nodiscard]] py::dict get_cooler_attrs(const hictk::cooler::File &clr) { - py::dict py_attrs; - const auto &attrs = clr.attributes(); - - py_attrs["bin_size"] = attrs.bin_size; - py_attrs["bin_type"] = attrs.bin_type; - py_attrs["format"] = attrs.format; - py_attrs["format_version"] = attrs.format_version; - - for (const auto &key : {"storage-mode", "creation-date", "generated-by", "assembly", "metadata", - "format-url", "nbins", "nchroms", "nnz", "sum", "cis"}) { - py_attrs[key] = pybind11::none(); - } - - if (attrs.storage_mode.has_value()) { - py_attrs["storage-mode"] = *attrs.storage_mode; - } - - if (attrs.creation_date.has_value()) { - py_attrs["creation-date"] = *attrs.creation_date; - } - if (attrs.generated_by.has_value()) { - py_attrs["generated-by"] = *attrs.generated_by; - } - if (attrs.assembly.has_value()) { - py_attrs["assembly"] = *attrs.assembly; - } - if (attrs.metadata.has_value()) { - py_attrs["metadata"] = *attrs.metadata; - } - if (attrs.format_url.has_value()) { - py_attrs["format-url"] = *attrs.format_url; - } - if (attrs.nbins.has_value()) { - py_attrs["nbins"] = *attrs.nbins; - } - if (attrs.nchroms.has_value()) { - py_attrs["nchroms"] = *attrs.nchroms; - } - if (attrs.nnz.has_value()) { - py_attrs["nnz"] = *attrs.nnz; - } - if (attrs.sum.has_value()) { - std::visit([&](const auto &sum) { py_attrs["sum"] = sum; }, *attrs.sum); - } - if (attrs.cis.has_value()) { - std::visit([&](const auto &cis) { py_attrs["cis"] = cis; }, *attrs.cis); - } - - return py_attrs; -} - -py::object fetch(const hictk::cooler::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, bool join, - std::string_view query_type) { - return file_fetch(f, range1, range2, normalization, count_type, join, query_type); -} - -py::object fetch_sparse(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { - return file_fetch_sparse(f, range1, range2, normalization, count_type, query_type); -} - -py::object fetch_dense(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type) { - return file_fetch_dense(f, range1, range2, normalization, count_type, query_type); -} - -py::object fetch_sum(const hictk::cooler::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { - return file_fetch_sum(f, range1, range2, normalization, count_type, query_type); -} - -std::int64_t fetch_nnz(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view query_type) { - return file_fetch_nnz(f, range1, range2, query_type); -} -} // namespace hictkpy::cooler diff --git a/src/hictkpy_file.cpp b/src/hictkpy_file.cpp index e05e0fe..c69fcce 100644 --- a/src/hictkpy_file.cpp +++ b/src/hictkpy_file.cpp @@ -2,13 +2,16 @@ // // SPDX-License-Identifier: MIT +#include #include #include #include #include +#include "hictk/balancing/methods.hpp" #include "hictk/file.hpp" #include "hictkpy/file.hpp" +#include "hictkpy/pixel_selector.hpp" namespace py = pybind11; @@ -20,50 +23,40 @@ hictk::File ctor(std::string_view path, std::int32_t resolution, std::string_vie hictk::hic::ParseUnitStr(std::string{matrix_unit})}; } -py::object fetch(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, bool join, - std::string_view query_type) { - return std::visit( - [&](const auto &ff) -> py::object { - return file_fetch(ff, range1, range2, normalization, count_type, join, query_type); - }, - f.get()); -} - -py::object fetch_sparse(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { - return std::visit( - [&](const auto &ff) -> py::object { - return file_fetch_sparse(ff, range1, range2, normalization, count_type, query_type); - }, - f.get()); -} - -py::object fetch_dense(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { - return std::visit( - [&](const auto &ff) -> py::object { - return file_fetch_dense(ff, range1, range2, normalization, count_type, query_type); - }, - f.get()); -} +bool is_cooler(std::string_view uri) { return bool(hictk::cooler::utils::is_cooler(uri)); } + +hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, std::string_view range2, + std::string_view normalization, std::string_view count_type, bool join, + std::string_view query_type) { + if (normalization != "NONE") { + count_type = "float"; + } + + if (range1.empty()) { + assert(range2.empty()); + return std::visit( + [&](const auto &ff) { + auto sel = ff.fetch(hictk::balancing::Method{normalization}); + using SelT = decltype(sel); + return hictkpy::PixelSelector(std::make_shared(std::move(sel)), count_type, + join); + }, + f.get()); + } + + const auto qt = + query_type == "UCSC" ? hictk::GenomicInterval::Type::UCSC : hictk::GenomicInterval::Type::BED; -py::object fetch_sum(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { return std::visit( - [&](const auto &ff) -> py::object { - return file_fetch_sum(ff, range1, range2, normalization, count_type, query_type); + [&](const auto &ff) { + auto sel = range2.empty() || range1 == range2 + ? ff.fetch(range1, hictk::balancing::Method(normalization), qt) + : ff.fetch(range1, range2, hictk::balancing::Method(normalization), qt); + using SelT = decltype(sel); + return hictkpy::PixelSelector(std::make_shared(std::move(sel)), count_type, + join); }, f.get()); } -std::int64_t fetch_nnz(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view query_type) { - return std::visit([&](const auto &ff) { return file_fetch_nnz(ff, range1, range2, query_type); }, - f.get()); -} - } // namespace hictkpy::file diff --git a/src/hictkpy_hic.cpp b/src/hictkpy_hic.cpp deleted file mode 100644 index 50d4ebb..0000000 --- a/src/hictkpy_hic.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (C) 2023 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#include "hictkpy/hic.hpp" - -#include -#include -#include - -#include "hictk/hic.hpp" -#include "hictkpy/common.hpp" - -namespace py = pybind11; - -namespace hictkpy::hic { -hictk::hic::File file_ctor(std::string_view path, std::int32_t resolution, - std::string_view matrix_type, std::string_view matrix_unit) { - return hictk::hic::File{std::string{path}, static_cast(resolution), - hictk::hic::ParseMatrixTypeStr(std::string{matrix_type}), - hictk::hic::ParseUnitStr(std::string{matrix_unit})}; -} - -py::object fetch(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, bool join, - std::string_view query_type) { - return file_fetch(f, range1, range2, normalization, count_type, join, query_type); -} - -py::object fetch_sparse(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { - return file_fetch_sparse(f, range1, range2, normalization, count_type, query_type); -} - -py::object fetch_dense(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { - return file_fetch_dense(f, range1, range2, normalization, count_type, query_type); -} - -py::object fetch_sum(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type) { - return file_fetch_sum(f, range1, range2, normalization, count_type, query_type); -} - -std::int64_t fetch_nnz(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view query_type) { - return file_fetch_nnz(f, range1, range2, query_type); -} -} // namespace hictkpy::hic diff --git a/src/hictkpy_pixel_selector.cpp b/src/hictkpy_pixel_selector.cpp new file mode 100644 index 0000000..f37f97f --- /dev/null +++ b/src/hictkpy_pixel_selector.cpp @@ -0,0 +1,231 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#include + +#include + +#include "hictk/cooler/cooler.hpp" +#include "hictk/hic.hpp" +#include "hictkpy/common.hpp" +#include "hictkpy/pixel_selector.hpp" + +namespace py = pybind11; + +namespace hictkpy { + +PixelSelector::PixelSelector(std::shared_ptr sel_, + std::string_view type, bool join_) + : selector(std::move(sel_)), join(join_) { + if (type != "int" && type != "float") { + throw std::runtime_error("type should be int or float"); + } + + if (type == "int") { + pixel_count = std::int32_t{}; + } else { + pixel_count = double{}; + } +} + +PixelSelector::PixelSelector(std::shared_ptr sel_, + std::string_view type, bool join_) + : selector(std::move(sel_)), join(join_) { + if (type != "int" && type != "float") { + throw std::runtime_error("type should be int or float"); + } + + if (type == "int") { + pixel_count = std::int32_t{}; + } else { + pixel_count = double{}; + } +} + +PixelSelector::PixelSelector(std::shared_ptr sel_, + std::string_view type, bool join_) + : selector(std::move(sel_)), join(join_) { + if (type != "int" && type != "float") { + throw std::runtime_error("type should be int or float"); + } + + if (type == "int") { + pixel_count = std::int32_t{}; + } else { + pixel_count = double{}; + } +} + +constexpr bool PixelSelector::int_pixels() const noexcept { + return std::holds_alternative(pixel_count); +} + +constexpr bool PixelSelector::float_pixels() const noexcept { return !int_pixels(); } + +hictk::PixelCoordinates PixelSelector::coord1() const noexcept { + return std::visit( + [](const auto& s) -> hictk::PixelCoordinates { + if constexpr (std::is_same_v, hictk::hic::PixelSelectorAll>) { + return {}; + } else { + return s->coord1(); + } + }, + selector); +} + +hictk::PixelCoordinates PixelSelector::coord2() const noexcept { + return std::visit( + [](const auto& s) -> hictk::PixelCoordinates { + if constexpr (std::is_same_v, hictk::hic::PixelSelectorAll>) { + return {}; + } else { + return s->coord2(); + } + }, + selector); +} + +const hictk::BinTable& PixelSelector::bins() const noexcept { + return std::visit([](const auto& s) -> const hictk::BinTable& { return s->bins(); }, selector); +} + +auto PixelSelector::get_coord1() const -> PixelCoordTuple { + const auto c = coord1(); + return PixelCoordTuple{std::make_tuple(c.bin1.chrom().name(), c.bin1.start(), c.bin1.end(), + c.bin2.chrom().name(), c.bin2.start(), c.bin2.end())}; +} + +auto PixelSelector::get_coord2() const -> PixelCoordTuple { + const auto c = coord2(); + return PixelCoordTuple{std::make_tuple(c.bin1.chrom().name(), c.bin1.start(), c.bin1.end(), + c.bin2.chrom().name(), c.bin2.start(), c.bin2.end())}; +} + +py::iterator PixelSelector::make_iterable() const { + if (join) { + return std::visit( + [&](const auto& s) { + if (int_pixels()) { + using T = std::int32_t; + auto jsel = hictk::transformers::JoinGenomicCoords( + s->template begin(), s->template end(), + std::make_shared(bins())); + return py::make_iterator(jsel.begin(), jsel.end()); + } + using T = double; + auto jsel = hictk::transformers::JoinGenomicCoords( + s->template begin(), s->template end(), + std::make_shared(bins())); + return py::make_iterator(jsel.begin(), jsel.end()); + }, + selector); + } + return std::visit( + [&](const auto& s) { + if (int_pixels()) { + using T = std::int32_t; + return py::make_iterator(s->template begin(), s->template end()); + } + using T = double; + return py::make_iterator(s->template begin(), s->template end()); + }, + selector); +} + +py::object PixelSelector::to_df() const { + return std::visit( + [&](const auto& s) { + if (int_pixels()) { + using T = std::int32_t; + return pixel_iterators_to_df(s->bins(), s->template begin(), s->template end(), + join); + } else { + using T = double; + return pixel_iterators_to_df(s->bins(), s->template begin(), s->template end(), + join); + } + }, + selector); +} + +py::object PixelSelector::to_coo() const { + const auto bin_size = bins().bin_size(); + + const auto span1 = coord1().bin2.end() - coord1().bin1.start(); + const auto span2 = coord2().bin2.end() - coord2().bin1.start(); + const auto num_rows = span1 == 0 ? bins().size() : (span1 + bin_size - 1) / bin_size; + const auto num_cols = span2 == 0 ? bins().size() : (span2 + bin_size - 1) / bin_size; + return std::visit( + [&](const auto& s) { + if (int_pixels()) { + using T = std::int32_t; + return pixel_iterators_to_coo(s->template begin(), s->template end(), num_rows, + num_cols, coord1().bin1.id(), coord2().bin1.id()); + } else { + using T = double; + return pixel_iterators_to_coo(s->template begin(), s->template end(), num_rows, + num_cols, coord1().bin1.id(), coord2().bin1.id()); + } + }, + selector); +} + +py::object PixelSelector::to_numpy() const { + const auto bin_size = bins().bin_size(); + + const auto span1 = coord1().bin2.end() - coord1().bin1.start(); + const auto span2 = coord2().bin2.end() - coord2().bin1.start(); + const auto num_rows = span1 == 0 ? bins().size() : (span1 + bin_size - 1) / bin_size; + const auto num_cols = span2 == 0 ? bins().size() : (span2 + bin_size - 1) / bin_size; + + const auto mirror_matrix = coord1().bin1.chrom() == coord2().bin1.chrom(); + + return std::visit( + [&](const auto& s) { + if (int_pixels()) { + using T = std::int32_t; + return pixel_iterators_to_numpy(s->template begin(), s->template end(), num_rows, + num_cols, mirror_matrix, coord1().bin1.id(), + coord2().bin1.id()); + } else { + using T = double; + return pixel_iterators_to_numpy(s->template begin(), s->template end(), num_rows, + num_cols, mirror_matrix, coord1().bin1.id(), + coord2().bin1.id()); + } + }, + selector); +} + +py::object PixelSelector::sum() const { + return std::visit( + [&](const auto& s) -> py::object { + if (int_pixels()) { + using T = std::int32_t; + return py::cast( + std::accumulate(s->template begin(), s->template end(), std::int64_t(0), + [](std::int64_t accumulator, const hictk::ThinPixel& tp) { + return accumulator + tp.count; + })); + } else { + using T = double; + return py::cast(std::accumulate( + s->template begin(), s->template end(), double(0), + [](T accumulator, const hictk::ThinPixel& tp) { return accumulator + tp.count; })); + } + }, + selector); +} + +std::int64_t PixelSelector::nnz() const { + return std::visit( + [&](const auto& s) { + using T = std::int_fast8_t; + return std::distance(s->template begin(), s->template end()); + }, + selector); +} + +} // namespace hictkpy diff --git a/src/include/hictkpy/cooler.hpp b/src/include/hictkpy/cooler.hpp deleted file mode 100644 index 43acced..0000000 --- a/src/include/hictkpy/cooler.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) 2023 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#pragma once - -#include - -#include -#include -#include -#include - -#include "hictk/cooler/cooler.hpp" - -namespace hictkpy::cooler { - -hictk::cooler::File file_ctor(std::string_view uri); - -hictk::cooler::File file_ctor(std::string_view uri, const pybind11::dict &py_chroms, - std::uint32_t bin_size, bool overwrite_if_exists = false); -bool is_cooler(std::string_view uri); - -hictk::cooler::File cooler_ctor(std::string_view uri, const pybind11::dict &py_chroms, - std::uint32_t bin_size, bool overwrite_if_exists = false, - bool float_pixels = false); -[[nodiscard]] pybind11::dict get_cooler_attrs(const hictk::cooler::File &clr); - -pybind11::object fetch(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, bool join, std::string_view query_type); -pybind11::object fetch_sparse(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type); - -pybind11::object fetch_dense(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type); - -pybind11::object fetch_sum(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type); -std::int64_t fetch_nnz(const hictk::cooler::File &f, std::string_view range1, - std::string_view range2, std::string_view query_type); - -} // namespace hictkpy::cooler diff --git a/src/include/hictkpy/file.hpp b/src/include/hictkpy/file.hpp index ec9ffa5..e39aa7f 100644 --- a/src/include/hictkpy/file.hpp +++ b/src/include/hictkpy/file.hpp @@ -10,28 +10,17 @@ #include "hictk/file.hpp" #include "hictkpy/common.hpp" +#include "hictkpy/pixel_selector.hpp" namespace hictkpy::file { [[nodiscard]] hictk::File ctor(std::string_view path, std::int32_t resolution, std::string_view matrix_type, std::string_view matrix_unit); -pybind11::object fetch(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, bool join, - std::string_view query_type); +[[nodiscard]] bool is_cooler(std::string_view uri); -pybind11::object fetch_sparse(const hictk::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type); - -pybind11::object fetch_dense(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type); - -pybind11::object fetch_sum(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, - std::string_view query_type); - -std::int64_t fetch_nnz(const hictk::File &f, std::string_view range1, std::string_view range2, - std::string_view query_type); +[[nodiscard]] hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, + std::string_view range2, std::string_view normalization, + std::string_view count_type, bool join, + std::string_view query_type); } // namespace hictkpy::file diff --git a/src/include/hictkpy/hic.hpp b/src/include/hictkpy/hic.hpp deleted file mode 100644 index c654ba5..0000000 --- a/src/include/hictkpy/hic.hpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 2023 Roberto Rossini -// -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include - -#include "hictk/hic.hpp" -#include "hictkpy/common.hpp" - -namespace hictkpy::hic { -[[nodiscard]] hictk::hic::File file_ctor(std::string_view path, std::int32_t resolution, - std::string_view matrix_type, - std::string_view matrix_unit); - -pybind11::object fetch(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view normalization, std::string_view count_type, bool join, - std::string_view query_type); - -pybind11::object fetch_sparse(const hictk::hic::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type); - -pybind11::object fetch_dense(const hictk::hic::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type); - -pybind11::object fetch_sum(const hictk::hic::File &f, std::string_view range1, - std::string_view range2, std::string_view normalization, - std::string_view count_type, std::string_view query_type); - -std::int64_t fetch_nnz(const hictk::hic::File &f, std::string_view range1, std::string_view range2, - std::string_view query_type); -} // namespace hictkpy::hic diff --git a/src/include/hictkpy/pixel_selector.hpp b/src/include/hictkpy/pixel_selector.hpp new file mode 100644 index 0000000..f0dce69 --- /dev/null +++ b/src/include/hictkpy/pixel_selector.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2023 Roberto Rossini +// +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include + +#include "hictk/cooler/pixel_selector.hpp" +#include "hictk/hic/pixel_selector.hpp" +#include "hictk/transformers/join_genomic_coords.hpp" + +namespace hictkpy { + +struct PixelSelector { + // clang-format off + using SelectorVar = + std::variant, + std::shared_ptr, + std::shared_ptr>; + + using PixelVar = std::variant; + // clang-format on + + SelectorVar selector{}; + PixelVar pixel_count{std::int32_t(0)}; + bool join{}; + + PixelSelector() = default; + + PixelSelector(std::shared_ptr sel_, std::string_view type, + bool join_); + PixelSelector(std::shared_ptr sel_, std::string_view type, + bool join_); + PixelSelector(std::shared_ptr sel_, std::string_view type, + bool join_); + + using PixelCoordTuple = + std::tuple; + + [[nodiscard]] auto get_coord1() const -> PixelCoordTuple; + [[nodiscard]] auto get_coord2() const -> PixelCoordTuple; + + [[nodiscard]] pybind11::iterator make_iterable() const; + [[nodiscard]] pybind11::object to_df() const; + [[nodiscard]] pybind11::object to_coo() const; + [[nodiscard]] pybind11::object to_numpy() const; + [[nodiscard]] pybind11::object sum() const; + [[nodiscard]] std::int64_t nnz() const; + + private: + [[nodiscard]] constexpr bool int_pixels() const noexcept; + [[nodiscard]] constexpr bool float_pixels() const noexcept; + + [[nodiscard]] hictk::PixelCoordinates coord1() const noexcept; + [[nodiscard]] hictk::PixelCoordinates coord2() const noexcept; + + [[nodiscard]] const hictk::BinTable& bins() const noexcept; +}; + +} // namespace hictkpy diff --git a/test/test_fetch.py b/test/test_fetch.py deleted file mode 100644 index c035e0c..0000000 --- a/test/test_fetch.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (C) 2023 Roberto Rossini -# -# SPDX-License-Identifier: MIT - -import os - -import numpy as np -import pytest - -import hictkpy - -testdir = os.path.dirname(os.path.abspath(__file__)) - - -def compare_pixels(f): - ### GW - df = f.fetch() - assert df["count"].sum() == 119_208_613 - assert len(df) == 890_384 - - ### CIS - df = f.fetch("chr2R:10,000,000-15,000,000") - assert df["count"].sum() == 4_519_080 - assert len(df.columns) == 3 - - df = f.fetch("chr2R:10,000,000-15,000,000", join=True) - assert df["count"].sum() == 4_519_080 - assert len(df.columns) == 7 - - df = f.fetch("chr2R:10,000,000-15,000,000", count_type="int") - assert df["count"].dtype == np.int32 - - df = f.fetch("chr2R:10,000,000-15,000,000", count_type="float") - assert df["count"].dtype == np.float64 - - df = f.fetch("chr2R\t10000000\t15000000", query_type="BED") - assert len(df) == 1275 - - ### TRANS - df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000") - assert df["count"].sum() == 83_604 - assert len(df.columns) == 3 - - df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", join=True) - assert df["count"].sum() == 83_604 - assert len(df.columns) == 7 - - df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="int") - assert df["count"].dtype == np.int32 - - df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="float") - assert df["count"].dtype == np.float64 - - df = f.fetch("chr2R\t10000000\t15000000", "chrX\t0\t10000000", query_type="BED") - assert len(df) == 4995 - - -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000)], -) -def test_file_fetch_file(file, resolution): - f = hictkpy.File(file, resolution) - compare_pixels(f) - - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight") - assert np.isclose(59.349524704033215, df["count"].sum()) - - -@pytest.mark.parametrize( - "file", - [(os.path.join(testdir, "data", "cooler_test_file.cool"))], -) -def test_cooler_fetch_cooler(file): - f = hictkpy.cooler.File(file) - compare_pixels(f) - - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight") - assert np.isclose(59.349524704033215, df["count"].sum()) - - -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "hic_test_file.hic"), 100_000)], -) -def test_hic_fetch_hic(file, resolution): - f = hictkpy.hic.File(file, resolution) - compare_pixels(f) - - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="ICE") - assert np.isclose(59.349524704033215, df["count"].sum()) diff --git a/test/test_fetch_accessors.py b/test/test_fetch_accessors.py index cd6777d..9e283c1 100644 --- a/test/test_fetch_accessors.py +++ b/test/test_fetch_accessors.py @@ -10,38 +10,21 @@ testdir = os.path.dirname(os.path.abspath(__file__)) - -def compare_shared_attributes(f): - assert f.bin_size() == 100_000 - assert f.nbins() == 1380 - - assert "chr2L" in f.chromosomes() - assert len(f.bins()) == 1380 - assert len(f.chromosomes()) == 8 - - -@pytest.mark.parametrize( +pytestmark = pytest.mark.parametrize( "file,resolution", - [(os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000)], + [ + (os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000), + (os.path.join(testdir, "data", "hic_test_file.hic"), 100_000), + ], ) -def test_file_fetch_accessors_file(file, resolution): - f = hictkpy.File(file, resolution) - compare_shared_attributes(f) -@pytest.mark.parametrize( - "file", - [(os.path.join(testdir, "data", "cooler_test_file.cool"))], -) -def test_cooler_fetch_accessors_cooler(file): - f = hictkpy.cooler.File(file) - compare_shared_attributes(f) - +class TestClass: + def test_attributes_cooler(self, file, resolution): + f = hictkpy.File(file, resolution) + assert f.bin_size() == 100_000 + assert f.nbins() == 1380 -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "hic_test_file.hic"), 100_000)], -) -def test_hic_fetch_accessors_hic(file, resolution): - f = hictkpy.hic.File(file, resolution) - compare_shared_attributes(f) + assert "chr2L" in f.chromosomes() + assert len(f.bins()) == 1380 + assert len(f.chromosomes()) == 8 diff --git a/test/test_fetch_dense.py b/test/test_fetch_dense.py index 2f73ed4..cf24298 100644 --- a/test/test_fetch_dense.py +++ b/test/test_fetch_dense.py @@ -11,72 +11,48 @@ testdir = os.path.dirname(os.path.abspath(__file__)) +pytestmark = pytest.mark.parametrize( + "file,resolution", + [ + (os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000), + (os.path.join(testdir, "data", "hic_test_file.hic"), 100_000), + ], +) -def fetch_and_compare(f): - m = f.fetch_dense() - assert m.shape == (1380, 1380) - assert m.sum() == 178_263_235 - - ### CIS - m = f.fetch_dense("chr2R:10,000,000-15,000,000") - assert m.shape == (50, 50) - assert m.sum() == 6_029_333 - - m = f.fetch_dense("chr2R:10,000,000-15,000,000", count_type="int") - assert m.dtype == np.int32 - - m = f.fetch_dense("chr2R:10,000,000-15,000,000", count_type="float") - assert m.dtype == np.float64 - - m = f.fetch_dense("chr2R\t10000000\t15000000", query_type="BED") - assert m.shape == (50, 50) - - ### TRANS - m = f.fetch_dense("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000") - assert m.shape == (50, 100) - assert m.sum() == 83_604 - - m = f.fetch_dense("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="int") - assert m.dtype == np.int32 - - m = f.fetch_dense("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="float") - assert m.dtype == np.float64 - - m = f.fetch_dense("chr2R\t10000000\t15000000", "chrX\t0\t10000000", query_type="BED") - assert m.shape == (50, 100) +class TestClass: + def test_genome_wide(self, file, resolution): + f = hictkpy.File(file, resolution) + m = f.fetch().to_numpy() + assert m.shape == (1380, 1380) + assert m.sum() == 178_263_235 -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000)], -) -def test_file_fetch_dense_file(file, resolution): - f = hictkpy.File(file, resolution) - fetch_and_compare(f) + def test_cis(self, file, resolution): + f = hictkpy.File(file, resolution) + m = f.fetch("chr2R:10,000,000-15,000,000").to_numpy() + assert m.shape == (50, 50) + assert m.sum() == 6_029_333 - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight") - assert np.isclose(59.349524704033215, df["count"].sum()) + m = f.fetch("chr2R:10,000,000-15,000,000", count_type="int").to_numpy() + assert m.dtype == np.int32 + m = f.fetch("chr2R:10,000,000-15,000,000", count_type="float").to_numpy() + assert m.dtype == np.float64 -@pytest.mark.parametrize( - "file", - [(os.path.join(testdir, "data", "cooler_test_file.cool"))], -) -def test_cooler_fetch_dense_cooler(file): - f = hictkpy.cooler.File(file) - fetch_and_compare(f) + m = f.fetch("chr2R\t10000000\t15000000", query_type="BED").to_numpy() + assert m.shape == (50, 50) - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight") - assert np.isclose(59.349524704033215, df["count"].sum()) + def test_trans(self, file, resolution): + f = hictkpy.File(file, resolution) + m = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000").to_numpy() + assert m.shape == (50, 100) + assert m.sum() == 83_604 + m = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="int").to_numpy() + assert m.dtype == np.int32 -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "hic_test_file.hic"), 100_000)], -) -def test_hic_fetch_dense_hic(file, resolution): - f = hictkpy.hic.File(file, resolution) - fetch_and_compare(f) + m = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="float").to_numpy() + assert m.dtype == np.float64 - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="ICE") - assert np.isclose(59.349524704033215, df["count"].sum()) + m = f.fetch("chr2R\t10000000\t15000000", "chrX\t0\t10000000", query_type="BED").to_numpy() + assert m.shape == (50, 100) diff --git a/test/test_fetch_df.py b/test/test_fetch_df.py new file mode 100644 index 0000000..46a3921 --- /dev/null +++ b/test/test_fetch_df.py @@ -0,0 +1,79 @@ +# Copyright (C) 2023 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +import os + +import numpy as np +import pytest + +import hictkpy + +testdir = os.path.dirname(os.path.abspath(__file__)) + +pytestmark = pytest.mark.parametrize( + "file,resolution", + [ + (os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000), + (os.path.join(testdir, "data", "hic_test_file.hic"), 100_000), + ], +) + + +class TestClass: + def test_genome_wide(self, file, resolution): + f = hictkpy.File(file, resolution) + + df = f.fetch().to_df() + assert df["count"].sum() == 119_208_613 + assert len(df) == 890_384 + + def test_cis(self, file, resolution): + f = hictkpy.File(file, resolution) + + df = f.fetch("chr2R:10,000,000-15,000,000").to_df() + assert df["count"].sum() == 4_519_080 + assert len(df.columns) == 3 + + df = f.fetch("chr2R:10,000,000-15,000,000", join=True).to_df() + assert df["count"].sum() == 4_519_080 + assert len(df.columns) == 7 + + df = f.fetch("chr2R:10,000,000-15,000,000", count_type="int").to_df() + assert df["count"].dtype == np.int32 + + df = f.fetch("chr2R:10,000,000-15,000,000", count_type="float").to_df() + assert df["count"].dtype == np.float64 + + df = f.fetch("chr2R\t10000000\t15000000", query_type="BED").to_df() + assert len(df) == 1275 + + def test_trans(self, file, resolution): + f = hictkpy.File(file, resolution) + + df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000").to_df() + assert df["count"].sum() == 83_604 + assert len(df.columns) == 3 + + df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", join=True).to_df() + assert df["count"].sum() == 83_604 + assert len(df.columns) == 7 + + df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="int").to_df() + assert df["count"].dtype == np.int32 + + df = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="float").to_df() + assert df["count"].dtype == np.float64 + + df = f.fetch("chr2R\t10000000\t15000000", "chrX\t0\t10000000", query_type="BED").to_df() + assert len(df) == 4995 + + def test_balanced(self, file, resolution): + f = hictkpy.File(file, resolution) + + if f.is_cooler(): + df = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight").to_df() + else: + df = f.fetch("chr2R:10,000,000-15,000,000", normalization="ICE").to_df() + + assert np.isclose(59.349524704033215, df["count"].sum()) diff --git a/test/test_fetch_nnz.py b/test/test_fetch_nnz.py index b28767a..2afedea 100644 --- a/test/test_fetch_nnz.py +++ b/test/test_fetch_nnz.py @@ -10,34 +10,17 @@ testdir = os.path.dirname(os.path.abspath(__file__)) - -def compare_nnz(f): - assert f.fetch_nnz() == 890_384 - assert f.fetch_nnz("chr2R") == 31_900 - - -@pytest.mark.parametrize( +pytestmark = pytest.mark.parametrize( "file,resolution", - [(os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000)], -) -def test_file_fetch_nnz_file(file, resolution): - f = hictkpy.File(file, resolution) - compare_nnz(f) - - -@pytest.mark.parametrize( - "file", - [(os.path.join(testdir, "data", "cooler_test_file.cool"))], + [ + (os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000), + (os.path.join(testdir, "data", "hic_test_file.hic"), 100_000), + ], ) -def test_cooler_fetch_nnz_cooler(file): - f = hictkpy.cooler.File(file) - compare_nnz(f) -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "hic_test_file.hic"), 100_000)], -) -def test_hic_fetch_nnz_hic(file, resolution): - f = hictkpy.hic.File(file, resolution) - compare_nnz(f) +class TestClass: + def test_fetch_nnz(self, file, resolution): + f = hictkpy.File(file, resolution) + assert f.fetch().nnz() == 890_384 + assert f.fetch("chr2R").nnz() == 31_900 diff --git a/test/test_fetch_sparse.py b/test/test_fetch_sparse.py index 15dca2a..345391a 100644 --- a/test/test_fetch_sparse.py +++ b/test/test_fetch_sparse.py @@ -11,72 +11,62 @@ testdir = os.path.dirname(os.path.abspath(__file__)) +pytestmark = pytest.mark.parametrize( + "file,resolution", + [ + (os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000), + (os.path.join(testdir, "data", "hic_test_file.hic"), 100_000), + ], +) -def fetch_and_compare(f): - m = f.fetch_sparse() - assert m.shape == (1380, 1380) - assert m.sum() == 119_208_613 - - ### CIS - m = f.fetch_sparse("chr2R:10,000,000-15,000,000") - assert m.shape == (50, 50) - assert m.sum() == 4_519_080 - m = f.fetch_sparse("chr2R:10,000,000-15,000,000", count_type="int") - assert m.dtype == np.int32 +class TestClass: + def test_genome_wide(self, file, resolution): + f = hictkpy.File(file, resolution) - m = f.fetch_sparse("chr2R:10,000,000-15,000,000", count_type="float") - assert m.dtype == np.float64 + m = f.fetch().to_coo() + assert m.shape == (1380, 1380) + assert m.sum() == 119_208_613 - m = f.fetch_sparse("chr2R\t10000000\t15000000", query_type="BED") - assert m.shape == (50, 50) + def test_cis(self, file, resolution): + f = hictkpy.File(file, resolution) - ### TRANS - m = f.fetch_sparse("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000") - assert m.shape == (50, 100) - assert m.sum() == 83_604 + m = f.fetch("chr2R:10,000,000-15,000,000").to_coo() + assert m.shape == (50, 50) + assert m.sum() == 4_519_080 - m = f.fetch_sparse("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="int") - assert m.dtype == np.int32 + m = f.fetch("chr2R:10,000,000-15,000,000", count_type="int").to_coo() + assert m.dtype == np.int32 - m = f.fetch_sparse("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="float") - assert m.dtype == np.float64 + m = f.fetch("chr2R:10,000,000-15,000,000", count_type="float").to_coo() + assert m.dtype == np.float64 - m = f.fetch_sparse("chr2R\t10000000\t15000000", "chrX\t0\t10000000", query_type="BED") - assert m.shape == (50, 100) + m = f.fetch("chr2R\t10000000\t15000000", query_type="BED").to_coo() + assert m.shape == (50, 50) + def test_trans(self, file, resolution): + f = hictkpy.File(file, resolution) -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000)], -) -def test_file_fetch_sparse_file(file, resolution): - f = hictkpy.File(file, resolution) - fetch_and_compare(f) + m = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000").to_coo() + assert m.shape == (50, 100) + assert m.sum() == 83_604 - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight") - assert np.isclose(59.349524704033215, df["count"].sum()) + m = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="int").to_coo() + assert m.dtype == np.int32 + m = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="float").to_coo() + assert m.dtype == np.float64 -@pytest.mark.parametrize( - "file", - [(os.path.join(testdir, "data", "cooler_test_file.cool"))], -) -def test_cooler_fetch_sparse_cooler(file): - f = hictkpy.cooler.File(file) - fetch_and_compare(f) + m = f.fetch("chr2R\t10000000\t15000000", "chrX\t0\t10000000", query_type="BED").to_coo() + assert m.shape == (50, 100) - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight") - assert np.isclose(59.349524704033215, df["count"].sum()) + def test_balanced(self, file, resolution): + f = hictkpy.File(file, resolution) + if f.is_cooler(): + m = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight").to_coo() + else: + m = f.fetch("chr2R:10,000,000-15,000,000", normalization="ICE").to_coo() -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "hic_test_file.hic"), 100_000)], -) -def test_hic_fetch_sparse_hic(file, resolution): - f = hictkpy.hic.File(file, resolution) - fetch_and_compare(f) + assert np.isclose(59.349524704033215, m.sum()) - df = f.fetch("chr2R:10,000,000-15,000,000", normalization="ICE") - assert np.isclose(59.349524704033215, df["count"].sum()) diff --git a/test/test_fetch_sum.py b/test/test_fetch_sum.py index 724e1a6..0ea673d 100644 --- a/test/test_fetch_sum.py +++ b/test/test_fetch_sum.py @@ -11,33 +11,17 @@ testdir = os.path.dirname(os.path.abspath(__file__)) -def compare_sum(f): - assert f.fetch_sum() == 119_208_613 - assert f.fetch_sum("chr2L") == 19_968_156 - - -@pytest.mark.parametrize( +pytestmark = pytest.mark.parametrize( "file,resolution", - [(os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000)], + [ + (os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000), + (os.path.join(testdir, "data", "hic_test_file.hic"), 100_000), + ], ) -def test_file_fetch_sum_file(file, resolution): - f = hictkpy.File(file, resolution) - compare_sum(f) -@pytest.mark.parametrize( - "file", - [(os.path.join(testdir, "data", "cooler_test_file.cool"))], -) -def test_cooler_fetch_sum_cooler(file): - f = hictkpy.cooler.File(file) - compare_sum(f) - - -@pytest.mark.parametrize( - "file,resolution", - [(os.path.join(testdir, "data", "hic_test_file.hic"), 100_000)], -) -def test_hic_fetch_sum_hic(file, resolution): - f = hictkpy.hic.File(file, resolution) - compare_sum(f) +class TestClass: + def test_fetch_sum(self, file, resolution): + f = hictkpy.File(file, resolution) + assert f.fetch().sum() == 119_208_613 + assert f.fetch("chr2L").sum() == 19_968_156 From 74603dcc6ad7728b0ac370ecc60239c600ae0d8d Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:26:41 +0200 Subject: [PATCH 3/5] Expose more properties of class Pixel* --- src/hictkpy.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/hictkpy.cpp b/src/hictkpy.cpp index 93947d6..e4e8120 100644 --- a/src/hictkpy.cpp +++ b/src/hictkpy.cpp @@ -43,6 +43,16 @@ static void declare_pixel_class(pybind11::module_ &m, const std::string &suffix) [](const hictk::Pixel &p) { return p.coords.bin1.rel_id(); }) .def_property_readonly("rel_bin2_id", [](const hictk::Pixel &p) { return p.coords.bin2.rel_id(); }) + .def_property_readonly("chrom1", + [](const hictk::Pixel &p) { return p.coords.bin1.chrom().name(); }) + .def_property_readonly("start1", + [](const hictk::Pixel &p) { return p.coords.bin1.start(); }) + .def_property_readonly("end1", [](const hictk::Pixel &p) { return p.coords.bin1.end(); }) + .def_property_readonly("chrom2", + [](const hictk::Pixel &p) { return p.coords.bin2.chrom().name(); }) + .def_property_readonly("start2", + [](const hictk::Pixel &p) { return p.coords.bin2.start(); }) + .def_property_readonly("end2", [](const hictk::Pixel &p) { return p.coords.bin2.end(); }) .def_property_readonly("count", [](const hictk::Pixel &p) { return p.count; }) .def("__repr__", [](const hictk::Pixel &p) { From 23b2f511fbe9d33acd12b3801dfd2a54566f009c Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:27:18 +0200 Subject: [PATCH 4/5] Add method to retrieve file attributes --- src/hictkpy.cpp | 2 + src/hictkpy_file.cpp | 73 ++++++++++++++++++++++++++++++++++++ src/include/hictkpy/file.hpp | 2 + 3 files changed, 77 insertions(+) diff --git a/src/hictkpy.cpp b/src/hictkpy.cpp index e4e8120..cfca501 100644 --- a/src/hictkpy.cpp +++ b/src/hictkpy.cpp @@ -111,6 +111,8 @@ static void declare_file_class(pybind11::module_ &m) { file.def("nbins", &hictk::File::nbins); file.def("nchroms", &hictk::File::nchroms); + file.def("attributes", &file::attributes); + file.def("fetch", &file::fetch, py::arg("range1") = "", py::arg("range2") = "", py::arg("normalization") = "NONE", py::arg("count_type") = "int", py::arg("join") = false, py::arg("query_type") = "UCSC"); diff --git a/src/hictkpy_file.cpp b/src/hictkpy_file.cpp index c69fcce..6efc572 100644 --- a/src/hictkpy_file.cpp +++ b/src/hictkpy_file.cpp @@ -59,4 +59,77 @@ hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, std: f.get()); } +[[nodiscard]] inline py::dict get_cooler_attrs(const hictk::cooler::File &clr) { + py::dict py_attrs; + const auto &attrs = clr.attributes(); + + py_attrs["bin_size"] = attrs.bin_size; + py_attrs["bin_type"] = attrs.bin_type; + py_attrs["format"] = attrs.format; + py_attrs["format_version"] = attrs.format_version; + + for (const auto &key : {"storage-mode", "creation-date", "generated-by", "assembly", "metadata", + "format-url", "nbins", "nchroms", "nnz", "sum", "cis"}) { + py_attrs[key] = pybind11::none(); + } + + if (attrs.storage_mode.has_value()) { + py_attrs["storage-mode"] = *attrs.storage_mode; + } + + if (attrs.creation_date.has_value()) { + py_attrs["creation-date"] = *attrs.creation_date; + } + if (attrs.generated_by.has_value()) { + py_attrs["generated-by"] = *attrs.generated_by; + } + if (attrs.assembly.has_value()) { + py_attrs["assembly"] = *attrs.assembly; + } + if (attrs.metadata.has_value()) { + py_attrs["metadata"] = *attrs.metadata; + } + if (attrs.format_url.has_value()) { + py_attrs["format-url"] = *attrs.format_url; + } + if (attrs.nbins.has_value()) { + py_attrs["nbins"] = *attrs.nbins; + } + if (attrs.nchroms.has_value()) { + py_attrs["nchroms"] = *attrs.nchroms; + } + if (attrs.nnz.has_value()) { + py_attrs["nnz"] = *attrs.nnz; + } + if (attrs.sum.has_value()) { + std::visit([&](const auto &sum) { py_attrs["sum"] = sum; }, *attrs.sum); + } + if (attrs.cis.has_value()) { + std::visit([&](const auto &cis) { py_attrs["cis"] = cis; }, *attrs.cis); + } + + return py_attrs; +} + +[[nodiscard]] inline py::dict get_hic_attrs(const hictk::hic::File &hf) { + py::dict py_attrs; + + py_attrs["bin_size"] = hf.bin_size(); + py_attrs["format"] = "HIC"; + py_attrs["format_version"] = hf.version(); + py_attrs["assembly"] = hf.assembly(); + py_attrs["format-url"] = "https://github.com/aidenlab/hic-format"; + py_attrs["nbins"] = hf.bins().size(); + py_attrs["nchroms"] = hf.nchroms(); + + return py_attrs; +} + +pybind11::dict attributes(const hictk::File &f) { + if (f.is_cooler()) { + return get_cooler_attrs(f.get()); + } + return get_hic_attrs(f.get()); +} + } // namespace hictkpy::file diff --git a/src/include/hictkpy/file.hpp b/src/include/hictkpy/file.hpp index e39aa7f..1171943 100644 --- a/src/include/hictkpy/file.hpp +++ b/src/include/hictkpy/file.hpp @@ -23,4 +23,6 @@ namespace hictkpy::file { std::string_view count_type, bool join, std::string_view query_type); +[[nodiscard]] pybind11::dict attributes(const hictk::File &f); + } // namespace hictkpy::file From 9c54a35fed2db5da47f253703ff7f3c2b1ff6082 Mon Sep 17 00:00:00 2001 From: Roberto Rossini <71787608+robomics@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:27:34 +0200 Subject: [PATCH 5/5] Add more tests --- test/test_fetch_accessors.py | 5 +++ test/test_fetch_iters.py | 86 ++++++++++++++++++++++++++++++++++++ test/test_fetch_sparse.py | 1 - 3 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 test/test_fetch_iters.py diff --git a/test/test_fetch_accessors.py b/test/test_fetch_accessors.py index 9e283c1..dc99001 100644 --- a/test/test_fetch_accessors.py +++ b/test/test_fetch_accessors.py @@ -28,3 +28,8 @@ def test_attributes_cooler(self, file, resolution): assert "chr2L" in f.chromosomes() assert len(f.bins()) == 1380 assert len(f.chromosomes()) == 8 + + if f.is_cooler(): + assert f.attributes()["format"] == "HDF5::Cooler" + else: + assert f.attributes()["format"] == "HIC" diff --git a/test/test_fetch_iters.py b/test/test_fetch_iters.py new file mode 100644 index 0000000..3e7b1d6 --- /dev/null +++ b/test/test_fetch_iters.py @@ -0,0 +1,86 @@ +# Copyright (C) 2023 Roberto Rossini +# +# SPDX-License-Identifier: MIT + +import os + +import numpy as np +import pytest + +import hictkpy + +testdir = os.path.dirname(os.path.abspath(__file__)) + +pytestmark = pytest.mark.parametrize( + "file,resolution", + [ + (os.path.join(testdir, "data", "cooler_test_file.cool"), 100_000), + (os.path.join(testdir, "data", "hic_test_file.hic"), 100_000), + ], +) + + +def compute_sum(sel): + return sum(x.count for x in sel) + + +def compute_nnz(sel): + return sum(1 for _ in sel) + + +class TestClass: + def test_genome_wide(self, file, resolution): + f = hictkpy.File(file, resolution) + + sel = f.fetch() + assert compute_sum(sel) == 119_208_613 + assert compute_nnz(sel) == 890_384 + + def test_cis(self, file, resolution): + f = hictkpy.File(file, resolution) + + sel = f.fetch("chr2R:10,000,000-15,000,000") + assert compute_sum(sel) == 4_519_080 + + sel = f.fetch("chr2R:10,000,000-15,000,000", join=True) + assert compute_sum(sel) == 4_519_080 + sel = f.fetch("chr2R:10,000,000-15,000,000", join=True) + assert all(x.chrom1 == "chr2R" for x in sel) + + sel = f.fetch("chr2R:10,000,000-15,000,000", count_type="int") + assert isinstance(compute_sum(sel), int) + + sel = f.fetch("chr2R:10,000,000-15,000,000", count_type="float") + assert isinstance(compute_sum(sel), float) + + sel = f.fetch("chr2R\t10000000\t15000000", query_type="BED") + assert compute_nnz(sel) == 1275 + + def test_trans(self, file, resolution): + f = hictkpy.File(file, resolution) + + sel = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000") + assert compute_sum(sel) == 83_604 + + sel = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", join=True) + assert compute_sum(sel) == 83_604 + assert all(x.chrom1 == "chr2R" and x.chrom2 == "chrX" for x in sel) + + sel = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="int") + assert isinstance(compute_sum(sel), int) + + sel = f.fetch("chr2R:10,000,000-15,000,000", "chrX:0-10,000,000", count_type="float") + assert isinstance(compute_sum(sel), float) + + sel = f.fetch("chr2R\t10000000\t15000000", "chrX\t0\t10000000", query_type="BED") + assert compute_nnz(sel) == 4995 + + def test_balanced(self, file, resolution): + f = hictkpy.File(file, resolution) + + if f.is_cooler(): + sel = f.fetch("chr2R:10,000,000-15,000,000", normalization="weight") + else: + sel = f.fetch("chr2R:10,000,000-15,000,000", normalization="ICE") + + assert np.isclose(59.349524704033215, compute_sum(sel)) diff --git a/test/test_fetch_sparse.py b/test/test_fetch_sparse.py index 345391a..4ec3c33 100644 --- a/test/test_fetch_sparse.py +++ b/test/test_fetch_sparse.py @@ -69,4 +69,3 @@ def test_balanced(self, file, resolution): m = f.fetch("chr2R:10,000,000-15,000,000", normalization="ICE").to_coo() assert np.isclose(59.349524704033215, m.sum()) -