Skip to content

Commit

Permalink
Merge branch 'main' into feature/better-api
Browse files Browse the repository at this point in the history
  • Loading branch information
robomics authored Nov 13, 2024
2 parents 8979756 + b0a5e60 commit 116defe
Show file tree
Hide file tree
Showing 20 changed files with 250 additions and 39 deletions.
1 change: 1 addition & 0 deletions docs/api/cooler.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Cooler API

.. automethod:: __init__
.. automethod:: add_pixels
.. automethod:: bins
.. automethod:: chromosomes
.. automethod:: finalize
.. automethod:: path
Expand Down
3 changes: 3 additions & 0 deletions docs/api/generic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ Generic API

.. automethod:: __init__
.. automethod:: __getitem__
.. automethod:: attributes
.. automethod:: chromosomes
.. automethod:: is_hic
.. automethod:: is_mcool
.. automethod:: path
.. automethod:: resolutions

Expand Down
1 change: 1 addition & 0 deletions docs/api/hic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Hi-C API

.. automethod:: __init__
.. automethod:: add_pixels
.. automethod:: bins
.. automethod:: chromosomes
.. automethod:: finalize
.. automethod:: path
Expand Down
8 changes: 5 additions & 3 deletions src/bin_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,8 @@ nb::object BinTable::to_df(std::optional<std::string_view> range,
std::vector<std::uint32_t> starts(n);
std::vector<std::uint32_t> ends(n);

const auto chrom_id_offset = static_cast<std::uint32_t>(_bins->chromosomes().at(0).is_all());

std::visit(
[&](const auto& bins) {
const auto [first_bin, last_bin] = !range.has_value()
Expand All @@ -403,16 +405,16 @@ nb::object BinTable::to_df(std::optional<std::string_view> range,
std::size_t i = 0;
std::for_each(first_bin, last_bin, [&](const auto& bin) {
bin_ids[i] = bin.id();
chrom_ids[i] = static_cast<std::int32_t>(bin.chrom().id());
chrom_ids[i] = static_cast<std::int32_t>(bin.chrom().id() - chrom_id_offset);
starts[i] = bin.start();
ends[i] = bin.end();
++i;
});
},
_bins->get());

return make_bin_table_df(chrom_names(), std::move(chrom_ids), std::move(starts), std::move(ends),
std::move(bin_ids));
return make_bin_table_df(chrom_names(false), std::move(chrom_ids), std::move(starts),
std::move(ends), std::move(bin_ids));
}

// Return the shared pointer to the hictk::BinTable held by this wrapper.
// The pointer is handed out as-is (no copy of the table is made here).
std::shared_ptr<const hictk::BinTable> BinTable::get() const noexcept {
  return _bins;
}
Expand Down
10 changes: 10 additions & 0 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ const hictk::Reference &CoolerFileWriter::chromosomes() const {
return ref;
}

std::shared_ptr<const hictk::BinTable> CoolerFileWriter::bins_ptr() const noexcept {
if (!_w) {
return {};
}

return _w->bins_ptr();
}

void CoolerFileWriter::add_pixels(const nb::object &df) {
if (!_w.has_value()) {
throw std::runtime_error(
Expand Down Expand Up @@ -196,6 +204,8 @@ void CoolerFileWriter::bind(nb::module_ &m) {
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
writer.def("bins", &get_bins_from_object<hictkpy::CoolerFileWriter>, "Get table of bins.",
nb::sig("def bins(self) -> hictkpy.BinTable"), nb::rv_policy::move);

writer.def("add_pixels", &hictkpy::CoolerFileWriter::add_pixels,
nb::sig("def add_pixels(self, pixels: pandas.DataFrame)"), nb::arg("pixels"),
Expand Down
5 changes: 3 additions & 2 deletions src/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ static nb::dict get_hic_attrs(const hictk::hic::File &hf) {

py_attrs["bin_size"] = hf.resolution();
py_attrs["format"] = "HIC";
py_attrs["format_version"] = hf.version();
py_attrs["format-version"] = hf.version();
py_attrs["assembly"] = hf.assembly();
py_attrs["format-url"] = "https://github.com/aidenlab/hic-format";
py_attrs["nbins"] = hf.bins().size();
Expand Down Expand Up @@ -305,7 +305,8 @@ void declare_file_class(nb::module_ &m) {

file.def("resolution", &hictk::File::resolution, "Get the bin size in bp.");
file.def("nbins", &hictk::File::nbins, "Get the total number of bins.");
file.def("nchroms", &hictk::File::nchroms, "Get the total number of chromosomes.");
file.def("nchroms", &hictk::File::nchroms, nb::arg("include_ALL") = false,
"Get the total number of chromosomes.");

file.def("attributes", &file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);
Expand Down
6 changes: 6 additions & 0 deletions src/hic_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ auto HiCFileWriter::resolutions() const {

// Return the chromosomes known to the wrapped writer.
// The reference is forwarded directly from `_w`.
const hictk::Reference &HiCFileWriter::chromosomes() const {
  return _w.chromosomes();
}

// Return the table of bins used by the writer for the given resolution.
//
// The table is looked up on the wrapped writer and wrapped in a
// hictkpy::BinTable before being returned.
hictkpy::BinTable HiCFileWriter::bins(std::uint32_t resolution) const {
  return hictkpy::BinTable{
      _w.bins(resolution),
  };
}

void HiCFileWriter::add_pixels(const nb::object &df) {
if (_finalized) {
throw std::runtime_error(
Expand Down Expand Up @@ -184,6 +188,8 @@ void HiCFileWriter::bind(nb::module_ &m) {
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
writer.def("bins", &hictkpy::HiCFileWriter::bins, "Get table of bins for the given resolution.",
nb::sig("def bins(self, resolution: int) -> hictkpy.BinTable"), nb::rv_policy::move);

writer.def("add_pixels", &hictkpy::HiCFileWriter::add_pixels,
nb::sig("def add_pixels(self, pixels: pd.DataFrame) -> None"), nb::arg("pixels"),
Expand Down
1 change: 1 addition & 0 deletions src/include/hictkpy/cooler_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class CoolerFileWriter {
[[nodiscard]] std::uint32_t resolution() const noexcept;

[[nodiscard]] const hictk::Reference& chromosomes() const;
[[nodiscard]] std::shared_ptr<const hictk::BinTable> bins_ptr() const noexcept;

void add_pixels(const nanobind::object& df);

Expand Down
1 change: 1 addition & 0 deletions src/include/hictkpy/hic_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class HiCFileWriter {
[[nodiscard]] auto resolutions() const;

[[nodiscard]] const hictk::Reference& chromosomes() const;
[[nodiscard]] hictkpy::BinTable bins(std::uint32_t resolution) const;

void add_pixels(const nanobind::object& df);

Expand Down
7 changes: 3 additions & 4 deletions src/include/hictkpy/pixel_selector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,10 @@ struct PixelSelector {

[[nodiscard]] std::string repr() const;

using PixelCoordTuple =
std::tuple<std::string, std::int32_t, std::int32_t, std::string, std::int32_t, std::int32_t>;
using GenomicCoordTuple = std::tuple<std::string, std::int64_t, std::int64_t>;

[[nodiscard]] auto get_coord1() const -> PixelCoordTuple;
[[nodiscard]] auto get_coord2() const -> PixelCoordTuple;
[[nodiscard]] auto get_coord1() const -> GenomicCoordTuple;
[[nodiscard]] auto get_coord2() const -> GenomicCoordTuple;

[[nodiscard]] nanobind::iterator make_iterable() const;
[[nodiscard]] nanobind::object to_arrow(std::string_view span = "upper_triangle") const;
Expand Down
42 changes: 42 additions & 0 deletions src/multires_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,43 @@ static std::string repr(const hictk::MultiResFile& mrf) {

// Return the path of the given multi-resolution file as a std::filesystem::path.
static std::filesystem::path get_path(const hictk::MultiResFile& mrf) {
  return mrf.path();
}

// Build the attribute dictionary for a .hic file.
//
// A fixed set of entries (format, format-version, assembly, format-url,
// nchroms) is written first. The attributes stored in the file are copied
// afterwards, so a file attribute whose key matches one of the fixed entries
// replaces it.
static nb::dict get_attrs(const hictk::hic::File& hf) {
  nb::dict attrs;

  attrs["format"] = "HIC";
  attrs["format-version"] = hf.version();
  attrs["assembly"] = hf.assembly();
  attrs["format-url"] = "https://github.com/aidenlab/hic-format";
  attrs["nchroms"] = hf.nchroms();

  // Keys must be converted to Python objects before they can index the dict.
  for (const auto& [key, value] : hf.attributes()) {
    attrs[nb::cast(key)] = value;
  }

  return attrs;
}

// Build the attribute dictionary for a multi-resolution Cooler file.
//
// The assembly name is not read from the multi-resolution attributes: it is
// taken from the Cooler stored at the first listed resolution, falling back
// to "unknown" when that file does not define one.
static nb::dict get_attrs(const hictk::cooler::MultiResFile& mclr) {
  const auto& mcool_attrs = mclr.attributes();
  const auto first_resolution = mclr.resolutions().front();

  nb::dict attrs;

  attrs["format"] = mcool_attrs.format;
  attrs["format-version"] = mcool_attrs.format_version;
  attrs["format-url"] = "https://github.com/open2c/cooler";
  attrs["assembly"] = mclr.open(first_resolution).attributes().assembly.value_or("unknown");
  attrs["nchroms"] = mclr.chromosomes().size();

  return attrs;
}

// Collect the attributes of a multi-resolution file into a Python dictionary.
//
// The format-specific entries come from the matching get_attrs() overload:
// for .hic files the file opened at the first listed resolution is used,
// otherwise the file is re-opened as a multi-resolution Cooler. The list of
// available resolutions is then stored under the "resolutions" key.
static nb::dict attributes(const hictk::MultiResFile& f) {
  const auto format_specific_attrs = [&]() -> nb::dict {
    if (f.is_hic()) {
      return get_attrs(f.open(f.resolutions().front()).get<hictk::hic::File>());
    }
    return get_attrs(hictk::cooler::MultiResFile{f.path()});
  };

  auto attrs = format_specific_attrs();
  attrs["resolutions"] = f.resolutions();

  return attrs;
}

bool is_mcool_file(const std::filesystem::path& path) {
return bool(hictk::cooler::utils::is_multires_file(path.string()));
}
Expand All @@ -54,12 +91,17 @@ void declare_multires_file_class(nb::module_& m) {
mres_file.def("__repr__", &multires_file::repr, nb::rv_policy::move);

mres_file.def("path", &multires_file::get_path, "Get the file path.", nb::rv_policy::move);
mres_file.def("is_mcool", &hictk::MultiResFile::is_mcool,
"Test whether the file is in .mcool format.");
mres_file.def("is_hic", &hictk::MultiResFile::is_hic, "Test whether the file is in .hic format.");
mres_file.def("chromosomes", &get_chromosomes_from_object<hictk::MultiResFile>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
mres_file.def("resolutions", &get_resolutions, "Get the list of available resolutions.",
nb::rv_policy::take_ownership);
mres_file.def("attributes", &multires_file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);
mres_file.def("__getitem__", &hictk::MultiResFile::open,
"Open the Cooler or .hic file corresponding to the resolution given as input.",
nb::rv_policy::move);
Expand Down
28 changes: 19 additions & 9 deletions src/pixel_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ std::string PixelSelector::repr() const {
count_type_to_str(pixel_count));
}

return fmt::format(FMT_STRING("PixelSelector({}, {}; {}; {})"), coord1(), coord2(),
return fmt::format(FMT_STRING("PixelSelector({}:{}-{}; {}:{}-{}; {}; {})"),
coord1().bin1.chrom().name(), coord1().bin1.start(), coord1().bin2.end(),
coord2().bin1.chrom().name(), coord2().bin1.start(), coord2().bin2.end(),
pixel_format == PixelFormat::COO ? "COO" : "BG2",
count_type_to_str(pixel_count));
}
Expand Down Expand Up @@ -105,16 +107,24 @@ const hictk::BinTable& PixelSelector::bins() const noexcept {
return std::visit([](const auto& s) -> const hictk::BinTable& { return s->bins(); }, selector);
}

auto PixelSelector::get_coord1() const -> PixelCoordTuple {
const auto c = coord1();
return PixelCoordTuple{std::make_tuple(c.bin1.chrom().name(), c.bin1.start(), c.bin1.end(),
c.bin2.chrom().name(), c.bin2.start(), c.bin2.end())};
// Convert a pair of pixel coordinates into a (chromosome, start, end) tuple.
//
// When `coords` evaluates to false the tuple is ("ALL", 0, bins.size()).
// Otherwise the chromosome name and start come from the first bin and the end
// comes from the second bin; both bins are expected to lie on the same
// chromosome (checked with assert only).
//
// NOTE(review): in the "ALL" case the third field is the number of bins,
// whereas in the regular case it is a bin end position - confirm this
// difference in units is intended.
[[nodiscard]] static PixelSelector::GenomicCoordTuple coords_to_tuple(
    const hictk::PixelCoordinates& coords, const hictk::BinTable& bins) {
  if (!coords) {
    const auto num_bins = static_cast<std::int64_t>(bins.size());
    return PixelSelector::GenomicCoordTuple{"ALL", 0, num_bins};
  }

  const auto& first_bin = coords.bin1;
  const auto& last_bin = coords.bin2;

  assert(first_bin.chrom() == last_bin.chrom());

  const auto start_pos = static_cast<std::int64_t>(first_bin.start());
  const auto end_pos = static_cast<std::int64_t>(last_bin.end());

  return PixelSelector::GenomicCoordTuple{std::string{first_bin.chrom().name()}, start_pos,
                                          end_pos};
}

// Return the first set of query coordinates as a (chromosome, start, end) tuple.
auto PixelSelector::get_coord1() const -> GenomicCoordTuple {
  const auto& query_coords = coord1();
  return coords_to_tuple(query_coords, bins());
}

auto PixelSelector::get_coord2() const -> PixelCoordTuple {
const auto c = coord2();
return PixelCoordTuple{std::make_tuple(c.bin1.chrom().name(), c.bin1.start(), c.bin1.end(),
c.bin2.chrom().name(), c.bin2.start(), c.bin2.end())};
// Return the second set of query coordinates as a (chromosome, start, end) tuple.
auto PixelSelector::get_coord2() const -> GenomicCoordTuple {
  const auto& query_coords = coord2();
  return coords_to_tuple(query_coords, bins());
}

template <typename N, typename PixelSelector>
Expand Down
9 changes: 4 additions & 5 deletions test/test_bin_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,16 @@ def test_getters(self):
bins.get_id("abc", 100)

@pytest.mark.skipif(
not numpy_avail() or not pandas_avail() or not pyarrow_avail(),
reason="numpy, pandas, or pyarrow are not available",
not pandas_avail() or not pyarrow_avail(),
reason="pandas or pyarrow are not available",
)
def test_vectorized_getters(self):
import numpy as np

chroms = {"chr1": 1000, "chr2": 500}
bins = hictkpy.BinTable(chroms, 100)

assert len(bins.get(np.array([1, 1]))) == 2
assert len(bins.get_ids(np.array(["chr1", "chr1"]), np.array([1, 1]))) == 2
assert len(bins.get([1, 1])) == 2
assert len(bins.get_ids(["chr1", "chr1"], [1, 1])) == 2

@pytest.mark.skipif(not pandas_avail() or not pyarrow_avail(), reason="pandas is not available")
def test_merge(self):
Expand Down
2 changes: 1 addition & 1 deletion test/test_file_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class TestClass:
def test_attributes(self, file, resolution):
f = hictkpy.File(file, resolution)
assert f.resolution() == 100_000
# assert f.nchroms() == 8 # TODO enable after merging https://github.com/paulsengroup/hictk/pull/294
assert f.nchroms() == 8
assert f.nbins() == 1380

assert "chr2L" in f.chromosomes()
Expand Down
23 changes: 19 additions & 4 deletions test/test_file_creation_cool.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ def setup_method():
logging.basicConfig(level="INFO", force=True)
logging.getLogger().setLevel("INFO")

def test_accessors(self, file, resolution, tmpdir):
    """Check the accessors of a cooler FileWriter created from a bin table.

    The writer is built from the bins of the input file, then its string
    representation, path, resolution, chromosomes and bin table are compared
    with the values it was constructed from.
    """
    reference_bins = hictkpy.File(file, resolution).bins()

    dest = tmpdir / "test.cool"
    writer = hictkpy.cooler.FileWriter(dest, reference_bins)

    assert str(writer).startswith("CoolFileWriter(")
    assert writer.path() == dest

    # When no resolution is given the writer is expected to report 0.
    expected_resolution = 0 if resolution is None else resolution
    assert writer.resolution() == expected_resolution

    assert writer.chromosomes() == reference_bins.chromosomes()

    # An empty comparison frame means the two bin tables are identical.
    assert len(writer.bins().to_df().compare(reference_bins.to_df())) == 0

def test_file_creation_thin_pixel(self, file, resolution, tmpdir):
f = hictkpy.File(file, resolution)
if f.bins().type() != "fixed":
Expand All @@ -39,7 +54,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir):
df = f.fetch(join=False).to_df()
expected_sum = df["count"].sum()

path = tmpdir / "test1.cool"
path = tmpdir / "test.cool"
w = hictkpy.cooler.FileWriter(path, f.chromosomes(), f.resolution())

chunk_size = 1000
Expand All @@ -66,7 +81,7 @@ def test_file_creation(self, file, resolution, tmpdir):
df = f.fetch(join=True).to_df()
expected_sum = df["count"].sum()

path = tmpdir / "test2.cool"
path = tmpdir / "test.cool"
w = hictkpy.cooler.FileWriter(path, f.chromosomes(), f.resolution())

chunk_size = 1000
Expand All @@ -91,7 +106,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir):
df = f.fetch(join=True).to_df()
expected_sum = df["count"].sum()

path = tmpdir / "test2.cool"
path = tmpdir / "test.cool"
w = hictkpy.cooler.FileWriter(path, f.bins())

chunk_size = 1000
Expand Down Expand Up @@ -119,7 +134,7 @@ def test_file_creation_float_counts(self, file, resolution, tmpdir):
df["count"] += 0.12345
expected_sum = df["count"].sum()

path = tmpdir / "test3.cool"
path = tmpdir / "test.cool"
w = hictkpy.cooler.FileWriter(path, f.chromosomes(), f.resolution())

chunk_size = 1000
Expand Down
20 changes: 17 additions & 3 deletions test/test_file_creation_hic.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,20 @@ def setup_method():
logging.basicConfig(level="INFO", force=True)
logging.getLogger().setLevel("INFO")

def test_accessors(self, file, resolution, tmpdir):
    """Check the accessors of a .hic FileWriter created from a bin table.

    The test is skipped for input files whose bin table is not of type
    "fixed". Otherwise the writer's string representation, path, resolutions,
    chromosomes and bin table are compared with the values it was constructed
    from.
    """
    reference_bins = hictkpy.File(file, resolution).bins()
    if reference_bins.type() != "fixed":
        pytest.skip(f'BinTable of file "{file}" does not have fixed bins.')

    dest = tmpdir / "test.hic"
    writer = hictkpy.hic.FileWriter(dest, reference_bins)

    assert str(writer).startswith("HiCFileWriter(")
    assert writer.path() == dest
    assert writer.resolutions() == [resolution]
    assert writer.chromosomes() == reference_bins.chromosomes()

    # An empty comparison frame means the two bin tables are identical.
    assert len(writer.bins(resolution).to_df().compare(reference_bins.to_df())) == 0

def test_file_creation_thin_pixel(self, file, resolution, tmpdir):
f = hictkpy.File(file, resolution)
if f.bins().type() != "fixed":
Expand All @@ -39,7 +53,7 @@ def test_file_creation_thin_pixel(self, file, resolution, tmpdir):
df = f.fetch(join=False).to_df()
expected_sum = df["count"].sum()

path = tmpdir / "test1.hic"
path = tmpdir / "test.hic"
w = hictkpy.hic.FileWriter(path, f.chromosomes(), f.resolution())

chunk_size = 1000
Expand All @@ -66,7 +80,7 @@ def test_file_creation(self, file, resolution, tmpdir):
df = f.fetch(join=True).to_df()
expected_sum = df["count"].sum()

path = tmpdir / "test2.hic"
path = tmpdir / "test.hic"
w = hictkpy.hic.FileWriter(path, f.chromosomes(), f.resolution())

chunk_size = 1000
Expand All @@ -91,7 +105,7 @@ def test_file_creation_bin_table(self, file, resolution, tmpdir):
df = f.fetch(join=True).to_df()
expected_sum = df["count"].sum()

path = tmpdir / "test2.hic"
path = tmpdir / "test.hic"
if f.bins().type() != "fixed":
with pytest.raises(Exception):
hictkpy.hic.FileWriter(path, f.bins())
Expand Down
Loading

0 comments on commit 116defe

Please sign in to comment.