Skip to content

Commit

Permalink
minor to info.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
jermp committed Aug 18, 2024
1 parent fdfa686 commit 9158671
Showing 1 changed file with 18 additions and 9 deletions.
27 changes: 18 additions & 9 deletions include/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,20 @@ double bits_per_kmer_formula(uint64_t k, /* kmer length */
/* summing (M-1) provides an upper bound to the num. of super-kmers */
double Z = (2.0 * n) / (k - m + 2) + (M - 1);

double num_bits = 2 * N + Z * (5.0 + std::ceil(std::log2(N))) +
M * (2.0 + std::ceil(std::log2(static_cast<double>(N) / M)));
/* A cache line is 64 B = 512 bits -->
max window_size that fits in a cache line is 512/2 = 256
assuming a 2-bit encoded stream. */
const uint64_t window_size = 1; /* 256; */

double num_bits =
2 * N + Z * (5.0 + std::ceil(std::log2(std::ceil(static_cast<double>(N) / window_size)))) +
M * (2.0 + std::ceil(std::log2(static_cast<double>(N) / M)));

return num_bits / n;
}

/* Express `amount` as a percentage of `total`.
   The product is formed in double precision before dividing, so the
   result is a floating-point percentage rather than a truncated integer. */
double perc(uint64_t amount, uint64_t total) {
    const double scaled = static_cast<double>(amount) * 100.0;
    return scaled / static_cast<double>(total);
}

void dictionary::print_space_breakdown() const {
const uint64_t num_bytes = (num_bits() + 7) / 8;
std::cout << "total index size: " << num_bytes << " [B] -- "
Expand All @@ -58,20 +66,21 @@ void dictionary::print_space_breakdown() const {
std::cout << " minimizers: " << static_cast<double>(m_minimizers.num_bits()) / size()
<< " [bits/kmer] ("
<< static_cast<double>(m_minimizers.num_bits()) / m_minimizers.size()
<< " [bits/key])\n";
<< " [bits/key]) -- " << perc(m_minimizers.num_bits(), num_bits()) << "%\n";
std::cout << " pieces: " << static_cast<double>(m_buckets.pieces.num_bits()) / size()
<< " [bits/kmer]\n";
<< " [bits/kmer] -- " << perc(m_buckets.pieces.num_bits(), num_bits()) << "%\n";
std::cout << " num_super_kmers_before_bucket: "
<< static_cast<double>(m_buckets.num_super_kmers_before_bucket.num_bits()) / size()
<< " [bits/kmer]\n";
<< " [bits/kmer] -- "
<< perc(m_buckets.num_super_kmers_before_bucket.num_bits(), num_bits()) << "%\n";
std::cout << " offsets: " << static_cast<double>(8 * m_buckets.offsets.bytes()) / size()
<< " [bits/kmer]\n";
<< " [bits/kmer] -- " << perc(8 * m_buckets.offsets.bytes(), num_bits()) << "%\n";
std::cout << " strings: " << static_cast<double>(8 * m_buckets.strings.bytes()) / size()
<< " [bits/kmer]\n";
<< " [bits/kmer] -- " << perc(8 * m_buckets.strings.bytes(), num_bits()) << "%\n";
std::cout << " skew_index: " << static_cast<double>(m_skew_index.num_bits()) / size()
<< " [bits/kmer]\n";
<< " [bits/kmer] -- " << perc(m_skew_index.num_bits(), num_bits()) << "%\n";
std::cout << " weights: " << static_cast<double>(m_weights.num_bits()) / size()
<< " [bits/kmer]\n";
<< " [bits/kmer] -- " << perc(m_weights.num_bits(), num_bits()) << "%\n";
m_weights.print_space_breakdown(size());
std::cout << " --------------\n";
std::cout << " total: " << static_cast<double>(num_bits()) / size() << " [bits/kmer]"
Expand Down

0 comments on commit 9158671

Please sign in to comment.