diff --git a/.travis.yml b/.travis.yml
index 7dbec261b..b99e99d6f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -96,9 +96,9 @@ matrix:
             - *default-packages
             - clang-3.8

-    # OS X 10.10 + Xcode 7.1.1
+    # OS X 10.10 + Xcode 6.4 (this is the only 10.10 image on Travis)
     - os: osx
-      osx_image: xcode7.1
+      osx_image: xcode6.4
       env: COMPILER=clang

     # OS X 10.11 + Xcode 7.3
@@ -106,9 +106,9 @@ matrix:
       osx_image: xcode7.3
       env: COMPILER=clang

-    # OS X 10.11 + Xcode 8
+    # OS X 10.12 + Xcode 8.2
     - os: osx
-      osx_image: xcode8
+      osx_image: xcode8.2
       env: COMPILER=clang

     # OS X/GCC 6
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 15a5eec81..c411600c9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,22 @@
+# [v3.0.1][3.0.1]
+## New features
+- Add an optional `xz{i,o}fstream` to `meta::io` if compiled with liblzma
+  available.
+- `util::disk_vector<const T>` can now be used to specify a read-only view
+  of a disk-backed vector.
+
+## Bug fixes
+- `ir_eval::print_stats` now takes a `num_docs` parameter to properly
+  display evaluation metrics at a certain cutoff point, which was always 5
+  beforehand. This fixes a bug in `query-runner` where the stats were not
+  being computed according to the cutoff point specified in the
+  configuration.
+- `ir_eval::avg_p` now correctly stops computing after `num_docs`. Before,
+  if you specified `num_docs` as a smaller value than the size of the
+  result list, it would erroneously keep calculating until the end of the
+  result list instead of stopping after `num_docs` elements.
+- `{inverted,forward}_index` can now be loaded from read-only filesystems.
+
 # [v3.0.0][3.0.0]
 ## New features
 - Add an `embedding_analyzer` that represents documents with their averaged word
@@ -609,7 +628,8 @@
 # [v1.0][1.0]
 - Initial release.

-[unreleased]: https://github.com/meta-toolkit/meta/compare/v3.0.0...develop
+[unreleased]: https://github.com/meta-toolkit/meta/compare/v3.0.1...develop
+[3.0.1]: https://github.com/meta-toolkit/meta/compare/v3.0.0...v3.0.1
 [3.0.0]: https://github.com/meta-toolkit/meta/compare/v2.4.2...v3.0.0
 [2.4.2]: https://github.com/meta-toolkit/meta/compare/v2.4.1...v2.4.2
 [2.4.1]: https://github.com/meta-toolkit/meta/compare/v2.4.0...v2.4.1
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f72ab0e81..69175d035 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,7 +9,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS 1)

 set(MeTA_VERSION_MAJOR 3)
 set(MeTA_VERSION_MINOR 0)
-set(MeTA_VERSION_PATCH 0)
+set(MeTA_VERSION_PATCH 1)
 set(MeTA_VERSION
     "${MeTA_VERSION_MAJOR}.${MeTA_VERSION_MINOR}.${MeTA_VERSION_PATCH}")

@@ -29,6 +29,11 @@ include(deps/meta-cmake/CompilerKludges.cmake)

 find_package(Threads REQUIRED)
 find_package(ZLIB REQUIRED)
+find_package(LibLZMA)
+
+if (LIBLZMA_FOUND AND LIBLZMA_HAS_EASY_ENCODER)
+  set(META_HAS_LIBLZMA ON)
+endif()

 cmake_push_check_state()

@@ -68,6 +73,10 @@ endif()
 target_include_directories(meta-definitions SYSTEM INTERFACE
                            ${ZLIB_INCLUDE_DIRS})

+if (META_HAS_LIBLZMA)
+  target_compile_definitions(meta-definitions INTERFACE -DMETA_HAS_LIBLZMA=1)
+endif()
+
 if (LIBDL_LIBRARY)
   target_link_libraries(meta-definitions INTERFACE ${LIBDL_LIBRARY})
 endif()
diff --git a/RELEASING.md b/RELEASING.md
index 6bdab06b2..8b1133f99 100644
--- a/RELEASING.md
+++ b/RELEASING.md
@@ -12,27 +12,29 @@ follow a consistent releasing process.
    changes (like enhancements) increment the Minor release number. Patch
    versions should be released only for bug fixes.

-2. Ensure `CHANGELOG.md` is up to date.
+2. Update the version number in `CMakeLists.txt`.
+
+3. Ensure `CHANGELOG.md` is up to date.

    If there are *any* breaking changes, mention these explicitly. If
    there are migration strategies to work around these breaking changes,
    provide a brief explanation (or a link to explain them).

-3. If there are major *or* minor API changes, ensure that the documentation
+4. If there are major *or* minor API changes, ensure that the documentation
    on the website (meta-toolkit/meta-toolkit.org) is correct. Update
    Doxygen as necessary.

-4. Ensure that the build is passing on both Travis (Linux + OS X) and
+5. Ensure that the build is passing on both Travis (Linux + OS X) and
    Appveyor (Windows/MinGW-w64).

-5. Merge branch `develop` into `master` with a commit message
+6. Merge branch `develop` into `master` with a commit message

    > Merge branch 'develop' for MeTA vX.Y.Z

    Use `git merge develop --no-ff` to create a merge commit.

-6. Tag the merge commit. The tag should be both annotated *and* signed:
+7. Tag the merge commit. The tag should be both annotated *and* signed:

    ```
    git tag -as vX.Y.Z
@@ -42,17 +44,17 @@ follow a consistent releasing process.
    version. Remove unnecessary markdown syntax like header markers and
    code blocks. Backticks can stay.

-7. Push the merge and the tags to GitHub:
+8. Push the merge and the tags to GitHub:

    ```
    git push --follow-tags
    ```

-8. Create a release on GitHub using the new tag. Its title should be "MeTA
+9. Create a release on GitHub using the new tag. Its title should be "MeTA
    vX.Y.Z". The contents of the message should be exactly the same as the
    CHANGELOG entry for that release.

-9. Upload the model files and include a section in the GitHub release notes
-   containing their sha256 sums.
+10. Upload the model files and include a section in the GitHub release notes
+    containing their sha256 sums.
diff --git a/include/meta/index/disk_index_impl.h b/include/meta/index/disk_index_impl.h
index 3d37e67d5..5ecabec41 100644
--- a/include/meta/index/disk_index_impl.h
+++ b/include/meta/index/disk_index_impl.h
@@ -66,7 +66,7 @@ class disk_index::disk_index_impl
      * Loads the doc labels.
      * @param num_docs The number of documents stored in the index
      */
-    void load_labels(uint64_t num_docs = 0);
+    void load_labels();

     /**
      * Loads the term_id mapping.
@@ -83,13 +83,6 @@ class disk_index::disk_index_impl
      */
     void save_label_id_mapping();

-    /**
-     * Sets the label for a document.
-     * @param id The document id
-     * @param label The new label
-     */
-    void set_label(doc_id id, const class_label& label);
-
     /**
      * @return the total number of unique terms in the index.
      */
@@ -106,7 +99,6 @@ class disk_index::disk_index_impl
      */
     std::vector<class_label> class_labels() const;

-  private:
     /**
      * @param lbl the string class label to find the id for
      * @return the label_id of a class_label, creating a new one if
@@ -114,6 +106,7 @@ class disk_index::disk_index_impl
      */
     label_id get_label_id(const class_label& lbl);

+  private:
     /// the location of this index
     std::string index_name_;

@@ -121,7 +114,7 @@ class disk_index::disk_index_impl
      * Maps which class a document belongs to (if any).
      * Each index corresponds to a doc_id (uint64_t).
      */
-    util::optional<util::disk_vector<label_id>> labels_;
+    util::optional<util::disk_vector<const label_id>> labels_;

     /// Stores additional metadata for each document
     util::optional<metadata_file> metadata_;
diff --git a/include/meta/index/eval/ir_eval.h b/include/meta/index/eval/ir_eval.h
index 32b9969ed..594168489 100644
--- a/include/meta/index/eval/ir_eval.h
+++ b/include/meta/index/eval/ir_eval.h
@@ -111,9 +111,11 @@ class ir_eval
      * @param results The ranked list of results
      * @param q_id The query that was run to produce these results
      * @param out The stream to print to
+     * @param num_docs the @k parameters for each measurement
      */
     void print_stats(const result_type& results, query_id q_id,
-                     std::ostream& out = std::cout);
+                     std::ostream& out = std::cout,
+                     uint64_t num_docs = std::numeric_limits<uint64_t>::max());

     /**
      * Clears saved scores for MAP and gMAP.
diff --git a/include/meta/index/metadata_file.h b/include/meta/index/metadata_file.h
index 4bfb93e69..5524e263f 100644
--- a/include/meta/index/metadata_file.h
+++ b/include/meta/index/metadata_file.h
@@ -76,7 +76,7 @@ class metadata_file
     corpus::metadata::schema_type schema_;

     /// the seek positions for every document in this file
-    util::disk_vector<uint64_t> index_;
+    util::disk_vector<const uint64_t> index_;

     /// the mapped file for reading metadata from
     io::mmap_file md_db_;
diff --git a/include/meta/index/postings_file.h b/include/meta/index/postings_file.h
index 1dac85acd..47d682195 100644
--- a/include/meta/index/postings_file.h
+++ b/include/meta/index/postings_file.h
@@ -82,7 +82,7 @@ class postings_file

   private:
     io::mmap_file postings_;
-    util::disk_vector<uint64_t> byte_locations_;
+    util::disk_vector<const uint64_t> byte_locations_;
 };
 }
 }
diff --git a/include/meta/index/vocabulary_map.h b/include/meta/index/vocabulary_map.h
index e46d8c063..120f22ddf 100644
--- a/include/meta/index/vocabulary_map.h
+++ b/include/meta/index/vocabulary_map.h
@@ -38,7 +38,7 @@ class vocabulary_map
      * Byte positions for each term in the leaves to allow for reverse
      * lookup of a the string associated with a given id.
      */
-    util::disk_vector<uint64_t> inverse_;
+    util::disk_vector<const uint64_t> inverse_;

     /**
      * The size of the nodes in the tree.
diff --git a/include/meta/io/xzstream.h b/include/meta/io/xzstream.h
new file mode 100644
index 000000000..dc2c43c5c
--- /dev/null
+++ b/include/meta/io/xzstream.h
@@ -0,0 +1,104 @@
+/**
+ * @file xzstream.h
+ * @author Chase Geigle
+ *
+ * All files in META are dual-licensed under the MIT and NCSA licenses. For more
+ * details, consult the file LICENSE.mit and LICENSE.ncsa in the root of the
+ * project.
+ */
+
+#ifndef META_UTIL_XZSTREAM_H_
+#define META_UTIL_XZSTREAM_H_
+
+#include <lzma.h>
+
+#include <cstdio>
+#include <istream>
+#include <ostream>
+#include <stdexcept>
+#include <streambuf>
+#include <vector>
+
+#include "meta/config.h"
+
+namespace meta
+{
+namespace io
+{
+
+class xz_exception : public std::runtime_error
+{
+  public:
+    xz_exception(const std::string& msg, lzma_ret code)
+        : std::runtime_error{msg}, code_{code}
+    {
+        // nothing
+    }
+
+    explicit operator lzma_ret() const
+    {
+        return code_;
+    }
+
+  private:
+    lzma_ret code_;
+};
+
+class xzstreambuf : public std::streambuf
+{
+  public:
+    xzstreambuf(const char* filename, const char* openmode,
+                std::size_t buffer_size = 128 * 1024);
+
+    ~xzstreambuf();
+
+    int_type underflow() override;
+
+    int_type overflow(int_type ch) override;
+
+    int sync() override;
+
+    bool is_open() const;
+
+    uint64_t bytes_read() const;
+
+  private:
+    bool reading_;
+    std::vector<char> in_buffer_;
+    std::vector<char> out_buffer_;
+    FILE* file_;
+    uint64_t bytes_read_;
+    lzma_stream stream_;
+    lzma_action action_;
+};
+
+class xzifstream : public std::istream
+{
+  public:
+    explicit xzifstream(std::string name);
+
+    xzstreambuf* rdbuf() const;
+
+    void flush();
+
+    uint64_t bytes_read() const;
+
+  private:
+    xzstreambuf buffer_;
+};
+
+class xzofstream : public std::ostream
+{
+  public:
+    explicit xzofstream(std::string name);
+
+    xzstreambuf* rdbuf() const;
+
+    void flush();
+
+  private:
+    xzstreambuf buffer_;
+};
+}
+}
+#endif
diff --git a/include/meta/parser/trees/parse_tree.h b/include/meta/parser/trees/parse_tree.h
index 1d2c81766..5413858c4 100644
--- a/include/meta/parser/trees/parse_tree.h
+++ b/include/meta/parser/trees/parse_tree.h
@@ -22,12 +22,6 @@ namespace parser
  * Represents the parse tree for a sentence. This may either be a sentence
  * parsed from training data, or the output from a trained parser on test
  * data.
- *
- * @todo determine what parts of analyzers::parse_tree are worth
- * keeping---that class deals specifically with trees read from the output
- * of the Stanford parser. When we have our own, we may still want some of
- * that functionality to allow people to use parsers that are not our
- * own?
  */
 class parse_tree
 {
diff --git a/include/meta/util/disk_vector.h b/include/meta/util/disk_vector.h
index a9ad16359..795dc4456 100644
--- a/include/meta/util/disk_vector.h
+++ b/include/meta/util/disk_vector.h
@@ -70,6 +70,8 @@ class disk_vector
      * @return a reference to the element at position idx in the vector
      * container
      */
+    template <class U = T,
+              class = typename std::enable_if<!std::is_const<U>::value>::type>
     T& operator[](uint64_t idx);

     /**
@@ -88,6 +90,8 @@ class disk_vector
      * (i.e., if idx is greater or equal than its size). This is in contrast
      * with member operator[], that does not check against bounds.
      */
+    template <class U = T,
+              class = typename std::enable_if<!std::is_const<U>::value>::type>
     T& at(uint64_t idx);

     /**
@@ -112,6 +116,8 @@ class disk_vector
     /**
      * @return an iterator to the beginning of this container
      */
+    template <class U = T,
+              class = typename std::enable_if<!std::is_const<U>::value>::type>
     iterator begin();

     /**
@@ -123,6 +129,8 @@ class disk_vector
     /**
      * @return an iterator to the end of this container
      */
+    template <class U = T,
+              class = typename std::enable_if<!std::is_const<U>::value>::type>
     iterator end();

     /**
diff --git a/include/meta/util/disk_vector.tcc b/include/meta/util/disk_vector.tcc
index 656e01e68..7c62d4ca4 100644
--- a/include/meta/util/disk_vector.tcc
+++ b/include/meta/util/disk_vector.tcc
@@ -3,9 +3,9 @@
  * @author Sean Massung
  */

-#include <fcntl.h>
 #include "meta/io/filesystem.h"
 #include "meta/util/disk_vector.h"
+#include <fcntl.h>

 namespace meta
 {
@@ -16,7 +16,14 @@ template <class T>
 disk_vector<T>::disk_vector(const std::string& path, uint64_t size /* = 0 */)
     : path_{path}, start_{nullptr}, size_{size}, file_desc_{-1}
 {
-    file_desc_ = open(path_.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+    if (std::is_const<T>::value)
+    {
+        file_desc_ = open(path_.c_str(), O_RDONLY);
+    }
+    else
+    {
+        file_desc_ = open(path_.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+    }
     if (file_desc_ < 0)
         throw disk_vector_exception{"error obtaining file descriptor for "
                                     + path_};
@@ -30,6 +37,9 @@ disk_vector<T>::disk_vector(const std::string& path, uint64_t size /* = 0 */)
     // end and writing a byte
     if (actual_size != size_bytes)
     {
+        if (std::is_const<T>::value)
+            throw disk_vector_exception{
+                "cannot create disk vector when opened in read-only mode"};
         auto offset = static_cast<long>(size_bytes - 1);
         if (lseek(file_desc_, offset, SEEK_SET) == -1)
             throw disk_vector_exception{"error lseeking to extend file"};
@@ -45,8 +55,11 @@ disk_vector<T>::disk_vector(const std::string& path, uint64_t size /* = 0 */)
             throw disk_vector_exception{"cannot map empty file " + path};
     }

-    start_ = (T*)mmap(nullptr, sizeof(T) * size_, PROT_READ | PROT_WRITE,
-                      MAP_SHARED, file_desc_, 0);
+    int prot = PROT_READ;
+    if (!std::is_const<T>::value)
+        prot |= PROT_WRITE;
+    start_
+        = (T*)mmap(nullptr, sizeof(T) * size_, prot, MAP_SHARED, file_desc_, 0);

     if (start_ == MAP_FAILED)
         throw disk_vector_exception{"error memory-mapping the file " + path_};
@@ -69,7 +82,8 @@ disk_vector<T>& disk_vector<T>::operator=(disk_vector&& other)
 {
     if (start_)
     {
-        munmap(start_, sizeof(T) * size_);
+        munmap(const_cast<typename std::remove_const<T>::type*>(start_),
+               sizeof(T) * size_);
         close(file_desc_);
     }
     path_ = std::move(other.path_);
@@ -86,11 +100,13 @@ disk_vector<T>::~disk_vector()
 {
     if (!start_)
         return;
-    munmap(start_, sizeof(T) * size_);
+    munmap(const_cast<typename std::remove_const<T>::type*>(start_),
+           sizeof(T) * size_);
     close(file_desc_);
 }

 template <class T>
+template <class, class>
 T& disk_vector<T>::operator[](uint64_t idx)
 {
     return start_[idx];
@@ -103,6 +119,7 @@ const T& disk_vector<T>::operator[](uint64_t idx) const
 }

 template <class T>
+template <class, class>
 T& disk_vector<T>::at(uint64_t idx)
 {
     if (idx >= size_)
@@ -129,6 +146,7 @@ uint64_t disk_vector<T>::size() const
 }

 template <class T>
+template <class, class>
 auto disk_vector<T>::begin() -> iterator
 {
     return start_;
@@ -147,6 +165,7 @@ auto disk_vector<T>::end() const -> const_iterator
 }

 template <class T>
+template <class, class>
 auto disk_vector<T>::end() -> iterator
 {
     return start_ + size_;
diff --git a/src/index/disk_index.cpp b/src/index/disk_index.cpp
index 63b5d3a77..c0dcbfd14 100644
--- a/src/index/disk_index.cpp
+++ b/src/index/disk_index.cpp
@@ -6,12 +6,12 @@
 #include <numeric>
 #include <utility>

+#include "meta/analyzers/analyzer.h"
 #include "meta/index/disk_index.h"
 #include "meta/index/disk_index_impl.h"
 #include "meta/index/string_list.h"
 #include "meta/index/string_list_writer.h"
 #include "meta/index/vocabulary_map.h"
-#include "meta/analyzers/analyzer.h"
 #include "meta/util/disk_vector.h"
 #include "meta/util/mapping.h"
 #include "meta/util/optional.h"
@@ -149,15 +149,10 @@ void disk_index::disk_index_impl::initialize_metadata()
     metadata_ = {index_name_};
 }

-void disk_index::disk_index_impl::load_labels(uint64_t num_docs)
+void disk_index::disk_index_impl::load_labels()
 {
-    // clear the current label set; this is so that the disk vector can
-    // flush via munmap() if needed
-    labels_ = util::nullopt;
-
-    // load in the new mapping
-    labels_ = util::disk_vector<label_id>{index_name_ + files[DOC_LABELS],
-                                          num_docs};
+    labels_
+        = util::disk_vector<const label_id>{index_name_ + files[DOC_LABELS]};
 }

 void disk_index::disk_index_impl::load_term_id_mapping()
@@ -175,11 +170,6 @@ void disk_index::disk_index_impl::save_label_id_mapping()
     map::save_mapping(label_ids_, index_name_ + files[LABEL_IDS_MAPPING]);
 }

-void disk_index::disk_index_impl::set_label(doc_id id, const class_label& label)
-{
-    (*labels_)[id] = get_label_id(label);
-}
-
 uint64_t disk_index::disk_index_impl::total_unique_terms() const
 {
     return term_id_mapping_->size();
diff --git a/src/index/eval/ir_eval.cpp b/src/index/eval/ir_eval.cpp
index a93a8c410..5f18a4196 100644
--- a/src/index/eval/ir_eval.cpp
+++ b/src/index/eval/ir_eval.cpp
@@ -184,7 +184,8 @@ double ir_eval::avg_p(const std::vector<search_result>& results, query_id q_id,
         }
         if (num_rel - 1 == total_relevant)
             break;
-        ++i;
+        if (i++ == num_docs)
+            break;
     }

     scores_.push_back(avgp / total_relevant);
@@ -217,22 +218,21 @@ double ir_eval::gmap() const
 }

 void ir_eval::print_stats(const std::vector<search_result>& results,
-                          query_id q_id, std::ostream& out)
+                          query_id q_id, std::ostream& out, uint64_t num_docs)
 {
     auto w1 = std::setw(8);
     auto w2 = std::setw(6);
     int p = 3;
-    uint64_t max = 5;
     out << w1 << printing::make_bold(" NDCG:") << w2 << std::setprecision(p)
-        << ndcg(results, q_id, max);
+        << ndcg(results, q_id, num_docs);
     out << w1 << printing::make_bold(" Avg. P:") << w2 << std::setprecision(p)
-        << avg_p(results, q_id, max);
+        << avg_p(results, q_id, num_docs);
     out << w1 << printing::make_bold(" F1 Score:") << w2
-        << std::setprecision(p) << f1(results, q_id);
+        << std::setprecision(p) << f1(results, q_id, num_docs);
     out << w1 << printing::make_bold(" Precision:") << w2
-        << std::setprecision(p) << precision(results, q_id, max);
+        << std::setprecision(p) << precision(results, q_id, num_docs);
     out << w1 << printing::make_bold(" Recall:") << w2 << std::setprecision(p)
-        << recall(results, q_id, max);
+        << recall(results, q_id, num_docs);
     out << std::endl;
 }
diff --git a/src/index/forward_index.cpp b/src/index/forward_index.cpp
index 5d7edd041..40e1528df 100644
--- a/src/index/forward_index.cpp
+++ b/src/index/forward_index.cpp
@@ -241,8 +241,6 @@ void forward_index::create_index(const cpptoml::table& config,

     metadata_writer mdata_writer{index_name(), docs.size(), docs.schema()};

-    impl_->load_labels(docs.size());
-
     auto max_threads = std::thread::hardware_concurrency();
     auto num_threads = config.get_as<std::size_t>("indexer-num-threads")
                            .value_or(max_threads);
@@ -261,7 +259,7 @@ void forward_index::create_index(const cpptoml::table& config,
         impl_->save_label_id_mapping();
         fwd_impl_->total_unique_terms_ = impl_->total_unique_terms();

-        // reload the label file to ensure it was flushed
+        // reload the label file
         impl_->load_labels();
     }
 }
@@ -309,6 +307,9 @@ void forward_index::impl::tokenize_docs(corpus::corpus& docs,
     bool exceeded_budget = false;
     std::atomic_size_t chunk_id{0};

+    util::disk_vector<label_id> labels{
+        idx_->index_name() + idx_->impl_->files[DOC_LABELS], docs.size()};
+
     parallel::thread_pool pool{num_threads};
     corpus::parallel_consume(
         docs, pool,
@@ -342,7 +343,7 @@ void forward_index::impl::tokenize_docs(corpus::corpus& docs,
                 });

             mdata_writer.write(doc.id(), length, counts.size(), doc.mdata());
-            idx_->impl_->set_label(doc.id(), doc.label());
+            labels[doc.id()] = idx_->impl_->get_label_id(doc.label());

             forward_index::postings_data_type::count_t pd_counts;
             pd_counts.reserve(counts.size());
@@ -447,10 +448,11 @@ void forward_index::impl::create_libsvm_postings(corpus::corpus& docs)
 {
     auto filename = idx_->index_name() + idx_->impl_->files[POSTINGS];
     auto num_docs = docs.size();
-    idx_->impl_->load_labels(num_docs);

     total_unique_terms_ = 0;
     {
+        util::disk_vector<label_id> labels{
+            idx_->index_name() + idx_->impl_->files[DOC_LABELS], docs.size()};
         postings_file_writer<forward_index::postings_data_type> out{filename,
                                                                     num_docs};
@@ -482,7 +484,7 @@ void forward_index::impl::create_libsvm_postings(corpus::corpus& docs)

             md_writer.write(doc.id(), static_cast<uint64_t>(length),
                             num_unique, doc.mdata());
-            idx_->impl_->set_label(doc.id(), doc.label());
+            labels[doc.id()] = idx_->impl_->get_label_id(doc.label());
         }

         // +1 since we subtracted one from each of the ids in the
@@ -490,7 +492,7 @@ void forward_index::impl::create_libsvm_postings(corpus::corpus& docs)
         ++total_unique_terms_;
     }

-    // reload the label file to ensure it was flushed
+    // load the labels
    idx_->impl_->load_labels();

     LOG(info) << "Created compressed postings file ("
diff --git a/src/index/inverted_index.cpp b/src/index/inverted_index.cpp
index 88caeb68f..b5fb0d201 100644
--- a/src/index/inverted_index.cpp
+++ b/src/index/inverted_index.cpp
@@ -137,8 +137,6 @@ void inverted_index::create_index(const cpptoml::table& config,
     postings_inverter<inverted_index> inverter{index_name(), max_writers};
     {
         metadata_writer mdata_writer{index_name(), docs.size(), docs.schema()};
-        uint64_t num_docs = docs.size();
-        impl_->load_labels(num_docs);

         // RAM budget is given in megabytes
         inv_impl_->tokenize_docs(docs, inverter, mdata_writer,
@@ -201,6 +199,8 @@ void inverted_index::impl::tokenize_docs(
     corpus::corpus& docs, postings_inverter<inverted_index>& inverter,
     metadata_writer& mdata_writer, uint64_t ram_budget, std::size_t num_threads)
 {
+    util::disk_vector<label_id> labels{
+        idx_->index_name() + idx_->impl_->files[DOC_LABELS], docs.size()};
     std::mutex io_mutex;
     printing::progress progress{" > Tokenizing Docs: ", docs.size()};
     uint64_t local_budget = ram_budget / num_threads;
@@ -237,7 +237,7 @@ void inverted_index::impl::tokenize_docs(
                 });

             mdata_writer.write(doc.id(), length, counts.size(), doc.mdata());
-            idx_->impl_->set_label(doc.id(), doc.label());
+            labels[doc.id()] = idx_->impl_->get_label_id(doc.label());

             // update chunk
             ls.producer_(doc.id(), counts);
diff --git a/src/index/tools/query_runner.cpp b/src/index/tools/query_runner.cpp
index ec90fc601..ca0dd686f 100644
--- a/src/index/tools/query_runner.cpp
+++ b/src/index/tools/query_runner.cpp
@@ -145,7 +145,8 @@ int main(int argc, char* argv[])
                     break;
                 }
                 if (!trec_format && eval)
-                    eval->print_stats(ranking, query_id{q_id});
+                    eval->print_stats(ranking, query_id{q_id}, std::cout,
+                                      max_results);
                 ++q_id;
             }
         });
diff --git a/src/index/vocabulary_map.cpp b/src/index/vocabulary_map.cpp
index 9219bcd68..1a8b36817 100644
--- a/src/index/vocabulary_map.cpp
+++ b/src/index/vocabulary_map.cpp
@@ -4,6 +4,7 @@
  */

 #include <cstring>
+
 #include "meta/index/vocabulary_map.h"
 #include "meta/util/optional.h"
diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt
index e36257f50..a44baf267 100644
--- a/src/io/CMakeLists.txt
+++ b/src/io/CMakeLists.txt
@@ -10,6 +10,10 @@ if (WIN32)
     list(APPEND META_IO_SOURCES mman-win32/mman.c)
 endif()

+if (META_HAS_LIBLZMA)
+    list(APPEND META_IO_SOURCES xzstream.cpp)
+endif()
+
 add_library(meta-io ${META_IO_SOURCES})
 target_link_libraries(meta-io meta-util ${ZLIB_LIBRARIES})

@@ -20,6 +24,11 @@ else()
                                ${META_PROJECT_SOURCE_DIR}/deps/meta-stlsoft/include)
 endif()

+if (META_HAS_LIBLZMA)
+    target_include_directories(meta-io PUBLIC ${LIBLZMA_INCLUDE_DIRS})
+    target_link_libraries(meta-io ${LIBLZMA_LIBRARIES})
+endif()
+
 install(TARGETS meta-io
         EXPORT meta-exports
         DESTINATION lib)
diff --git a/src/io/tools/compressor_test.cpp b/src/io/tools/compressor_test.cpp
index d05176c04..3d829cc06 100644
--- a/src/io/tools/compressor_test.cpp
+++ b/src/io/tools/compressor_test.cpp
@@ -3,26 +3,24 @@
 * @author Chase Geigle
 */

+#include "meta/io/gzstream.h"
 #include <array>
-#include <fstream>
 #include <iostream>
-#include "meta/io/gzstream.h"
+#include <fstream>
+#if META_HAS_LIBLZMA
+#include "meta/io/xzstream.h"
+#endif

 using namespace meta;

-int main(int argc, char** argv)
+template <class InputStream, class OutputStream>
+void test_compressor(const std::string& infile, const std::string& outfile)
 {
-    if (argc < 3)
-    {
-        std::cerr << "Usage: " << argv[0] << " input output" << std::endl;
-        return 1;
-    }
-
     std::array<char, 1024> buffer;

     {
-        std::ifstream file{argv[1], std::ios::in | std::ios::binary};
-        io::gzofstream output{argv[2]};
+        std::ifstream file{infile, std::ios::in | std::ios::binary};
+        OutputStream output{outfile};
         while (file)
         {
             file.read(&buffer[0], 1024);
@@ -31,8 +29,8 @@ int main(int argc, char** argv)
     }

     {
-        io::gzifstream input{argv[2]};
-        std::ofstream output{std::string{argv[2]} + ".decompressed",
+        InputStream input{outfile};
+        std::ofstream output{outfile + ".decompressed",
                              std::ios::out | std::ios::binary};

         while (input)
@@ -41,6 +39,22 @@ int main(int argc, char** argv)
             output.write(&buffer[0], input.gcount());
         }
     }
+}
+
+int main(int argc, char** argv)
+{
+    if (argc < 3)
+    {
+        std::cerr << "Usage: " << argv[0] << " input output" << std::endl;
+        return 1;
+    }
+
+    test_compressor<io::gzifstream, io::gzofstream>(argv[1], argv[2]);
+#if META_HAS_LIBLZMA
+    test_compressor<io::xzifstream, io::xzofstream>(
+        argv[1], std::string{argv[2]} + ".xz");
+#endif

     return 0;
 }
diff --git a/src/io/xzstream.cpp b/src/io/xzstream.cpp
new file mode 100644
index 000000000..2ff18c1ee
--- /dev/null
+++ b/src/io/xzstream.cpp
@@ -0,0 +1,256 @@
+/**
+ * @file xzstream.cpp
+ * @author Chase Geigle
+ *
+ * Based heavily upon the examples in the xz repo.
+ * @see
+ * http://git.tukaani.org/?p=xz.git;a=blob;f=doc/examples/01_compress_easy.c
+ * @see
+ * http://git.tukaani.org/?p=xz.git;a=blob;f=doc/examples/02_decompress.c
+ */
+
+#include "meta/io/xzstream.h"
+#include "meta/util/string_view.h"
+
+namespace meta
+{
+namespace io
+{
+
+namespace
+{
+
+void throw_if_error(lzma_ret code, std::string msg)
+{
+    switch (code)
+    {
+        case LZMA_OK:
+        case LZMA_STREAM_END:
+            return;
+        case LZMA_MEM_ERROR:
+            throw xz_exception{msg + ": Memory allocation failed", code};
+        case LZMA_FORMAT_ERROR:
+            throw xz_exception{msg + ": Input not in .xz format", code};
+        case LZMA_OPTIONS_ERROR:
+            throw xz_exception{msg + ": Unsupported compression options", code};
+        case LZMA_DATA_ERROR:
+            throw xz_exception{msg + ": Compressed file is corrupt", code};
+        case LZMA_BUF_ERROR:
+            throw xz_exception{
+                msg + ": Compressed file is truncated or corrupt", code};
+        case LZMA_UNSUPPORTED_CHECK:
+            throw xz_exception{
+                msg + ": Specified integrity check is not supported", code};
+        default:
+            throw xz_exception{msg + ": Unknown error", code};
+    }
+}
+}
+
+xzstreambuf::xzstreambuf(const char* filename, const char* openmode,
+                         std::size_t buffer_size)
+    : in_buffer_(buffer_size),
+      out_buffer_(buffer_size),
+      file_{std::fopen(filename, openmode)},
+      bytes_read_{0}
+{
+
+    stream_ = LZMA_STREAM_INIT;
+    action_ = LZMA_RUN;
+    stream_.next_in = nullptr;
+    stream_.avail_in = 0;
+
+    util::string_view mode{openmode};
+    if (mode == "wb")
+    {
+        reading_ = false;
+        setp(&in_buffer_.front(), &in_buffer_.back());
+        throw_if_error(lzma_easy_encoder(&stream_, 6, LZMA_CHECK_CRC64),
+                       "Failed to initialize encoder");
+    }
+    else if (mode == "rb")
+    {
+        auto end = &out_buffer_.back() + 1;
+        setg(end, end, end);
+        reading_ = true;
+
+        throw_if_error(lzma_stream_decoder(
+                           &stream_, std::numeric_limits<uint64_t>::max(), 0),
+                       "Failed to initialize decoder");
+    }
+    else
+    {
+        throw std::runtime_error{"Unrecognized open mode"};
+    }
+
+    stream_.next_out = reinterpret_cast<uint8_t*>(&out_buffer_[0]);
+    stream_.avail_out = out_buffer_.size();
+}
+
+xzstreambuf::~xzstreambuf()
+{
+    if (!reading_)
+    {
+        action_ = LZMA_FINISH;
+        sync();
+    }
+
+    fclose(file_);
+    lzma_end(&stream_);
+}
+
+auto xzstreambuf::underflow() -> int_type
+{
+    if (gptr() && (gptr() < egptr()))
+        return traits_type::to_int_type(*gptr());
+
+    // keep decompressing until we fill the output buffer, reading input
+    // from the internal file as needed
+    lzma_ret ret;
+    do
+    {
+        if (stream_.avail_in == 0 && !std::feof(file_))
+        {
+            stream_.next_in = reinterpret_cast<uint8_t*>(&in_buffer_[0]);
+            stream_.avail_in = std::fread(&in_buffer_[0], sizeof(uint8_t),
+                                          in_buffer_.size(), file_);
+            bytes_read_ += stream_.avail_in;
+
+            if (std::ferror(file_))
+            {
+                setg(&out_buffer_[0], &out_buffer_[0], &out_buffer_[0]);
+                return traits_type::eof();
+            }
+
+            if (std::feof(file_))
+            {
+                action_ = LZMA_FINISH;
+            }
+        }
+
+        ret = lzma_code(&stream_, action_);
+
+        throw_if_error(ret, "Decoder error");
+    } while (stream_.avail_out != 0 && ret != LZMA_STREAM_END);
+
+    // on LZMA_STREAM_END, we might not have filled the entire buffer, so
+    // compute the actual number of bytes we have in the get buffer
+    auto bytes = out_buffer_.size() - stream_.avail_out;
+    if (bytes > 0)
+    {
+        setg(&out_buffer_[0], &out_buffer_[0], &out_buffer_[0] + bytes);
+        stream_.next_out = reinterpret_cast<uint8_t*>(&out_buffer_[0]);
+        stream_.avail_out = out_buffer_.size();
+
+        return traits_type::to_int_type(*gptr());
+    }
+
+    // if we get here, we must have exhausted both the input file and the
+    // input buffer, so finally report EOF
+    setg(&out_buffer_[0], &out_buffer_[0], &out_buffer_[0]);
+    return traits_type::eof();
+}
+
+auto xzstreambuf::overflow(int_type ch) -> int_type
+{
+    if (ch != traits_type::eof())
+    {
+        *pptr() = traits_type::to_char_type(ch);
+        pbump(1);
+        if (sync() == 0)
+            return ch;
+    }
+
+    return traits_type::eof();
+}
+
+int xzstreambuf::sync()
+{
+    auto bytes = pptr() - pbase();
+    stream_.next_in = reinterpret_cast<uint8_t*>(pbase());
+    stream_.avail_in = static_cast<std::size_t>(bytes);
+
+    // Two cases:
+    // 1. We are still compressing the file, in which case we should pump
+    //    the loop until all of the available input bytes are consumed; or
+    //
+    // 2. We are done receiving input (action_ == LZMA_FINISH), in which
+    //    case we should pump the loop until we get the LZMA_STREAM_END
+    //    return code indicating that all input has been processed (note
+    //    that processed != read, hence this second case).
+    lzma_ret ret;
+    do
+    {
+        ret = lzma_code(&stream_, action_);
+
+        if (stream_.avail_out == 0 || ret == LZMA_STREAM_END)
+        {
+            auto size = out_buffer_.size() - stream_.avail_out;
+
+            if (std::fwrite(&out_buffer_[0], sizeof(uint8_t), size, file_)
+                != size)
+                return -1;
+
+            stream_.next_out = reinterpret_cast<uint8_t*>(&out_buffer_[0]);
+            stream_.avail_out = out_buffer_.size();
+        }
+
+        throw_if_error(ret, "Encoder error");
+
+    } while (stream_.avail_in > 0
+             || (action_ == LZMA_FINISH && ret != LZMA_STREAM_END));
+
+    if (bytes > 0)
+        pbump(-static_cast<int>(bytes));
+
+    return 0;
+}
+
+bool xzstreambuf::is_open() const
+{
+    return file_ != nullptr && !::ferror(file_);
+}
+
+uint64_t xzstreambuf::bytes_read() const
+{
+    return bytes_read_;
+}
+
+xzifstream::xzifstream(std::string name)
+    : std::istream{&buffer_}, buffer_{name.c_str(), "rb"}
+{
+    clear();
+}
+
+xzstreambuf* xzifstream::rdbuf() const
+{
+    return const_cast<xzstreambuf*>(&buffer_);
+}
+
+void xzifstream::flush()
+{
+    buffer_.sync();
+}
+
+uint64_t xzifstream::bytes_read() const
+{
+    return buffer_.bytes_read();
+}
+
+xzofstream::xzofstream(std::string name)
+    : std::ostream{&buffer_}, buffer_{name.c_str(), "wb"}
+{
+    clear();
+}
+
+xzstreambuf* xzofstream::rdbuf() const
+{
+    return const_cast<xzstreambuf*>(&buffer_);
+}
+
+void xzofstream::flush()
+{
+    buffer_.sync();
+}
+}
+}
diff --git a/tests/ir_eval_test.cpp b/tests/ir_eval_test.cpp
index 2de9f4bb8..339383807 100644
--- a/tests/ir_eval_test.cpp
+++ b/tests/ir_eval_test.cpp
@@ -108,7 +108,6 @@ go_bandit([]() {
                     1.0 / idcg);
         check_query(eval, results, qid, 0.1 / 0.6, 0.5, 0.1, 0.2,
                     1.0 / idcg_5, 5);
-
         results.emplace_back(doc_id{1}, 0.8); // relevant
         check_query(eval, results, qid,
                     (2.0 * (2.0 / 3.0) * 0.2) / (2.0 / 3.0 + 0.2),
@@ -126,7 +125,7 @@ go_bandit([]() {
         results.emplace_back(doc_id{38}, 0.2);  // relevant
         results.emplace_back(doc_id{754}, 0.1); // relevant
         auto avg_p_5
-            = (1.0 + 2.0 / 3.0 + 3.0 / 4.0 + 4.0 / 5.0 + 5.0 / 6.0) / 5.0;
+            = (1.0 + 2.0 / 3.0 + 3.0 / 4.0 + 4.0 / 5.0) / 5.0;
         auto avg_p
             = (1.0 + 2.0 / 3.0 + 3.0 / 4.0 + 4.0 / 5.0 + 5.0 / 6.0
               + 6.0 / 7.0 + 7.0 / 8.0 + 8.0 / 9.0 + 9.0 / 10.0 + 10.0 / 11.0)
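
Taken together, the patch introduces two small user-facing APIs: xz-compressed iostreams (`io::xz{i,o}fstream`) and read-only views of disk-backed vectors (`util::disk_vector<const T>`). The sketch below is not part of the diff; it assumes MeTA was configured with liblzma available (so `META_HAS_LIBLZMA` is defined), and the file paths are hypothetical:

```cpp
#include <cstddef>
#include <iostream>
#include <string>

#include "meta/io/xzstream.h"
#include "meta/util/disk_vector.h"

using namespace meta;

int main()
{
    // xzofstream is a std::ostream; the encoder is finalized and the
    // remaining bytes flushed when the stream goes out of scope
    {
        io::xzofstream out{"example.txt.xz"};
        out << "hello, xz world\n";
    }

    // xzifstream is a std::istream, so the usual extraction APIs work
    io::xzifstream in{"example.txt.xz"};
    std::string line;
    std::getline(in, line);
    std::cout << line << "\n";

    // a const element type makes disk_vector open the backing file
    // O_RDONLY and mmap it PROT_READ only, so this works on read-only
    // filesystems (hypothetical path to an existing disk_vector file)
    util::disk_vector<const uint64_t> view{"some-index/lexicon.index"};
    for (std::size_t i = 0; i < view.size(); ++i)
        std::cout << view[i] << "\n";

    return 0;
}
```

This read-only mode is what the `{inverted,forward}_index` fix builds on: `load_labels()` now maps the labels file as a `util::disk_vector<const label_id>`, so loading an existing index never requires write access, while index *construction* writes labels through a separate non-const `disk_vector<label_id>` scoped to the build.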