Commit
added support for binary io to streams with compression if desired
rmsalinas committed Oct 16, 2016
1 parent 70bd510 commit 46fbe8b
Showing 13 changed files with 1,274 additions and 70 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -218,7 +218,8 @@ ENDIF()
#------------------------------------------------
ADD_SUBDIRECTORY(src)
IF (BUILD_UTILS)
ADD_SUBDIRECTORY(utils)
ADD_SUBDIRECTORY(utils)
ADD_SUBDIRECTORY(tests)
ENDIF()


2 changes: 1 addition & 1 deletion src/CMakeLists.txt
@@ -1,7 +1,7 @@
INCLUDE_DIRECTORIES(. ./utils/)

FILE(GLOB hdrs_base "*.h" )
FILE(GLOB srcs_base "*.cpp")
FILE(GLOB srcs_base "*.c*")

FILE(GLOB hdrs ${hdrs_base} )
FILE(GLOB srcs ${srcs_base} )
19 changes: 19 additions & 0 deletions src/DescManip.cpp
@@ -225,6 +225,25 @@ void DescManip::toMat32F(const std::vector<cv::Mat> &descriptors,
}
}

void DescManip::toStream(const cv::Mat &m,std::ostream &str){
assert(m.rows==1 || m.isContinuous());
int type=m.type();
int cols=m.cols;
int rows=m.rows;
str.write((char*)&cols,sizeof(cols));
str.write((char*)&rows,sizeof(rows));
str.write((char*)&type,sizeof(type));
str.write((char*)m.ptr<char>(0),m.elemSize()*m.cols);
}

void DescManip::fromStream(cv::Mat &m,std::istream &str){
int type,cols,rows;
str.read((char*)&cols,sizeof(cols));
str.read((char*)&rows,sizeof(rows));
str.read((char*)&type,sizeof(type));
m.create(rows,cols,type);
str.read((char*)m.ptr<char>(0),m.elemSize()*m.cols);
}


// --------------------------------------------------------------------------
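The two helpers above write a small header (cols, rows, type) followed by the raw bytes of a single continuous row, and read it back into a freshly allocated matrix. A minimal round trip through an in-memory stream could look like the sketch below; the matrix size and the use of std::stringstream are illustrative assumptions, not part of the commit.

#include <sstream>
#include <opencv2/core/core.hpp>
#include "DescManip.h"

int main(){
    // One ORB-style binary descriptor: a single continuous row of 32 bytes.
    cv::Mat desc(1, 32, CV_8U);
    cv::randu(desc, cv::Scalar::all(0), cv::Scalar::all(255));

    // Serialize to an in-memory stream with the new helper.
    std::stringstream ss;
    DBoW3::DescManip::toStream(desc, ss);

    // Read it back; fromStream allocates the matrix from the stored header.
    cv::Mat restored;
    DBoW3::DescManip::fromStream(restored, ss);

    // Returns 0 when every byte matches.
    return cv::countNonZero(desc != restored);
}
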
3 changes: 3 additions & 0 deletions src/DescManip.h
@@ -59,6 +59,9 @@ class DBOW_API DescManip
static void toMat32F(const std::vector<cv::Mat> &descriptors,
cv::Mat &mat);

/**io routines*/
static void toStream(const cv::Mat &m,std::ostream &str);
static void fromStream(cv::Mat &m,std::istream &str);
private:
/**Returns the number of bytes of the descriptor
* used for binary descriptors only*/
225 changes: 187 additions & 38 deletions src/Vocabulary.cpp
@@ -1,5 +1,7 @@
#include "Vocabulary.h"
#include "DescManip.h"
#include "quicklz.h"
#include <sstream>
namespace DBoW3{
// --------------------------------------------------------------------------

@@ -131,7 +133,7 @@ void Vocabulary::create(
const std::vector< cv::Mat > &training_features)
{
std::vector<std::vector<cv::Mat> > vtf(training_features.size());
for(int i=0;i<training_features.size();i++){
for(size_t i=0;i<training_features.size();i++){
vtf[i].resize(training_features[i].rows);
for(int r=0;r<training_features[i].rows;r++)
vtf[i][r]=training_features[i].rowRange(r,r+1);
@@ -898,58 +900,47 @@ int Vocabulary::stopWords(double minWeight)
// --------------------------------------------------------------------------


void Vocabulary::save(const std::string &filename) const
void Vocabulary::save(const std::string &filename, bool binary_compressed) const
{
cv::FileStorage fs(filename.c_str(), cv::FileStorage::WRITE);
if(!fs.isOpened()) throw std::string("Could not open file ") + filename;

save(fs);
if ( filename.find(".yml")==std::string::npos){
std::ofstream file_out(filename);
if (!file_out) throw std::runtime_error("Vocabulary::saveBinary Could not open file :"+filename+" for writing");
toStream(file_out,binary_compressed);
}
else{
cv::FileStorage fs(filename.c_str(), cv::FileStorage::WRITE);
if(!fs.isOpened()) throw std::string("Could not open file ") + filename;
save(fs);
}
}

// --------------------------------------------------------------------------


void Vocabulary::load(const std::string &filename)
{
cv::FileStorage fs(filename.c_str(), cv::FileStorage::READ);
if(!fs.isOpened()) throw std::string("Could not open file ") + filename;

this->load(fs);
}
//check first if it is a binary file
std::ifstream ifile(filename);
if (!ifile) throw std::runtime_error("Vocabulary::load Could not open file :"+filename+" for reading");
uint64_t sig;//magic number describing the file
ifile.read((char*)&sig,sizeof(sig));
if (sig==88877711233) {//it is a binary file. read from it
ifile.seekg(0,std::ios::beg);
fromStream(ifile);

// --------------------------------------------------------------------------
}
else{
cv::FileStorage fs(filename.c_str(), cv::FileStorage::READ);
if(!fs.isOpened()) throw std::string("Could not open file ") + filename;

this->load(fs);
}
}

void Vocabulary::save(cv::FileStorage &f,
const std::string &name) const
{
// Format YAML:
// vocabulary
// {
// k:
// L:
// scoringType:
// weightingType:
// nodes
// [
// {
// nodeId:
// parentId:
// weight:
// descriptor:
// }
// ]
// words
// [
// {
// wordId:
// nodeId:
// }
// ]
// }
//
// The root node (index 0) is not included in the node vector
//

f << name << "{";

@@ -976,6 +967,7 @@ void Vocabulary::save(cv::FileStorage &f,
for(pit = children.begin(); pit != children.end(); pit++)
{
const Node& child = m_nodes[*pit];
std::cout<<m_nodes[*pit].id<<" ";

// save node data
f << "{:";
@@ -992,6 +984,7 @@ void Vocabulary::save(cv::FileStorage &f,
}
}
}
std::cout<<"\n";

f << "]"; // nodes

@@ -1013,9 +1006,165 @@

}

void Vocabulary::toStream( std::ostream &out_str, bool compressed) const throw(std::exception){

uint64_t sig=88877711233;//magic number describing the file
out_str.write((char*)&sig,sizeof(sig));
out_str.write((char*)&compressed,sizeof(compressed));

//save everything to a stream
std::stringstream aux_stream;
aux_stream.write((char*)&m_k,sizeof(m_k));
aux_stream.write((char*)&m_L,sizeof(m_L));
aux_stream.write((char*)&m_scoring,sizeof(m_scoring));
aux_stream.write((char*)&m_weighting,sizeof(m_weighting));

//count the number of nodes
int nnodes=m_nodes.size()-1;
aux_stream.write((char*)&nnodes,sizeof(nnodes));
//nodes
std::vector<NodeId> parents={0};// root


while(!parents.empty())
{
NodeId pid = parents.back();
parents.pop_back();

const Node& parent = m_nodes[pid];

for(auto pit :parent.children)
{

const Node& child = m_nodes[pit];
aux_stream.write((char*)&child.id,sizeof(child.id));
aux_stream.write((char*)&pid,sizeof(pid));
aux_stream.write((char*)&child.weight,sizeof(child.weight));
DescManip::toStream(child.descriptor,aux_stream);
// add to parent list
if(!child.isLeaf()) parents.push_back(pit);
}
}
//words
//save size
uint32_t m_words_size=m_words.size();
aux_stream.write((char*)&m_words_size,sizeof(m_words_size));
for(auto wit = m_words.begin(); wit != m_words.end(); wit++)
{
WordId id = wit - m_words.begin();
aux_stream.write((char*)&id,sizeof(id));
aux_stream.write((char*)&(*wit)->id,sizeof((*wit)->id));
}


//now, decide if compress or not
if (compressed){
qlz_state_compress state_compress;
memset(&state_compress, 0, sizeof(qlz_state_compress));
//Create output buffer
int chunkSize=10000;
std::vector<char> compressed( chunkSize+size_t(400), 0);
std::vector<char> input( chunkSize, 0);
auto total_size= aux_stream.tellp();
uint64_t total_compress_size=0;
//calculate how many chunks will be written
uint32_t nChunks= total_size/chunkSize;
if ( total_size%chunkSize!=0) nChunks++;
out_str.write((char*)&nChunks, sizeof(nChunks));
//start compressing the chunks
while(total_size!=0){
int readSize=chunkSize;
if (total_size<chunkSize) readSize=total_size;
aux_stream.read(&input[0],readSize);
uint64_t compressed_size = qlz_compress(&input[0], &compressed[0], readSize, &state_compress);
total_size-=readSize;
out_str.write(&compressed[0], compressed_size);
total_compress_size+=compressed_size;
}
}
else{
out_str<<aux_stream.rdbuf();
}
}



void Vocabulary::fromStream( std::istream &str ) throw(std::exception){


m_words.clear();
m_nodes.clear();
uint64_t sig=0;//magic number describing the file
str.read((char*)&sig,sizeof(sig));
if (sig!=88877711233) throw std::runtime_error("Vocabulary::fromStream is not of appropriate type");
bool compressed;
str.read((char*)&compressed,sizeof(compressed));

std::stringstream decompressed_stream;
std::istream *_used_str=0;
if (compressed){
qlz_state_decompress state_decompress;
memset(&state_decompress, 0, sizeof(qlz_state_decompress));
int chunkSize=10000;
std::vector<char> decompressed(chunkSize);
std::vector<char> input(chunkSize+400);
//read how many chunks are there
uint32_t nChunks;
str.read((char*)&nChunks,sizeof(nChunks));
for(int i=0;i<nChunks;i++){
str.read(&input[0],9);
int c=qlz_size_compressed(&input[0]);
str.read(&input[9],c-9);
size_t d=qlz_decompress(&input[0], &decompressed[0], &state_decompress);
decompressed_stream.write(&decompressed[0],d);
}
_used_str=&decompressed_stream;
}
else{
_used_str=&str;
}

_used_str->read((char*)&m_k,sizeof(m_k));
_used_str->read((char*)&m_L,sizeof(m_L));
_used_str->read((char*)&m_scoring,sizeof(m_scoring));
_used_str->read((char*)&m_weighting,sizeof(m_weighting));

createScoringObject();
int nnodes;
_used_str->read((char*)&nnodes,sizeof(nnodes));
m_nodes.resize(nnodes + 1); // +1 to include root
m_nodes[0].id = 0;



for(size_t i = 1; i < m_nodes.size(); ++i)
{
NodeId nid;
_used_str->read((char*)&nid,sizeof(NodeId));
Node& child = m_nodes[nid];
child.id=nid;
_used_str->read((char*)&child.parent,sizeof(child.parent));
_used_str->read((char*)&child.weight,sizeof(child.weight));
DescManip::fromStream(child.descriptor,*_used_str);
m_nodes[child.parent].children.push_back(child.id);
}
// // words
uint32_t m_words_size;
_used_str->read((char*)&m_words_size,sizeof(m_words_size));
m_words.resize(m_words_size);
for(unsigned int i = 0; i < m_words.size(); ++i)
{
WordId wid;NodeId nid;
_used_str->read((char*)&wid,sizeof(wid));
_used_str->read((char*)&nid,sizeof(nid));
m_nodes[nid].word_id = wid;
m_words[wid] = &m_nodes[nid];
}
}
// --------------------------------------------------------------------------



void Vocabulary::load(const cv::FileStorage &fs,
const std::string &name)
{
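To summarize the binary layout produced by Vocabulary::toStream above: a 64-bit magic number (88877711233), a compressed flag, and then either the raw payload (k, L, scoring, weighting, node count, the nodes, and the word table) or, when compression is enabled, a 32-bit chunk count followed by QuickLZ-compressed chunks of at most 10000 input bytes each. Vocabulary::load tells binary files from YAML files by peeking at that magic number. The stand-alone check below mirrors that detection step; it is a hypothetical helper for illustration, not part of the commit.

#include <cstdint>
#include <fstream>
#include <stdexcept>
#include <string>

// Hypothetical helper: returns true if the file starts with the binary
// vocabulary signature written by Vocabulary::toStream, and reports whether
// the payload that follows is QuickLZ-compressed.
bool isBinaryVocabulary(const std::string &filename, bool &compressed){
    std::ifstream file(filename, std::ios::binary);
    if (!file) throw std::runtime_error("could not open " + filename);

    uint64_t sig = 0;
    file.read((char*)&sig, sizeof(sig));
    if (sig != 88877711233) return false;               // no magic number: most likely a YAML vocabulary

    file.read((char*)&compressed, sizeof(compressed));  // flag written right after the signature
    return true;
}

A caller could use the result to decide whether to hand the stream to fromStream or to the cv::FileStorage path.
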
15 changes: 10 additions & 5 deletions src/Vocabulary.h
@@ -235,13 +235,13 @@ class DBOW_API Vocabulary
void setScoringType(ScoringType type);

/**
* Saves the vocabulary into a file
* Saves the vocabulary into a file. If filename extension contains .yml, opencv YAML format is used. Otherwise, binary format is employed
* @param filename
*/
void save(const std::string &filename) const;
void save(const std::string &filename, bool binary_compressed=true) const;

/**
* Loads the vocabulary from a file
* Loads the vocabulary from a file created with save
* @param filename
*/
void load(const std::string &filename);
Expand All @@ -261,7 +261,7 @@ class DBOW_API Vocabulary
*/
virtual void load(const cv::FileStorage &fs,
const std::string &name = "vocabulary");

/**
* Stops those words whose weight is below minWeight.
* Words are stopped by setting their weight to 0. There are not returned
Expand All @@ -276,6 +276,11 @@ class DBOW_API Vocabulary
*/
virtual int stopWords(double minWeight);


//io to-from a stream
void toStream( std::ostream &str, bool compressed=true) const throw(std::exception);
void fromStream( std::istream &str ) throw(std::exception);

protected:

/// reference to descriptor
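Putting the new API together: save() takes the binary path whenever the filename does not contain ".yml", with QuickLZ compression enabled by default, while load() detects the format automatically. A usage sketch follows; the file names and the .dbow3 extension are assumptions for illustration.

#include "Vocabulary.h"

int main(){
    DBoW3::Vocabulary voc;
    voc.load("orbvoc.yml");               // an existing YAML vocabulary (file name assumed)

    voc.save("orbvoc.dbow3");             // no ".yml" in the name: binary, compressed by default
    voc.save("orbvoc_raw.dbow3", false);  // binary, uncompressed
    voc.save("orbvoc_copy.yml");          // ".yml" in the name keeps the OpenCV YAML path

    DBoW3::Vocabulary voc2;
    voc2.load("orbvoc.dbow3");            // format detected via the magic number
    return 0;
}
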
