-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #108 from wush978/dev/95
resolve #95
- Loading branch information
Showing
17 changed files
with
340 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// [[Rcpp::depends(jiebaR)]] | ||
// [[Rcpp::depends(FeatureHashing)]] | ||
|
||
#include "jiebaRAPI.h" | ||
#include <callback.h> | ||
#include <Rcpp.h> | ||
|
||
using namespace Rcpp; | ||
|
||
struct jiebaRCallbackFunctor : public CallbackFunctor { | ||
|
||
enum Type { | ||
MIX, | ||
MP, | ||
HMM, | ||
QUERY, | ||
KEY | ||
}; | ||
|
||
Type type; | ||
Environment cutter; | ||
SEXP cutter_pointer; | ||
|
||
typedef SEXP (*Cut)(SEXP, SEXP); | ||
|
||
Cut cut; | ||
|
||
void set_type(std::string _type) { | ||
if (_type.compare("mix") == 0) { | ||
type = MIX; | ||
} else if (_type.compare("mp") == 0) { | ||
type = MP; | ||
} else if (_type.compare("hmm") == 0) { | ||
type = HMM; | ||
} else if (_type.compare("query") == 0) { | ||
type = QUERY; | ||
} else if (_type.compare("key") == 0) { | ||
type = KEY; | ||
} else { | ||
throw std::invalid_argument("Unknown type"); | ||
} | ||
} | ||
|
||
std::string get_type() { | ||
switch (type) { | ||
case MIX: | ||
return "mix"; | ||
case MP: | ||
return "mp"; | ||
case HMM: | ||
return "hmm"; | ||
case QUERY: | ||
return "query"; | ||
case KEY: | ||
return "key"; | ||
} | ||
} | ||
|
||
void set_cut() { | ||
std::string fname("jiebaR_"); | ||
fname.append(get_type()); | ||
fname.append("_cut"); | ||
cut = reinterpret_cast<Cut>(::R_GetCCallable("jiebaR", fname.c_str())); | ||
} | ||
|
||
explicit jiebaRCallbackFunctor( | ||
SEXP _src, | ||
std::string _type, | ||
SEXP _cutter | ||
) | ||
: type(MIX), | ||
cutter(_cutter), | ||
cutter_pointer(NULL), | ||
cut(NULL), | ||
CallbackFunctor(_src) | ||
{ | ||
set_type(_type); | ||
set_cut(); | ||
cutter_pointer = wrap(cutter["worker"]); | ||
} | ||
|
||
virtual ~jiebaRCallbackFunctor() { } | ||
|
||
virtual const std::vector<std::string> operator()(const char* input) const { | ||
return as<std::vector<std::string> >((*cut)(wrap(input), cutter_pointer)); | ||
} | ||
|
||
}; | ||
|
||
// Rcpp module that exposes the callback classes to R.
RCPP_MODULE(jiebaR_callback) {

  typedef jiebaRCallbackFunctor Functor;

  // The base class is registered so that the derived class can name it.
  class_<CallbackFunctor>("callback");

  // R-level class "jiebaR_callback": constructor (src, type, cutter),
  // a read/write "type" property and the "cutter" field.
  class_<Functor>("jiebaR_callback")
    .derives<CallbackFunctor>("callback")
    .constructor<SEXP, std::string, SEXP>()
    .property("type", &Functor::get_type, &Functor::set_type)
    .field("cutter", &Functor::cutter);

}
|
||
/***R
# Builds a jiebaR worker of the requested type and wraps it, together with
# the input, in a `jiebaR_callback` object.
generate_jiebaR_callback <- function(input, type = "mix", ...) {
  segmenter <- jiebaR::worker(type = type, ...)
  new(jiebaR_callback, input, type, segmenter)
}
# Make the generator available under the name "jiebaR".
FeatureHashing::register_callback("jiebaR", generate_jiebaR_callback)
*/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/callback.R | ||
\name{init_jiebaR_callback} | ||
\alias{init_jiebaR_callback} | ||
\title{Initialize and register jiebaR to the formula interface} | ||
\usage{ | ||
init_jiebaR_callback() | ||
} | ||
\description{ | ||
Initialize and register jiebaR to the formula interface | ||
} | ||
\details{ | ||
This function will register the callback of word segmentation | ||
function provided by jiebaR to the formula interface. | ||
For example, `~ jiebaR(...)` will use the feature of word segmentation | ||
provided by jiebaR to segment a given column of the data. | ||
The first argument of jiebaR is a character vector which will be segmented.
The remaining arguments are the same as \code{\link[jiebaR]{worker}}. These
arguments will be used to initialize a jiebaR worker which will segment
the input data.
} | ||
\examples{ | ||
\dontrun{ | ||
library(FeatureHashing) | ||
init_jiebaR_callback() | ||
m <- hashed.model.matrix(~ jiebaR(title, type = "mix"), df)
# the column `df$title` will be fed into `worker <- worker(type = "mix")`
# the result of `worker <= df$title` will be hashed into the sparse matrix
# the result is `m`
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/callback.R | ||
\name{ls_special} | ||
\alias{ls_special} | ||
\title{List the Registered Specials} | ||
\usage{ | ||
ls_special() | ||
} | ||
\value{ | ||
character vector. The specials which could be used in the | ||
formula interface. | ||
} | ||
\description{ | ||
List the Registered Specials | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#ifndef __HASH_FUNCTION_IMPLEMENTATION_HPP__ | ||
#define __HASH_FUNCTION_IMPLEMENTATION_HPP__ | ||
|
||
#include <hash_function.h> | ||
#include <Rcpp.h> | ||
|
||
/**
 * HashFunction implementation whose call operator is defined outside this
 * header.
 * NOTE(review): the name suggests it performs no real hashing; confirm
 * against the implementation file.
 */
class NullHashFunction : public HashFunction
{
public:
  /**
   * @param buf            start of the bytes to hash.
   * @param size           number of bytes in `buf`.
   * @param is_interaction flag forwarded by the caller; defaults to false.
   * @return a 32-bit hash value.
   */
  virtual uint32_t operator()(const char* buf, int size, bool is_interaction = false);
};
|
||
/**
 * HashFunction implementation that carries a 32-bit seed. The call operator
 * is defined outside this header.
 */
class MurmurHash3HashFunction : public HashFunction {

  uint32_t seed;

public :

  /**
   * @param _seed seed stored for use by the call operator.
   *
   * Marked explicit so that a bare integer is never silently converted
   * into a hash-function object.
   */
  explicit MurmurHash3HashFunction(uint32_t _seed) : seed(_seed) { }

  /**
   * @param buf            start of the bytes to hash.
   * @param size           number of bytes in `buf`.
   * @param is_interaction flag forwarded by the caller; defaults to false.
   * @return a 32-bit hash value.
   */
  virtual uint32_t operator()(const char* buf, int size, bool is_interaction = false);

};
|
||
class MurmurHash3LogHashFunction : public HashFunction { | ||
|
||
uint32_t seed; | ||
Rcpp::Environment e; | ||
std::map<uint32_t, std::string> inverse_mapping; | ||
|
||
public: | ||
|
||
MurmurHash3LogHashFunction(SEXP _e, uint32_t _seed) | ||
: HashFunction(), seed(_seed), e(_e) | ||
{ } | ||
|
||
virtual uint32_t operator()(const char* buf, int size, bool is_interaction = false); | ||
|
||
}; | ||
|
||
# endif // __HASH_FUNCTION_IMPLEMENTATION_HPP__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.