Skip to content
This repository has been archived by the owner on Apr 10, 2019. It is now read-only.

Commit

Permalink
implements parallel process
Browse files Browse the repository at this point in the history
  • Loading branch information
icoxfog417 committed Apr 7, 2017
1 parent 79251e8 commit 751e975
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 22 deletions.
Binary file added docs/top.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
32 changes: 17 additions & 15 deletions elephant_sense/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from sklearn.externals import joblib
from scripts.features.post import Post
from scripts.features.post_feature import PostFeature
import scripts.features.length_extractor as fext
import scripts.features.length_extractor as lext
import scripts.features.charactor_extractor as cext
import scripts.features.structure_extractor as sext


class Evaluator():
Expand Down Expand Up @@ -32,21 +34,21 @@ def evaluate(self, post_dict):
def get_features(self, post_dict):
post = Post(post_dict)
pf = PostFeature(post)
si = fext.SentenceInfo(post.body)
si.analyse()
cleaned_rendered_body = cext.RenderedBodyPreprocessor().clean_rendered_body(post.rendered_body)

pf.add(fext.TitleLengthExtractor())
pf.add(fext.SectionCountExtractor())
pf.add(fext.SentenceMeanLengthExtractor(si))
pf.add(fext.SentenceMinLengthExtractor(si))
pf.add(fext.SentenceMaxLengthExtractor(si))
pf.add(fext.KanjiRatioExtractor(si))
pf.add(fext.HiraganaRatioExtractor(si))
pf.add(fext.KatakanaRatioExtractor(si))
pf.add(fext.NumberRatioExtractor(si))
pf.add(fext.Header1MeanLengthExtractor())
pf.add(fext.Header2MeanLengthExtractor())

pf.add(lext.TitleLengthExtractor())
pf.add(lext.SectionCountExtractor())
pf.add(cext.KanjiRatioExtractor(cleaned_rendered_body))
pf.add(cext.HiraganaRatioExtractor(cleaned_rendered_body))
pf.add(cext.KatakanaRatioExtractor(cleaned_rendered_body))
pf.add(cext.NumberRatioExtractor(cleaned_rendered_body))
pf.add(cext.PunctuationRatioExtractor(cleaned_rendered_body))
pf.add(lext.SentenceMeanLengthExtractor(cleaned_rendered_body))
pf.add(lext.SentenceMeanLengthExtractor(cleaned_rendered_body))
pf.add(lext.SentenceMaxLengthExtractor(cleaned_rendered_body))
pf.add(sext.ImageCountExtractor())
pf.add(sext.ImageRatioExtractor(cleaned_rendered_body))

pf_d = pf.to_dict(drop_disused_feature=True)
f_vector = []
for f in self.features:
Expand Down
23 changes: 20 additions & 3 deletions elephant_sense/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tornado.escape
import tornado.web
from bs4 import BeautifulSoup
from sklearn.externals.joblib import Parallel, delayed
from elephant_sense.evaluator import Evaluator
from elephant_sense.qiita_api import search_posts

Expand Down Expand Up @@ -49,9 +50,16 @@ def post(self):
message["posts"] = [self.trim(p) for p in posts]
self.write(message)
else:
posts = search_posts(query, n=50)
posts = self.scoring(posts)
message["posts"] = [self.trim(p) for p in posts]
posts = search_posts(query, n=200)
process = 4
batch_size = len(posts) / process
tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(process)]
dones = Parallel(n_jobs=process)(delayed(parallel_scoring)(self.evaluator, posts[t[0]:t[1]]) for t in tasks)
posts = []
for scoreds in dones:
posts += [self.trim(s) for s in scoreds]
posts = sorted(posts, key=lambda p: p["score"], reverse=True)
message["posts"] = posts
self.write(message)

@classmethod
Expand All @@ -78,3 +86,12 @@ def scoring(self, posts):
def write_json(self, message):
    """Serialize ``message`` to JSON and hand it to ``self.write``.

    ``ensure_ascii=False`` keeps non-ASCII characters as-is in the
    output instead of escaping them.

    Args:
        message: JSON-serializable object to send.
    """
    serialized = json.dumps(message, ensure_ascii=False)
    self.write(serialized)


def parallel_scoring(evaluator, posts):
    """Score each post with ``evaluator`` and return the scored posts.

    Every post is updated in place: the value returned by
    ``evaluator.evaluate(post)`` is stored under its ``"score"`` key.

    Args:
        evaluator: Object exposing ``evaluate(post)``.
        posts: Iterable of mutable mappings, one per post.

    Returns:
        A new list holding the same post objects in their input order.
    """
    scored = []
    for post in posts:
        post["score"] = evaluator.evaluate(post)
        # Order is left untouched here; sorting happens after the
        # per-batch results have been merged.
        scored.append(post)
    return scored
3 changes: 3 additions & 0 deletions elephant_sense/static/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ var app = new Vue({
data: {
debug: false,
query: "",
working: false,
results: []
},
methods:{
Expand All @@ -14,13 +15,15 @@ var app = new Vue({
}
message["debug"] = this.debug;
var self = this;
self.working = true;
self.$http({
method: "POST",
url:"/e/search",
data: message,
xsrfCookieName: "_xsrf",
xsrfHeaderName: "X-XSRFToken"
}).then(function(response) {
self.working = false;
self.results = response.data.posts;
}).catch(function(error){
console.log(error);
Expand Down
8 changes: 5 additions & 3 deletions elephant_sense/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@
<img src="{{ static_url("encircled_elephant.PNG") }}" >
</figure>
<p class="title">
Elaphant Sense
Elephant Sense
</p>
<hr>
<div class="field has-addons region" style="max-width:800px; margin:auto">
<p class="control is-expanded">
<input v-model="query" v-on:keyup.enter="search" class="input" type="text" placeholder="Please input the key words">
<input
class="input" type="text" placeholder="Please input the key words"
v-model="query" v-on:keyup.enter="search" v-bind:disabled="working">
</p>
<p class="control" style="width:30px;">
<button id="search" class="button" v-on:click="search">
<button id="search" class="button" v-on:click="search" v-bind:disabled="working">
<span class="icon">
<i class="fa fa-search"></i>
</span>
Expand Down
2 changes: 1 addition & 1 deletion scripts/features/charactor_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(self, regex_text, text):
def character_ratio(self):
    """Return the share of ``self.text`` matched by ``self.regex``.

    The ratio is the number of non-overlapping matches of ``self.regex``
    in ``self.text`` divided by the length of ``self.text``.

    Returns:
        The match count divided by the text length, or ``0`` when the
        text is empty (avoids dividing by zero).
    """
    match_count = len(re.findall(self.regex, self.text))
    text_length = len(self.text)
    # Guard the division: an empty text has no characters to take a ratio of.
    return match_count / text_length if text_length else 0


Expand Down

0 comments on commit 751e975

Please sign in to comment.