
Commit

update project
moh4med committed Dec 29, 2016
1 parent a591ab1 commit ac06627
Showing 8,294 changed files with 593,587 additions and 0 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
11 changes: 11 additions & 0 deletions .idea/filtering.iml

7 changes: 7 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

4 changes: 4 additions & 0 deletions .idea/misc.xml

8 changes: 8 additions & 0 deletions .idea/modules.xml

533 changes: 533 additions & 0 deletions .idea/workspace.xml

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions Classifier.py
@@ -0,0 +1,85 @@
import math


class classifier:
    def __init__(self, getfeatures, filename=None):
        # Counts of feature/category combinations.
        self.fc = {}
        # Counts of documents in each category.
        self.cc = {}
        self.getfeatures = getfeatures
        self.thresholds = {}

    def incf(self, f, cat):
        # Increase the count of a feature/category pair.
        self.fc.setdefault(f, {})
        self.fc[f].setdefault(cat, 0)
        self.fc[f][cat] += 1

    def incc(self, cat):
        # Increase the count of a category.
        self.cc.setdefault(cat, 0)
        self.cc[cat] += 1

    def setthreshold(self, cat, t):
        self.thresholds[cat] = t

    def getthreshold(self, cat):
        if cat not in self.thresholds:
            return 1.0
        return self.thresholds[cat]

    def fcount(self, f, cat):
        # Number of times a feature has appeared in a category.
        if f in self.fc and cat in self.fc[f]:
            return float(self.fc[f][cat])
        return 0.0

    def totalfeature(self, f):
        # Total appearances of a feature across all categories.
        return sum(self.fcount(f, c) for c in self.categories())

    def catcount(self, cat):
        # Number of items seen in a category.
        if cat in self.cc:
            return float(self.cc[cat])
        return 0.0

    def totalcount(self):
        # Total number of items seen.
        return sum(self.cc.values())

    def categories(self):
        # All known categories.
        return self.cc.keys()

    def train(self, item, cat):
        features = self.getfeatures(item)
        # Increment the count for every feature with this category.
        for f in features:
            self.incf(f, cat)
        # Increment the count for this category.
        self.incc(cat)

    def fprob(self, f, cat):
        # Pr(feature | category): how often the feature appears in this category.
        if self.catcount(cat) == 0:
            return 0
        return self.fcount(f, cat) / self.catcount(cat)

    def weightedprob(self, f, cat, prf, weight=3.0, ap=0.5):
        # Weighted average of the assumed probability ap and the current
        # probability, so rarely seen features do not get extreme estimates.
        basicprob = prf(f, cat)
        totals = sum(self.fcount(f, c) for c in self.categories())
        return ((weight * ap) + (totals * basicprob)) / (weight + totals)

    def acceptword(self, word, cat):
        # Every word is currently accepted; the weighted-probability filter
        # below is disabled.
        return 1
        # x = self.weightedprob(word, cat, self.fprob)
        # if x > .01:
        #     return 1
        # return 0

    def classify(self, item, default=None):
        probs = {}
        # Find the category with the highest probability.
        maxprob = float('-inf')
        best = default
        for cat in self.categories():
            probs[cat] = self.prob(item, cat)
            if probs[cat] > maxprob:
                maxprob = probs[cat]
                best = cat
        if maxprob == 0:
            return default
            # return self.handlezero(item, default)
        # Make sure the probability exceeds threshold * next best; prob()
        # returns log10 probabilities, so the threshold is applied in log space.
        for cat in probs:
            if cat == best:
                continue
            if probs[cat] + math.log10(self.getthreshold(best)) > probs[best]:
                return default
        return best
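
The constructor takes a getfeatures callable that turns an item into its list of features. Such an extractor is not shown in this commit; a minimal sketch of what it could look like (the name getwords, the regex, and the length limits are assumptions for illustration only):

import re

def getwords(doc):
    # Split the document on non-letter characters and keep
    # lowercased words of a reasonable length.
    words = [w.lower() for w in re.split(r'[^A-Za-z]+', doc)
             if 2 < len(w) < 20]
    # Each distinct word becomes one feature.
    return list(set(words))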
Binary file added Classifier.pyc
Binary file not shown.
46 changes: 46 additions & 0 deletions Naivebayes.py
@@ -0,0 +1,46 @@
import math

from Classifier import *


class naivebayes(classifier):
    def docprob(self, item, cat):
        # Sum of log10 probabilities of every accepted feature given the category.
        features = self.getfeatures(item)
        p = 0
        for f in features:
            if self.acceptword(f, cat):
                p += math.log10(self.weightedprob(f, cat, self.fprob))
        return p

    def prob(self, item, cat):
        # log10(Pr(category)) + log10(Pr(document | category)).
        catprob = self.catcount(cat) / self.totalcount()
        docprob = self.docprob(item, cat)
        return docprob + math.log10(catprob)

    def handlezero(self, item, default):
        # Fallback comparison of 'spam' against 'ham' using raw (non-log)
        # weighted probabilities over the first half of the features.
        features = self.getfeatures(item)
        p = 1.0
        l = 1.0
        cat = 'spam'
        acat = 'ham'
        catprob = self.catcount(cat) / self.totalcount()
        acatprob = self.catcount(acat) / self.totalcount()
        length = len(features) / 2
        for f in features:
            if length <= 0:
                break
            if self.acceptword(f, cat):
                p *= self.weightedprob(f, cat, self.fprob)
            if self.acceptword(f, acat):
                l *= self.weightedprob(f, acat, self.fprob)
            length -= 1
        if p == 0 or l == 0:
            return default
        if p * catprob > l * acatprob:
            return cat
        return acat
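
A minimal usage sketch of the naivebayes class, assuming a getwords-style extractor like the one sketched above; the training strings and the threshold value are illustrative and not part of this commit (only the 'spam'/'ham' labels come from the code itself):

cl = naivebayes(getwords)

# Train on a few labelled examples.
cl.train('cheap meds buy now limited offer', 'spam')
cl.train('meeting agenda for the project review tomorrow', 'ham')
cl.train('win a free prize click this link now', 'spam')
cl.train('please find the attached report from yesterday', 'ham')

# Require 'spam' to beat the runner-up by a log10(3) margin before
# committing to it; otherwise fall back to the default label.
cl.setthreshold('spam', 3.0)

print(cl.classify('free prize offer click now', default='unknown'))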
Binary file added Naivebayes.pyc
Binary file not shown.
Binary file added SPAM FILTER.docx
Binary file not shown.

0 comments on commit ac06627
