-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHelper.fs
57 lines (44 loc) · 1.7 KB
/
Helper.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
namespace NaiveBayes
open NaiveBayes.Ingestor
open NaiveBayes.Classifier
module Helper =

    /// For each document type in `docTypes`, computes the share of entries in
    /// `data` whose label (the first element of each tuple) equals that type,
    /// expressed as a percentage rounded with `System.Math.Round`.
    /// Returns an array of `(docType, percentage)` pairs in the order of `docTypes`.
    /// NOTE(review): the result for an empty `data` array depends on how the
    /// external `proportion` helper treats a zero total - confirm.
    let docTypePercentages (docTypes:SMSType[]) data =
        let total = Array.length data
        // Number of entries in `data` labelled with `docType`.
        let countOf docType =
            data
            |> Array.filter (fun (label, _) -> label = docType)
            |> Array.length
        docTypes
        |> Array.map (fun docType ->
            // Bound as `percentage` so the external `proportion` function
            // is not shadowed by a local of the same name.
            let percentage =
                System.Math.Round ((proportion (countOf docType) total) * 100.)
            (docType, percentage))

    /// Applies `tokenizer` to every message in `msg` and returns the union of
    /// all the resulting token sets.
    let vocabulary (tokenizer:Tokenizer) (msg:string seq) =
        msg
        |> Seq.map tokenizer
        |> Set.unionMany

    /// Builds the vocabulary of a labelled corpus: takes the second element
    /// of every pair in `doc` and feeds those texts to `vocabulary`.
    let allTokens (tokenizer:Tokenizer) doc =
        doc
        |> Seq.map snd
        |> vocabulary tokenizer

    /// Obtains a classifier from `train trainingData tokenizer classificationData`,
    /// runs it on every `(docType, msg)` pair of `validationData` and prints the
    /// fraction of pairs for which the classifier's answer equals `docType`.
    /// Raises `System.ArgumentException` (from `Seq.averageBy`) when
    /// `validationData` is empty.
    let evaluate (tokenizer:Tokenizer) trainingData classificationData validationData =
        let classifier = train trainingData tokenizer classificationData
        validationData
        |> Seq.averageBy (fun (docType, msg) ->
            if docType = classifier msg then 1.0 else 0.)
        |> printfn "Correctly classified: %.3f"

    /// Shared implementation of `top` and `rarest`: tokenizes `docs`, orders the
    /// distinct tokens ascending by `rank tokenized token` and keeps at most the
    /// first `n` of them as a set.
    let private selectTokens rank n (tokenizer:Tokenizer) (docs:string []) =
        let tokenized = docs |> Array.map tokenizer
        tokenized
        |> Set.unionMany
        |> Seq.sortBy (rank tokenized)
        // `Seq.truncate` rather than `Seq.take`: `Seq.take` raises
        // InvalidOperationException when fewer than `n` tokens exist.
        |> Seq.truncate n
        |> Set.ofSeq

    /// Returns the set of (at most) `n` tokens of `docs` with the largest
    /// `countIn` value over the tokenized documents. When the corpus holds
    /// fewer than `n` distinct tokens, all of them are returned.
    let top n (tokenizer:Tokenizer) (docs:string []) =
        selectTokens (fun tokenized token -> -(countIn tokenized token)) n tokenizer docs

    /// Returns the set of (at most) `n` tokens of `docs` with the smallest
    /// `countIn` value over the tokenized documents. When the corpus holds
    /// fewer than `n` distinct tokens, all of them are returned.
    let rarest n (tokenizer:Tokenizer) (docs:string []) =
        selectTokens (fun tokenized token -> countIn tokenized token) n tokenizer docs