From f39cb1f1276f84ca5ebe01c761cc1df454938cb1 Mon Sep 17 00:00:00 2001 From: Akash Bahai Date: Fri, 24 Apr 2020 09:48:06 +0200 Subject: [PATCH] Added files for protvec representation --- alignment/__pycache__/__init__.cpython-35.pyc | Bin 0 -> 107 bytes alignment/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 141 bytes .../__pycache__/pairwise_align.cpython-35.pyc | Bin 0 -> 7170 bytes .../__pycache__/pairwise_align.cpython-36.pyc | Bin 0 -> 6671 bytes alignment/pairwise_align.py | 229 ++++++++++++++++++ .../__pycache__/__init__.cpython-34.pyc | Bin 0 -> 108 bytes .../__pycache__/__init__.cpython-35.pyc | Bin 0 -> 108 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 142 bytes .../classical_classifiers.cpython-34.pyc | Bin 0 -> 9922 bytes .../classical_classifiers.cpython-35.pyc | Bin 0 -> 9860 bytes .../classical_classifiers.cpython-36.pyc | Bin 0 -> 8944 bytes .../cross_validation.cpython-34.pyc | Bin 0 -> 9161 bytes .../cross_validation.cpython-36.pyc | Bin 0 -> 8938 bytes classifier/cross_validation.pyc | Bin 0 -> 9407 bytes ...sequencelist_representation.cpython-35.pyc | Bin 0 -> 5237 bytes ...sequencelist_representation.cpython-36.pyc | Bin 0 -> 4923 bytes .../sequencelist_representation.py | 121 +++++++++ utility/__pycache__/__init__.cpython-34.pyc | Bin 0 -> 105 bytes utility/__pycache__/__init__.cpython-35.pyc | Bin 0 -> 105 bytes utility/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 139 bytes utility/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 143 bytes .../__pycache__/file_utility.cpython-34.pyc | Bin 0 -> 9476 bytes .../__pycache__/file_utility.cpython-35.pyc | Bin 0 -> 9423 bytes .../__pycache__/file_utility.cpython-36.pyc | Bin 0 -> 8702 bytes .../__pycache__/file_utility.cpython-37.pyc | Bin 0 -> 8681 bytes utility/featurizer.py | 19 ++ utility/visualization_utility.py | 130 ++++++++++ 27 files changed, 499 insertions(+) create mode 100644 alignment/__pycache__/__init__.cpython-35.pyc create mode 100644 
alignment/__pycache__/__init__.cpython-36.pyc create mode 100644 alignment/__pycache__/pairwise_align.cpython-35.pyc create mode 100644 alignment/__pycache__/pairwise_align.cpython-36.pyc create mode 100644 alignment/pairwise_align.py create mode 100644 classifier/__pycache__/__init__.cpython-34.pyc create mode 100644 classifier/__pycache__/__init__.cpython-35.pyc create mode 100644 classifier/__pycache__/__init__.cpython-36.pyc create mode 100644 classifier/__pycache__/classical_classifiers.cpython-34.pyc create mode 100644 classifier/__pycache__/classical_classifiers.cpython-35.pyc create mode 100644 classifier/__pycache__/classical_classifiers.cpython-36.pyc create mode 100644 classifier/__pycache__/cross_validation.cpython-34.pyc create mode 100644 classifier/__pycache__/cross_validation.cpython-36.pyc create mode 100644 classifier/cross_validation.pyc create mode 100644 make_representations/__pycache__/sequencelist_representation.cpython-35.pyc create mode 100644 make_representations/__pycache__/sequencelist_representation.cpython-36.pyc create mode 100644 make_representations/sequencelist_representation.py create mode 100644 utility/__pycache__/__init__.cpython-34.pyc create mode 100644 utility/__pycache__/__init__.cpython-35.pyc create mode 100644 utility/__pycache__/__init__.cpython-36.pyc create mode 100644 utility/__pycache__/__init__.cpython-37.pyc create mode 100644 utility/__pycache__/file_utility.cpython-34.pyc create mode 100644 utility/__pycache__/file_utility.cpython-35.pyc create mode 100644 utility/__pycache__/file_utility.cpython-36.pyc create mode 100644 utility/__pycache__/file_utility.cpython-37.pyc create mode 100644 utility/featurizer.py create mode 100644 utility/visualization_utility.py diff --git a/alignment/__pycache__/__init__.cpython-35.pyc b/alignment/__pycache__/__init__.cpython-35.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afe6eaa35655eb477475600b959a096ff9acdd84 GIT binary patch literal 107 
zcmWgR<>fkg=x`JR5IhDEFu(|8H~?`m3y?@*UP3)AMpu i^GfvN<1_OzOXB183My}L*yQG?l;)(`fpioDF#`bXFcsGT literal 0 HcmV?d00001 diff --git a/alignment/__pycache__/__init__.cpython-36.pyc b/alignment/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9780b98e2f087ca7b81f7007ebb832fc8f9e8c72 GIT binary patch literal 141 zcmXr!<>l(q-yY2X1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnFDw0w{M=Oi#H7TG z#7zCvg3OZqf>hmtqSTbk7&-6`t88mm>9R`G=ztnpr_snv^}?no_c6b>7hTD9`?{4awy!MdNm5P-+RMdE)^w>5tQN% zXXoe5n>X)$@4eBf@$t$>zx%N9hl@gdEOH+c^;>wNH59H84U|&2hGg)@ZTTVCL3{|vXf7^NBZK727oyfPJwZFL;If3W8 zeqi5@HXP+w9)WAI+%sopuC%C{ehgG@;fXpZJZuYlLTU6F1#b=B6Ii7>GwfPi$Hq#+ z9pO4QRu*oB>u4Mm?ikmxqjBL*a2*3Cg*(Odig1r%r_;&kYpb{0{zkCv1#znY9_T~A zByV~m;(Q-GNb#m2)H(645L=L#!D0^Xa-Ye4*VDd&?t7m5irn{l+Gpv$V`pXFL`dQwEcVUizu4Ux{1idO%ns2hnfb2X8^vhdEu;|q`h_NaCHO2_fl zLqGDG+%(hKdt6y;L#VBAyK|GcjfnrU=v~WM#7wj42Y%da&MdaWmeY=IqLNO^RTDW| zmq`JKJX<$Z34{{4rCZwTnx!)FRY8$o>(b|4OO0aYa=n~Pw-}h35zfGiaHy>~DR!Vn zaZ>PheY|u#9Z#t-;(-{NOQu?zUhB;jZ|5GJxF7E&me#mr6kBRF^-H~=CWu9vsOSij z5zKCqe((nBQuot7l95y{Q+SImpb(`oIVJm#QIREMQcjxBNYu=WM(@N?8*P!q=|h_v z$8!fyG=kO`nj^`O4-Ccu)P*=Oh0hw*M5D=Aug3r@fQyExY50K70X||FI#7#Yv)@fd zkFVMhG4PT>L&kt0*nT`hs=CF{a!?fX%JqFvu|yTf7yX=P^$|SDSO`! 
zr1%~&#pnj6h@qE`oSB=oyTt6J`&wGN#(b5<0oDSsvZ&Ff;cLdl)@WY@mk&bCrZw=zi+k7nE*VaET(j8Emq#bd+~ogpUE0m5sM zgF4asb%u-DB}afR(t1A(9JObM>$WxpsyXepO?Ga#GCa#>D=+S8=EH|-6lV|Tp3X~%mE-5EWs6+U$Cx8WW>xmY4A3{0&&Hj=hS^qfV3J+2)iNF%Q& zNC)r99q227uP&DX9VI_<{S80v0?`B0zotA z-aRB}#_d;95yMB!eQptF-lBKoNyPxk)Akqa-j}pQ!l1pElQ43aaFP_GP8;sDk||o0m`<>#$m6MFC=#=cNo9{s0J%b9B0NkATcIDYD3dbw z*X#s9j`}G3&c-+_tGVvCy=KRWHxtW8c-QtCB{VNNv6B>8-4nUouuyZo-K0P*NdZS4 zHLO;@jaWK-IFsOL22b=26y%J@jIx|Gju+083pOUrQ~Y~Tn({mzQ`Y2|IW14ik~}A` z6ioRv+A7?J@v?W}vo$<(-DKe=o;!G=c@&v}Q3c+GUGAIkx=&FGv4U7X8$o`85-#}Z zm3pD~L58gqf4{y79NG4eV7b5+1TY9q*M~z>Ixl-&h1=Y`?nJSJ_`B^5(Sd9+UE}`X zEbXY{wfuGe(*Q0b@%l^4brVPH#p+q?j*lNkLZK1|NF3B3K8zFSMevJQ28FQB%7QT^ zd*_BkdMGB#(2Ibb9v0}I0BjH!QY8{4Cxzr7@rCEeck~+#vHKzgOiOF?6fV&ul%_bL zr=wQGA<(l3!5?Zg;qG4$t3-i<7*If9r709r3N#!3UsKQvXs)=CC_Ds(sss-@`l6U< zX{{*1&F;U6aK8B;=@J~!q31La=U-k3i*v$@q>UJ{IMm`%cq{cY&(k#>tJJCp+y+}Z zOvn^fL}Xcix){qt@3M+;4WShRAOva~CPiq2kP8bkliBVOu52OpJKWxnU-E-VDv4O# z`B1DP9Hr}p?`GXt>E(IMBder2ZiTJ z@l`UW#JU?qo{AG|$!jA_Xp9h=y@IspupOMjB#wA&R79~RPnzR^o0{a(tjenKsQg4K zrCrx&Zetw$u{3xuKgsQ)c4!xaQQ0O4f+(3cY1&5`hG-u!U_b!i2HQBTK}a{JO>Fq@ z;eyB%?@6)yH-aNfLBbL)v;A{D0z@!1^wxk1EEPhm?4s#cscq?{YPdJ3f-KF#Fwn&M zwRe#!VU_+XC5stjJ2#k^!M9KUJenAeBc+g^N6nvWHLS6UeF7)n(HBjeE#jgn25d1v zgANG>>7pq(RVs>iB;QFP+p)xF4n;NyoG*#p>jX z-GbXc15j5IJ3o}GNP;m0!1)&QA(3aU)W>>%&I}=SwCmk~Q!3ksnK-fkz(c6H5e~fT z{caq?(IOGvg zZf$l0L;zU+`qe?I5oHv#VG!~$@I98#XPQi=NCr-p7!mNnGee8k&X5nq`9NhzYjEJ? 
ztFnuwN3UTh16mprkDvjGnGwxuoFfzA%-|zb(%Yk%ylL^K`hXO~xx_2W3e+vk{i3Wg zk)v6iJH+bm2dt_Z>OG@US+19I{vmS+>IL+3ypveC(^P(^?-tZ`>bQZTQRee~4e;NX z;woJuH^Xk+>Bfz6G~3kYhU#2a<)$X0=Eg`mAm#lw5vBmPkHf?A`zbo2ub~iDRUS7i z!z!LNP8wt8NqG`CPUFUyaZ*-{@j?af6NV+9!=Gi0%LzFxCyZh29wwcg04WMRjNQLL zg-npK0d@{wlVM#IAW;S2;2MeA$m9^=Z1D?BeXU8T01Jodzl~{y)cRq`6w`ntgB?L% z75{pm%WvF{%wsJAGhU0Xt@UhJKTrf;T67+(?#v6eZG8z3-Ce0yme~e+#r@0pdo1^R z#(pJ{m$R*r9Tf1a;EB$l=-a_m{}uvN2HLOfi6oKNeyvQ@$_K>C|5qYMw2>BH{e<%8 z+~|?&PZoPLhv(4K=a8>{v#en_aaWfy?tk(4F~;I0O@cX3;VM3Sn8@gmRFWyyt=!K4Ld#F15KzZ6P{=9cxN#b)ddV10 z3Jy$G)`It|i_GC)O{hoZ1ceVvM%R4h^P zG8KGsgMtB!j*g~Al-MF|yfAiSYz(D^zZ;5raqC=aHr=p=bFMqL$Y!(bdRN9Y1j5cjcD-xj#MWZ$*nkqkD#TF?L&B&%)jQkk z+3wlu8hdxtBSjh`Y{(&zkT{V$IU$7L#)%6YkdP3EK5)s&{0Tk)zgIQWGqc`&I1)4J z>guX`uU@@+@AqESx#?-|7wdog@!y^{j1P^GpN0BmJn4%lxKTAY<7SgpS;+RxJ&RjC z#&nxqEvY`I>Zo_QTK=xVZC<)-@KVpLy4<{ORGiKaysIJ%WXSy__OG{Ne>=JDZ*KY< z&tKfwym;;#{^r?@=e)~7Tka+zz36{0xE=1I6gPu3@}Kd)y_*Jc$axg|SJFlxBJTl2 zGqyX{+S+=Zs)L`2%4Iz1D=0#v${@cr4P`j#?FxtTb9xd6g zmwGI{3rcrL`7>jOsYSUMQ{t10#x?2WU#*fiRFt@iiK0~^Y=#%RfGt`qJ2fObmJ8V$2Qo(Y=kH%7(G&9$>Ri-d!3 zRXn`BzMJfa>%q-nH;C55RwR>F_-sprJgUnmiPvSA%5|(+8|!*2h{T;J4Qr}tt#$O! 
z+iF6|^<=+wX_dsF^*Bc7+=z*<)oM{3$y#k~tC`e;W_k&g0ntb`XB!ok+t~DtiYdxq zl(RkE(%jKPPmtsuiqW}NKJ8p$5|rDOiF~fEP^6Y(Pr?+NT$g#N1+$X59UeybXt!FS z5K|-r3ACBd)_24Dt?S{z8)N~IJjxwyf%znsRIlleiY;bHL?RTlrMV008rh3lK!xdk zT1V~<^fHUD^b884?6N7=e@vG-rpG+%6j~f>+3d`pw9q?@dkqU&XD zHnD`&?wNdgZ<2a52j5-;@VaHZ%MQF7#=$!`jH6{fLr9J_Jf$^MNV;4|`owUqQOURq z$yoXhB%2+MkW(3A80iAKo>p;SzVrJ6y>wJd0Wf{X3&fG1?D*P1sTMSwK6#&CFHonT z7NKm5I8`t+K1MSbWg0XSfaje^?)r_O<+s8(Xv!l6umwNNmD~w#HUZ*~PL?xI0m>K( zkqxHwET8~>&2$-pj?pvTz^)#c91vAu+wkRbl=7$%$-aXN7vqO`B*-rwPHZ*8I6Q2L zOWz`v#M?GH8>7bmsQ->&%YeG`#!E3AT!eM$?<4{q>hn!AYt2)jdi3Hbw(s1rOUb6<0Rnl`f;#fT3SB;v11ef@mUCCXdWI zw~(*p_FfXjN|pHp)>muA0Ex<^3SQM|cv5XAYKFB|Aa`>oLcY}ut7SA_3uKU&lzHcD zyXv6E!^7OBmfXe`r&XukZzGY8Z_X2tXbq1#F;FygO^12rjJ-fH(DbZX^q$^kNP}Yu}!v&8saC~FrIbJe7t2Bv6DR7!lPm*=qLXoLxsct=kFL z=|sXU;F{g}Wr3WO1HZltsMrs20^xuUBn?OpcgE90+9H2PB>Sp)CrD+09Jm>d@qs!) zu2FrseSX>s>(Ng1Q5aURc>O!u6$`r=O7RR9q;?#BLFtJ~N>pNfT>mNV)I(p!YC`V=2c|Q ztR-Z=^lU-LL|?R6TgJgNHw@5GMzMwrhUjcfM>Lt;rz4JPUe!W$KLOppGm~;0%1J<< zcdf&JWNf76&x#}FtmJ+s|+cK6*`=yGQv}+kVe``W$@p{Ur8o_R z%$;jt6Op^>65zaoqr-8#rL;rkA3iEZX^zcU9yN1r)T~mN&RU^|EBa70u0cBE;y_poLcUMB7^MBh!Am7N`3y#1ry~ef z3-!N;kt!q`c4v81#PMNcQW9_}Dd+l9a&jMX0~k3>p!B#Bt4-B=OkqXGmXBS$D8x^`Xa;43&wtKYBa?er+jMC2-?pxfC=*87o zw|uynl>lC=T^zu0q?lMVo;TOa3G`GA=0x(G7M8U+fpK8A$_H*4oT;OQ6@4`?Yk7(P~U-ns|JNM zh*mfUtB`{<8a?PnHhnb8K%Kuof&|H9eYYJWlw$f9&JNS^wBVp^mLN$X(nI!aVPxba zLq7Y_~M^ZGz;l)fhH1Lqssx101s0^~` zwNYzVi|Io|EJqF}MFuS{gZ8tEsEAJ%ar2mne;tY_mQe4Qp0`~oj{tRn%;Gupt3)Ap za3Lq6Rv(|m3)FD|MRh{$=c~Zs>a42LC1x#Y%T`-fC(@0Y-X~Nyi>hj>A;YY?g8>8C zuMp!GsUV%^Gvj*uHQLhiC@2+~F>TW>&6#tis{q|aV(6N4ND)2T1;9_4HammAjZ2d$ zHjDA&C_2u%&^#FiJqktdq5?02Uvq1(k2|=Au;EK4SD_s>>+p5L4|w6rU7O?LgDwiR z|7Inm2}mh2i%CCGx|(I2Y9KvYe8opK?%5^Y({*Oic3^nqvB;M)Q$}c1w&*AM4Hl(4hN<) z?ge)WrGx(kLA|)}EZ1s0sbgL%JxX(1pW6c5amG9^C+V7MFq1TQWV9dRz-Wq4(VjGA W?r!OIT%tSLl;FdrLl++Wt^Wbju{yT^ literal 0 HcmV?d00001 diff --git a/alignment/pairwise_align.py b/alignment/pairwise_align.py new file mode 100644 index 0000000..560b345 --- 
#! /usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 22 17:12:34 2015
@authors: Kaveh Karbasi & Ehsaneddin Asgari
"""
import sys
sys.path.append('../')
import os.path
import timeit
import re


class PWAlignment:
    """Global pairwise alignment of two sequences (Needleman-Wunsch style).

    The substitution scores and the (linear) gap penalty are read from a
    whitespace-separated text file with the following layout:

    * line 1: the N allowed characters;
    * lines 2 .. N+1: one row of N integer scores per character, in the
      same order as the header;
    * last line: a single integer, the gap penalty.

    Attributes set by the class:

    * ``scores``     -- dict mapping a two-character string to its score;
    * ``characters`` -- list of the allowed characters;
    * ``gap_pnlty``  -- integer gap penalty;
    * ``seq1``/``seq2`` -- the upper-cased input sequences;
    * ``matrix``     -- ``matrix[i][j] == [score, pointer]`` where ``i``
      indexes ``seq2`` (rows) and ``j`` indexes ``seq1`` (columns), and
      ``pointer`` is 0 (diagonal), 1 (left) or 2 (up).

    Every validation failure prints a message and terminates through
    ``SystemExit``.
    """

    # Scoring matrix file per sequence type.
    # NOTE(review): these paths are relative to the current working
    # directory, not to this module -- confirm how the tool is launched.
    _SCORE_FILES = {
        'dna': "../config/dna_dist.txt",
        'rna': "../config/rna_dist.txt",
        'protein': "../config/protein_dist.txt",
    }

    @staticmethod
    def _fail(message):
        """Print *message* and stop the program by raising SystemExit."""
        print(message)
        sys.exit()

    def __init__(self, seq1, seq2, seq_type):
        '''
        Load the scoring matrix for ``seq_type`` ('dna', 'rna' or
        'protein'), validate both sequences against it and fill the
        alignment score matrix.

        seq1 is laid out along the columns and seq2 along the rows of the
        score matrix. Both are upper-cased before use.
        '''
        try:
            matrix_path = self._SCORE_FILES[seq_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which the original
            # equality chain also treated as "not a valid type".
            self._fail("Not a valid type")
        # sets self.scores, self.characters and self.gap_pnlty
        self.create_scores_dict(matrix_path)

        # Sequences
        self.seq1 = seq1.upper()
        self.seq2 = seq2.upper()

        # Both sequences must be valid; a single invalid one would otherwise
        # surface later as a KeyError while filling the matrix.
        if not (self.checkSeqValidity(self.seq1)
                and self.checkSeqValidity(self.seq2)):
            self._fail("Characters in the sequence does not match the scoring matrix")

        # One extra row/column for the empty prefix of each sequence.
        self.matrix = [[[0, 0] for _ in range(len(self.seq1) + 1)]
                       for _ in range(len(self.seq2) + 1)]
        self.calc_matrix()

    def create_scores_dict(self, mat_file_path):
        '''
        Parse the scoring matrix file at ``mat_file_path``.

        Sets ``self.scores`` (pair string -> score), ``self.characters``
        (allowed characters) and ``self.gap_pnlty``. Trailing blank lines in
        the file are ignored. Any format problem prints a diagnostic and
        raises SystemExit.
        '''
        # the context manager guarantees the handle is closed
        with open(mat_file_path) as infile:
            lines = infile.read().split("\n")
        # A conventional trailing newline must not count as an extra line.
        while lines and not lines[-1].strip():
            lines.pop()

        self.scores = {}
        header = lines[0] if lines else ""
        self.characters = header.split()

        # the header may not contain any numerical character
        if any(ch.isdigit() for ch in header):
            self._fail("Incorrect format detected in the scoring matrix:\n ** no numerical character is allowed")

        size = len(self.characters)
        # header + one row per character + gap penalty line
        if len(lines) != size + 2:
            self._fail("Incorrect format detected in the scoring matrix :\n ** # of lines doesn't match the character set")

        for lindex, line in enumerate(lines[1:-1]):
            try:
                vals = [int(x) for x in line.split()]
            except ValueError:
                self._fail("Incorrect format detected in the scoring matrix in line: " + str(
                    lindex + 2) + ":\n ** only numerical value is allowed")
            # each row needs exactly one score per character
            if len(vals) != size:
                self._fail("Incorrect format detected in the scoring matrix in line: " + str(lindex + 2))
            row_char = self.characters[lindex]
            for col_char, score in zip(self.characters, vals):
                self.scores[row_char + col_char] = score

        # the last line holds the gap penalty: exactly one integer
        try:
            vals = [int(x) for x in lines[-1].split()]
        except ValueError:
            self._fail("Incorrect format detected in the scoring matrix: \n ** incorrect gap penalty: a single number ")
        if len(vals) != 1:
            self._fail("Incorrect format detected in the scoring matrix: \n ** incorrect gap penalty: a single number")
        self.gap_pnlty = vals[0]

    def checkSeqValidity(self, sequence):
        '''
        Return True when every character of ``sequence`` appears in the
        scoring matrix header; otherwise print the sequence's character set
        and return False.
        '''
        seq_chars = set(sequence)
        if seq_chars <= set(self.characters):
            return True
        print(seq_chars)
        return False

    def calc_matrix(self):
        '''
        Fill ``self.matrix`` by dynamic programming.

        Each cell stores ``[best score, pointer]``. On ties the pointer
        prefers diagonal, then left, then up.
        '''
        n_rows = len(self.matrix)
        n_cols = len(self.matrix[0])
        gap = self.gap_pnlty

        # First column / first row: only gaps are possible.
        for i in range(n_rows):
            self.matrix[i][0][0] = gap * i
            self.matrix[i][0][1] = 2
        for j in range(n_cols):
            self.matrix[0][j][0] = gap * j
            self.matrix[0][j][1] = 1

        for i in range(1, n_rows):
            for j in range(1, n_cols):
                pair = self.seq1[j - 1] + self.seq2[i - 1]
                # index in this list is the pointer:
                # 0 is diagonal, 1 is horizontal, 2 is vertical
                options = [
                    self.matrix[i - 1][j - 1][0] + self.scores[pair],
                    self.matrix[i][j - 1][0] + gap,
                    self.matrix[i - 1][j][0] + gap,
                ]
                best = max(options)
                self.matrix[i][j][0] = best
                # index() returns the first maximum, which fixes tie-breaking
                self.matrix[i][j][1] = options.index(best)

    def getScoreMatrix(self):
        '''
        Return the alignment score matrix (list of rows of [score, pointer]).
        '''
        return self.matrix

    def getAlignScore(self):
        '''
        Return the optimal alignment score (bottom-right cell of the matrix).
        '''
        return self.matrix[-1][-1][0]

    def printAlignment(self):
        '''
        Trace the pointers back from the bottom-right cell to the origin,
        then print the two aligned sequences in chunks of 80 characters
        (seq1 line, seq2 line, blank line).
        '''
        row = len(self.matrix) - 1
        col = len(self.matrix[0]) - 1
        # Collected back-to-front and reversed once at the end; this avoids
        # the quadratic cost of prepending to a string on every step.
        top = []
        bottom = []
        while not (row == 0 and col == 0):
            pointer = self.matrix[row][col][1]
            if pointer == 0:
                # diagonal: both characters are consumed
                top.append(self.seq1[col - 1])
                bottom.append(self.seq2[row - 1])
                row -= 1
                col -= 1
            elif pointer == 1:
                # left: gap in the second sequence
                top.append(self.seq1[col - 1])
                bottom.append('-')
                col -= 1
            else:
                # up: gap in the first sequence
                top.append('-')
                bottom.append(self.seq2[row - 1])
                row -= 1
        s1 = "".join(reversed(top))
        s2 = "".join(reversed(bottom))

        # s1 and s2 always have the same length, so one range serves both.
        pieces = []
        for start in range(0, len(s1), 80):
            pieces.append(s1[start:start + 80] + "\n")
            pieces.append(s2[start:start + 80] + "\n\n")
        # printed once to be efficient in time
        print("".join(pieces).rstrip())

    def printScoreMatrix(self):
        '''
        Print the score matrix as a '|'-separated table with seq1 as the
        column header and seq2 as the row header.
        '''
        # "*" labels the row/column of the empty prefix
        ver_header = "*" + self.seq2
        rows = ["|*|*|" + '|'.join(self.seq1) + "|\n"]
        for label, row in zip(ver_header, self.matrix):
            cells = ['|' + label] + [str(cell[0]) for cell in row]
            rows.append('|'.join(cells) + "|\n")
        # printed once to be efficient in time
        print("".join(rows))
literal 142 zcmXr!<>l(o-yY2X1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnFKhjb{M=Oi#H7TG z#7zCvg3OZqf>hmtqSTbkX6qc7x?{n8ab%lU`$cv!s=%RMS=6Q{(FD z&aE29(_~;_ykr-#E5RZm;AMG1o0rwT>=gXU2|` ztOScFyUyHu>(;G%@A>)8Ik*3{G*$T1AN_g#t+Pb`NF!GgfX==|}6?MmZmaqz}?D9Z|=37!15j2cKz}zLCaFEPQHG7tCzl9eX06V?@N}n z-Pm^2eaCb5tB$=_?x?a8zg6w`*m1n}PR9#-7k7GY)pmMkwzh7UFP695rc>Us{MMe; za>}<=r{M&F>$iHVJ7L%jUcPk6^Sag1@h=@QhikoLwR)+N(D^ObcPw@H%}t#ILCe&w zn;p;IRGlDr)AIE4ZROZb6TR%w_KKmG?^wRwY2WN9CkQt^3m41qtxn4gLbq|p zY2jhuc6_uh-l{onYiGNog0bGko37`4Ep$CM+;8yY2G<-4H*g29K>i%aA0!VHk_Rb- znXqDQEv$j!3+MZyPZgP7R`vWsMw$etp5nj-c_9F3URnkBIADtOnz&?_jNB72ou z82B_Q5RY}KJc9~K=yL|8XQfm|={d5`N{4fFm7ji&?DI04^#6w0Tp;`NG8^dJS%~BF z%;7hBmkQn3d8~r?&7~Bh}{$|4rlGWX4z^?}G;h z-A|DE84960!j(fZ-X}7#Cn@4Wg8b(wjCm$}I;2FuPp04JeM1U(B_RcLBTPJ7Qzd*z z8=Ecs!S!z&I^&7l>dbvd`HrWPYP(sy@x+)j=ospD;1^KOn|Oe~F$)SQZ^!VZzfmDlp+qJxVY^H){RSAM*8A8~r8! z-A3zI7k~Kr-}fF33ctV6@AKhC>%Tty_rHE)`}T&K!mB#rUDnyI<6B<1-}=tK{P_F- z`4>OB-nzKC{?Gq#_4I~Lh8^$bJ)N@mpx#qWEW-6eM>U*oh*z)Zln2$sd2=Xx&O^BohW%XA)C-kya)+0ax{YKcEpg2CGJ!ocKFbw)lM z=)~rmA|DZ1;f_;dHiH&37PtsweIMJ?Q z`=q*yEyMm|??lU3xv+g{Y@cARY+nupx^$%oON9c%7?P&gHgJXH?n;SU?s z3)mC&B9n_us!T31d5Ot7NDZpan>}Q!`dO}CW+L273pW+(n9jLD#{-`OSSURI0uxsD z5oVqmW0l#BF^}gG@ilkGc;4u(ALHvt=Sr6C0!yN4>VnCZWe=ih>XK=`3vKH^$(p9! 
zX_%%u#gq6sCO;ofXMAF+hzBfgK@Oyp%osDp@1~`-frj|vHQ&G;Tz>*oK7z%IaB`r0 z(7dOCO2)}W;N6lS-!e)o1gK=3Ttx*g)tXe+Q9%hJ$%uJ|7%I^xf=X^a8{y^d7RvK^Jq7<|=(_aQVgcW}Gl`C|Ib^fQ_a<}Uc@Stdbd^ou01Nx@ z2KA2M4RFZtmaK`{jKbm!(~960nY#3nNe63b1A5}#;2*>@G|$(tBk&nU!uTx54h z8F-b5Gah4Gr)oOC-RXn@e4g$-oo^@?{vP7dda4EgUC*^$-wdpF*K-0h=pqCao2N@{ z>!CUDkaWu03p;vtw6Et_O|gu+20V);d>RA(Cf*C!vJnh8q6s$Q zOQSm*+wfY0dCC{ZMzJxR1^%;pjfgBq$|V=fLKRRWJZ*tyfX!J!=^RQDCNoMGQ5l5F zE%cv3eO9{7p)}7!a<>J6@B;*N#~fGq2ne&`@iR+~JFZJIv1Jal;XJV8x(dftoCkJX z`*CERRLKZ4w40SK>$HNr(c4)%NYO!>4l;Cra*hu2^mdXC3UpB9NBAQ0Oz-0zd>139 z2>!EyDPq=lE6@BiyTi2zo*Aemf)o)`tRp(>_@U}__th(0O9k($P`!$xVl|4uTVPy} z>=(JM%;YsD477r?kwH7b=LXK*xj8hIy#lJ$uG_fpIhFiGR5js#E0#x{^E#GocD8r* znU>=_3NAQ?Fx@t19V|bL6|7Pkdh3Fd2unw8yv}>pw&R(B6Y5l><2QA28#-r7Bxvdx zsl-ZX>SE)8IfzhnNuEYA2_Ef>*>xMCV@nQT(v4y?eg}8J&N<~u#r&xC^_>mFW?MoA7G3xYHXOYyos1^Pw9k9xYxC%Y7R60`A}% zh#Rfuim(_GnqY64^RB`%gpnwK%aHq3F!)=d0ju8|U2sCD;IHoJv>W)AkJQ4~FswFQ zZ3LBsVhf;?L1;&d=JXTRYXdW7y}yI{XIyVQmQLPx_QfmFS>+(a51f12c<&=4ud{uY zM43Q_f%#ikn5UP9VqB8*;G{=T`giaer z0Uey9aCoB&+|Y7D#85iZz$r^3&?!9ib^1MytaNI(t%-OWVaLOq>rOTMjr^TYy}Ig zEKsq$3d((eLcqB#f(y57oKL)9d=Wv_wy|KGm4A-Wd*Kr(Y$f+JQ99#mBpegqxPb=@ zpAz43Dz}uKUz%T1+}^+)U###qa0g62K3E67Wct}u2Dm6pM_3PBlecQM-V1R= ze(NHi_(BTGe*Zl7iD5ab5knl7e|%C?7kzW^f#8ToL0p{2pHCAP|1nR1JsggU z`E(6S8VQAYbiVj+oJRN)RQkG(Q1~{v(0ibqxCN0X5JCS$2D~Lf@YM*O*~gxJLd1)o z2LQhw7x4nm@xJ_C0J(E^ewX1^OZXd~bgkARnHE0*>5G!_s`z}pIcC*(gv1}>i~o+ZD~^yNc6}VL;7KvNrjekK z-){6nA{;d!CvZGL0d62afXo1#ffEuGU>ClB0=uT*klDf=fXPe&dPhamD_S$kit~qJKn(BwUEf z`@*$;Ot?lOh)VWp?DbHLr6lO_YxvY3@aI_X#8jtea*A6k&+)+?@_{=Yg@e7KX?kwM z@dG~PLxnvUWEWAH#_^wpgLT{v_spB7g9b0^hlAB!{OTgAyyt8O08dns!9D=B%dT0* z>H=5odNDx0#ZrDXc+i%gH^?s<>TAQ_BUEt+9t0*1vt8Q?@%2cN*?81v2XFHf+uNQa znaTR%pgB5UiGBhhr%v^`!K2uAi_zALd(kg+s&d-iuS9vCG<*moq2QL1>N0tY3Dl3| zpi;8Yf^xEhBX%R+aGkprot5(OB?S!g8Dr#MqLi2gf-fTt6a6b84TFD)h4RAM!fXEx Dp=>+5 literal 0 HcmV?d00001 diff --git a/classifier/__pycache__/classical_classifiers.cpython-35.pyc 
b/classifier/__pycache__/classical_classifiers.cpython-35.pyc new file mode 100644 index 0000000000000000000000000000000000000000..082ba55af8f21b7dc3cf209873c2d3b71c1b26c8 GIT binary patch literal 9860 zcmeHNON<=Xb-mU9-~4|Z&X8(~BE>evCMhS5ttcGYBT;cU8q*}LLXHYdNKEjx%8jspbwEWC_1K{nZBnE-~pO?EGv*xqCi1W14Y0fa1a?yc_X?%|N8 zWCMnS)J)%g_3BmCd-r$ly{{K1CQ3i}`+qRsexB&ZG;*ZTzK&1$vO(k!nV<%l201A* zQ__7puI}wD!t)Mkuyz?u*zvVz!FcPYlfUz>6(?U)99KbXI{GI zrRzC#Es(P)U5nCHWd_`-E%jzz-wy4-bsR6Kem-p3%G2pLZf*3kSL;{mS9+hbrSJNd ztM0kJyH|Ie-D*cw-Q=!%f5f)yx3@cf)Vs9Z^XiVOk3RqMAxn6(m#)_@*HXH;;RUX(?!2|G(;(=XxOt=F zJL{?whHu%vUc9AT$8BPiGumG>^x|zha60W99p#46x^LsfP0v-hGP}`fd12%=Zo4g9 z482Z(-uauGuGiY$>ZovRboqwoyI+Ys-;4GdJh{O!gT{4y!cRm0T<9M(4-}dQErjNw z0h*ylIE4yA|3vekYS1yKL{6EU3ON(xOj0b{Se&s0A+**s%gtG$7M^^99O%hOYT>a^ zj8$?@N&B=kFyJ|IsxtBnInR@Gmalqw>3M;ib21IFfuGwn&Xe<^Oapv43kAHuGJUgm zxm4{RcXqqoj*6=H-9`jGYuk-&2)5ctgeN0)W&^i1GJQGlcNU*@7KIJ+5L`FtUW(MG zD1xziq{)vV!}hIVXISs<>4If>ffrfU3hoLgLCB~WvqmL#!YJae zR{_)NgF$Kn*S&wm?7aVi_8xv8r-xV@9lp$Do=J%bbM`RB1jDHH=YPh>57zqY{+G4Z zSLgrwjUV#aJ!M2@nPt=?`2()1zYd4kw&_(W&3u} zaGlKtvzQNNEYDdK;U*doKIFM0GTea{K=>&*Kx046IY4JrnH)t_5dhx%X{<*TzGqd0NHqWx+o2b;7;^ET-7|yTr=xV!1C>2Wv&^L8RMjO zOny0;&iKSs85ek1SOBS{bH-HpgRJy6(2*PoI9_=QI6i{MgX#<{4`%m_;K;}*K@B70 zQXd={84*4SICA?Gqa+UiM8@EV&NB>-=-dDMS+L{2(;J{)AJAaXF`xke8PL+30tgA3!S}FuMRSKL zT}#J0oWRu)r4{x0Rerhn9e@vDXK`#HPXZuyf?v1Hgq@_=lz3Woice27c@E^00g$Tl z{b!gUiJ-^PN1a9Yjl1dv9&(<^iy)h34t9FqRc6umwqkrwn4Q6Z#A*KQ(I)l`j5Wu3 zADy-~ZV$cBW0*4HcVZL~zY{we@k>XHnoa&XKBIuKGA_6=fAs&*5o6USgAO*4q3}Ky zbf{nAd!K|F;wRIWAY4$@CwH9T?ryoazF&`ZQzp} zY%tgH32%eA@ci)1pzz5f>C&SF5eDA)5i(r_88J)EPxYQ06^r*|CMMo9PUD`Gz#UKyaU?Q}C-~NhO|!Vw=|mx-obFv+ zY$y*A9&%1I(?Z0qXWCw1g?78^yP*|!k!Fg!)0MXUz#7CvI%Drf9X&ld)_3iuI7M9v zEabRe+eX$DN>mRAp2reCi35KN_l4})2o4CT5Dk>hxpb^@$ zBp#1Y7i2z?N+XZv2z40|s>BK$q4pETA}KPW9PQ*~$SN(NB=k<6_A|7frTrZ3qg|l= zBE6HQ{Sxh$`4ZlU64FE5gZJXeSwf&}V7hl|r+$%x-c9ud2sMUe24~3=MYq(i;Z*0{ zAX1&~o?7FxO!%IP)O9q~H6{Ys0^I^(pXWZt?;*IxPVAV-8rXLG#?U?XN;s`|y~aJ? 
ztrd^tPDg@l#cHbye#dsK&eo1T-Esq0A<)JnEU(SA1v`imqpDSgF}YwPlFw1Uu8Y3C z<@#3WMmp2z1WjGuf}L5C?^$|EP7=ejbh&Zg8ss6mB3I+g1DE#1)q0J{btKb28agoo zzl%@Eyru$Y?nT(%lyMq=7mPgGSB>7uF*_WGWQnZK-C){ zwaF19I}*M8IuH6Hliy&%QMB4%@+Ol_CSPJQ$K*)JDhVq3A2XOUqXu*C$p$kL`y}Bk z;}|EnX?}1aHK-*n!T&=|cgE{UsKFbkAaVr>W|1buB0MNduK_pWr1rotzL}yA4Ei8N zO$?Hv$k!2CeE>wk-zW*Hzdh{qc6t8zd`L|g9QJLLSeB}(${6zxky2%Y(Mir&l79Z_K3XBc42 z0FbaJVqs$=RlkA?v2xubn*LGgRW5}!I_>VYKjgRYD~r@y8&mD4BVGMfzk`}_cnyug zvk+1~@Iu8;OIE{r&(%OSxbRgFFJ8^dBH}^h#T8YN7}7;FAZ6(NG6eh$v4G|8j4n8( zGl)yKb=C_5J3tA63yj+Ev=P=)iamf%hmjL6n(LTYuLGQv_5Kmgf6DbH&(i68?p_=( z=aq}BK6LMD9q?7bmx6sy@Ef7dY+%oo!&-!0pF(<_vGDDA^6P;v`V?F0REIn^Mn3sfVlhp*EU|<1kSL zo$hu*oer=~*q4*ALSTy*81sWs4sMM?u3V8(G8T<AH!g4 zg=fje`OUF%905l*T~4zlkK-b3B|p0`yP&wAmyjH6@YnGPnf&CW9QcwODVN6SIMR3E z8mHhzoxZub*}Irz-ZwAt?k%*S8ua&8pPGL2Y}R=;7eJEqJK4H}mjPH{>kekcjih*~ z%Yij`L3qedLH=9B*C)w;|AHsL5f10Syl;jjjikK%bUyey3A6taCw*H-%6pUJ?PNnF z&U)jNH_3P7WVZ!uA3!rgYcKVL>=wKH0IDZtw}5iE8QJznQ5;Odrtwl-6;HOWmN!)o zbBukCBxhg6=^?u>sjwdMf)Ddd3QUA~tK6Cbk#elg$6Ifq&hG{u?5M2^4(ynB;hvn# zWh}5y0?9*uEw)J*Uxv=&Nboo)G4nRbar+{&Iq_Xw>{lo%#+rnCy&0C$ctXT4;)B1( zC6y#Zkd%aOakLhPXUA?qu%zVY7hID_B=vCX_val@(hB_3(aMey$+DP%u}9zb~j`Q#%YTY)*TQ9pUY<4YSKIrw4cQ z6x&<_WaCk4&4O*h!Q&877ha9`KCf}$9WH96za)%nu z&}U|4EkpLUa_v0mA;=9ajKBzj0)7b!6h(vNy=b3W^zon%y}Wc01W14Y1&lo8`_GU= zE+uVj#V-jutMhZtf6koCfByT?hi7IAuW$Y1m;Uu}Mfo3P;?ePb1tt0q0H#<9Q!SNg zmWFqo>6XC^E5kBYmStJa&sjM)*EZXEW*(`k0$koNbc$B7Q?g33Uhro+v({{9&YF|; zqCekRuomQd$zSZ8vChc%8Gos>Y%R<8S%0OoYOTumoPX9@lXI+<)Ubx!_nSR2nN zY>v&}QP}*EW}QcEfi0r8h}s3z&aframQZ^HwPm(~+6roAmU}^|to9Z@vmZHu%a|9G zKNYnc?g{<5>$|d9%FhHQn;A6SNMTmRr8g!-gA0UeM~T?Z!8a{?B2u7=!= z;vL_?#7)oTXj$A1TV51fi;V3)4dT(7lX4|#N=ck!y{yMG*ez84=hXmO)w z9xqo=q8|ac3b;f8m#E;9$C%EHJ1Tf3Lnp;dmS+W4WFEEw>TlxI0c`AM`CZQX0yh!q~-U4Fj8}hki3CJ1Eho0s1;qZ)$uaR+z^0yJ}yt zRb~Kc!Ah(HC--t!m-r0f`_#wiYk(Q1U}a{FmoQQb1<%j6aZ2ir$oi5eES=Yz!nnZ? 
z+=|X;&_QHxx;$`wtJvjX-KonXt7JKew8(Tq=K8{j54$d>#R~O{Lj9A9A&kiNoBRw= zdw(SL8Y}@BCZZr#^sHR#{O>JJy;;&Z%O{=3$-v46h-Y--|IsRw#JZ1zw zM1U50Tp&O+Kh_AKo6`E5zozG3wudzP&+XP%mwxcvfA#K+UcR|K?DN)k>z8l+{NJCi zU*8s*|5WSlPygY~|NE!EyWHB`+4#wSUs~V3dIR_m3A66_PS9{!t&&T~oa?%QW#4c~R-PkgG$fqdD-P^B_T*VDK26~A2ITXn$7x5A+eWi`FZ$o;R zc2^(heZ397XoDA+-UrvLD#R^!Gth{{7r7cI`hguXO(=s%n9Zkbuqh9PVS|AD9JLyv z&~>=eaj~dKWTFN%fyg%@&2|8+$Od*htVfj$CmN0!KrF-ufons1*zPUIKXCBZ00Ppp!n)qe1JShkjEE z{Q)Xt3PC#bBa$Mbj}8hc?M&ZbDyXD^O3*u!N*Pco+xt-}(l@Z950*#U!Q1VaJB_&9 zbe;GB`ze3GYnGj|54Bhhn`5-Y-73x$xmB1E24Z=+n9iJWlFja}Ek|WE=BjZm2Axtj{eH>=Q zE7VSMG<{-cPn?HM5}w!=nm0K-b4guNdmHy0-bCk$D%DmcN8(PbX4|4*lS(-N@7bbc z+b=_AhAlbUW?{p&`B_>Joi@L8a%rT4IB^s&0#LWe1Tb|&D-^G1B(?!W`lOcWhd`aM zOpvEL3i&XnTN2o%CD?~O))Yo8{GAf275muIVX&wCg zsF;DF9^2^Nxs=Ct_ol5;s$EFSP}xC=HUMCMNUKQuBQ1<3EsQQTiC!BjN-{1|yHyNM z#Fgwb<||kjlKnAaigV=QZPS!e!lBWj%U4b;|2|rsuhE$61c=EwEmvwbeu3T|Ay5W* zIIYefrBCmp*7@UTs9Aa6t0$&(sMz`YfamWg@B{(USo{M3Ru+IBI#GD8B655=1$N|gy1pCPQ5T_*D0Q6M_Q-t@^OGHX*J+AUV8bc)Iu0g?pj649 zc^IYo626Pb(^aH{Tp-1|JW0D$uh!C7go1|DfBHTu@FUfmCci|H2vMkIuq>7*>(;sORLI} z`ciIS3^If4AU7}v`Iq!TVNe{9*Jk!}ed*gC8SrXV@Wk?Ov%gkL%orIo8L&ux5;-@B zd5E~jh+gI~zl_$zs!97sKD=Z;X^`N{5jS(PqWn_?t`MM5QW_!HXQ?cPreg2vDT=69 zu=lQ3yI$j_?^g0t{{57X$cedx>4y%p!+Kj>Xt{yQ;U{7U+v`vy>;!Sj>Xp)zsX^$I zIMInuEAqZmcYQl@V_`JHpec%V2!buc4_nO1N-7z)C^l}{BS%`4WNYF&qv=p8HN==M zlcDLf(?RNo6jMYlgkqX%K{X&BCc+EyQcZ-Fj{(zLKOrwCZJhL1@HNGvsm%N>Dil>f zWuR+)or%tLCB1Cdgb!902Xy`j{c8A{DiVn|*fHKoDK02&Qa zlkaEneI}KfrQbQJ;pC(6&k*>H1SScguF^OU4xy$bg}7nD$(3GI8cj(_;m=W*&jM6t zCnRCYn17Bsk@@D=2<#Ggoi0jGt0$a+)QKab zK=Bh9@8}kiC8Vxbl>ykPohBfTG*yWaRuj|TQP7fk7D}8<#$D}e+H2}-T2rGG+#NH{ z#!&KYqNCj3`F^%tI8x$by9C36uh!^Wn3uaSI(_3+ZB0S1nf7cy!-_}RAlJ{e z=UAydKS~S63z&NmY_8F$BqG3^N$TU4vlw+%X=O0>A_gWiZ@;9GkwJf? 
zBAN24_PW|$>SKjX%=|UwH3f;pGuRi_o{TBiU6A;td+^24>f!|&12CWLNWPi0x8ZsV zwA$f&k=`fQ_VT}dLBUANN6IjHvs|-^(5xfZp+RXYdMJlQXY&F|vve2x zO=)VcgVENMZ%k}a69ycoeUb5^zzGn1?qL|AdP0pV8YkfrdK9x{o0J*A4k;HYclaID zBZ|YkdD019I92mJmedTR6FLJ|!Nb)ik=Ij+7|J-JH5O0Rf zO}u}8^e)uG2?EjjB!S9!ktcmNArM?O1!-d=Axd)YF#XgmIsPFrbdTK@>TOUzwZ9JM z&f%CQ{06>x6D2ur^*5TIPI_{FUMB<@Uo4J|8j~yy&HFu+;k=a6>{8#(GI?H7zJeH)*)S67V zAqPMS)JUjJ{uL}t9=XuFVI=eb_b8;q!KjiHQv1^e6MB%b_&Q2Nxdh60&%-HM(Pr^m zQrA%y)l2HEdKT`>tSsy5MYZ?jyAaNbR;$TkzLUt0)|0Twl;!y(8oq!c;|)r$nYqGh zVU-h}jyiov#IK-8@xE^Wm`?-1LK*;4;8zs-wOXzBWE$^X+oTIF@Ig5kUO$~0^wG>a zH1j)1Z5>l*2uLxoM`KeYmIwA>BC#LPw9;Kn(px2LWFq*Y$>^b9vbA!R#12xTO@5Km zaN_`Vd@0tF*J5xZ=ibn!!ma|Fzfah;00$73k1o*CD}m)&q9SC@?5mjdm^6&cqaD+l zkIA<^HVMf6Q|xg(2OyJfB6kpY;QWp-86=OcJD_L#XcoET_98kFei%0Peu`IxScWXX zSmdW-j!`<1^3wN9M9Vbr-=i@iN|MMdsw>j_=`9e|PrC3l6+QHOlJbcQfARiwXEUHY zt|LMDc}e+S@48P@{&yg3ErV!DOiEnC2Lxnv`ls~vX9WJ7z+Vs`uHk=4fW(g<5x4^| zWETD^At*rQe?{QIZuh&nxOfMNvL9o%gjEm(rmV7Y535Z0#TB(i{~kJ3MjrZGG%d-L z)Rmcn$q9?ZJ#IUYS=tXdZaa#$?RyP3i0JAA74jdDQ$l4HcOnk%3{hO_nYV2h2tVnE zYo|6Ypp(igZasocLq(4cVduK!9c7&^%pJ3e5yE_j2i53SM_%^I`(5kY`0lletHCI; zahvBchi+^W@g&7y&5ypOC3fn*D^pe0(g>ZT$de0Uc{N}yjG9udDs`$@I!OMPKvmu` z4l7A|Ap_IiJ>*SH$U6jHByf`e#UwJtD9wzd;Qfq~FrP^-kS>$MEDetVGn)QsrdEPM UC6k)`A^N93efF{0^Rs)u0zf4y=l}o! 
literal 0 HcmV?d00001 diff --git a/classifier/__pycache__/cross_validation.cpython-34.pyc b/classifier/__pycache__/cross_validation.cpython-34.pyc new file mode 100644 index 0000000000000000000000000000000000000000..76137ce0f37e776314326009d0ccddea5f603c47 GIT binary patch literal 9161 zcmeHN%X1t@8SmNI_paWSWW|oX2`0%Bw2}zeV2n}X#0Fx7iIh0m#7sszz0%0D5BJR4 z(N=t*Hhll&G-I#^v~i%{#Wn)p}O#x5Fd!4D~b9#?l5l&;R#ViX^E;Oyo9JG7-9?07S*Ki zlA@XtUP@Hc!ox2kf{dtUgv^M&tk}y5FDni#VNqXB1bNXYh-yJJilSN+jR{em5RFMu zoirozBA615X;Gav?ScqqL}OM|XGLR9RObw|D1sxRF)ym~rad8o1<_a()kR@l@+O5h zCGG&@DIt%F17Nu%E_u_!n_=jTkV_1G2+&#K%`tSY4?PCx5#h}3M#$^mMr6Dqq`|u3YY>)+(!&)$TW3hOKW)bzKH>zaqU(sijIXo>l3M*p)$J zw-rR)<=w7d@nrYJ<;zz}%caYHU6wAp&7F?BBTH9Qt0u$HZ|-!LcB7~ro?Tf9f_7zi z{gu1Kxs9$}sjQTfx_C+X-llX_ZTG@UdTKLLZsgZ}={+;jt{t)%DI zB0V(#g|(KFWlI+>`hk2g@&iBGFDLYLt<|h|Lci5?8g8WgH}nJ$bKFkN>CZTc)oMNu zLB$OMr_NnY*lD+0DxyUj?saJ=4v(1>IA#W73)mZ+~)i@h;* zXU$oQd)v-xTvwn769!t&JCShA@Ykm2I62Wl&d(n5T}S__a_c@%Zo&s8=m; zp^Q)_wSqm28j*NFF)HnSopl_)=|_&UgbCqs6vE2k&rW14^EYjozl=3$br&xjlCVmD zb}T_zERe*_5>y~lY>C^@2b7Q#l+Xc`Y4OHCL?lE6`AQJ)@q6~#U&QSUi(E!5k-*_` zJt5RKOCK7LxL%tz>Vl^rJA^idus#F4Q=@>Jf5^f_N>9l43maJ3O#Lzusb!SJ!17aFW z2H6#|nk*S}sDS;%2{xG^-9f?vg*J4?y(twOiB7S8lr5FR;IhrwE4Y23Gy6T0Oj15T zbYS6m=C{!q5>15FJwAe1-xct{2Srhpu_n-Cr5#5P!qCTx@hj2j*!l3RX8`(0)M)t) zo!<7FuG+7qF@-KtuqoV3Mk+?g2)FYlw1bT@Y%&SHaW6TN#e+opwIrz@6wUk~x0~fc z_qVYaJ)mG3F%H~q8H7XfYQQ~Kj=@8>(GH|@OZq#z(NJ$+%HfP_odCjJA}d&`BOHOu zm)MrYi0b6Fj9m37XrR*u0T};Q=y092!Eh>j%T-MWi+q@=FSEJV6NfN>jC~cW;E|_? 
zQ4|(6^*8`(jS6DA`WgytwYBApDhb)o=WvJ9C`7K~~FU}g5b|2oVRS=O`Yl#09YLYG+Havw|%uHqqc=vSxKPC&+8I0AI8fD@` z0AIy@Zzhg9zrIuc)H!I>Gv?rXpHmLj*FHt=oqb^L)knE^3`_X;<=)(g3Q{0dE>Hl} zbQ{ufbl!0qEw2N+u<(dIkUhZ8 zo0I}M0m2u`35t4xbEy8icfT3S((}u8Bv&OknJ`>NZq*A4OHrqZpt9sPy^;jD!=WP2 zx=YrPm9j=C(nZeVu({6PGPN1)NT&q>~hKkC2`=5P>T>y&tmA1B$TL0vh!oyA$f3-nX^tH z%*7mqC)p5+gCgW_$^Wg%p+5U=A zn&D)tE?q?IGF%yK){AapfX>C}>OAA;?BF0oJqO0Bhe-fZO4j{I7o<`x)r)z_3+l8% zjzv*h`|Kn)$}`HQse}2&9VMmbgnYE3lYt*b+74yJS)#VFwh?AhV?DJEbetF^N0Fc^ z(r%p%5y?5N?LA%KK};}57iu@1o`}L^au2^o=#+CtGrmK!&ATz}b&98|%LLEGq|`Z% z*?=fs*5oBP<&mCF(kae}Cg~YaTRPBFr)Qw#FR)xVi$dg2Sw|D|iIdjjc;>95R`=v# z4I7)garfW2py}r@U6ILE=c%Cd!6^PUs$63M9kS-sw@!no zkoUCaeY*%{g{I!Y@uZ2`*_y+Sla)tTferfP23RD$*rGqe@??Nx)|~s))7aSrQzSe! zVkuqpq??WRE2xYZZ?Y`VT-#z34g&*2(oZcYm*|^tGVtgH&f~r8oHomu?k{7pe*kmt zH=9x+a=({RZ*`!|_vsrp;$wQ2*$W|`$UG#!A9@WW_`nUbV3oVhC9BC`3b|tf)Er5M zUbuo&D7ZO%p|}V|kL}utnE8VN$TwgLxFZ&L3Q7r0n^1Wl#fTN75EPaS7U~ZA*ox6% zBkrFI@lzq{=wrL}6Lcf?FyW~gf#bg&!KamVij^WnEqroK3tW4FPEg2XCuwDL;QbJ- zyp_P=CE_adWyB(21X!f~5CEBurQ6V@G(}ky1m#Fu&~4q=5mx~w1q9f{#TW>b0b%lO zmb0IITwJq2M7oHg6!em$hGeCXTmhA9^WwFHxQ+J#X23Eb-bsiXI7S+XEDj(O6&zFh z-W%17AGYWyXV9xkSnI?5Ir@@QYc<;E|3({-a^*z#sR6f##UVC%m2*K0$3x-y%3yws zF|9#jIY$ewpaAy><1Z4rJzjIf3|<5EJt1Hri8XJVHz+3#Ek!TSOFcGN>NqWhxjz%) zdYY6S6*QN6D-7PUoKPO7>7*aY20kU_8*bZ)S`KJp29?kYDo{|yRQNmEeJY&)_*97J zgEIZ4uw9B%n;sI?94U@N{Wu}n#YO`Z6u2se_kI8l#2dzBJ0vq-z{))iEcM9t?6o4x zO+vhFiyPBpa>F!JPhg_9_6Edckzq{VM^}$Z8|qOsV1;APjEK6~Y2f%6;+CN`aya=m z@2v`5-utw%!gH9Bq5#%jJPZ}n10gK`Sr*!-qW zMzHpbe4pAS+J!v(UvY=D9Wubgk~L+`;ec?-TCwuhBJx3GgeO0O^_&{=n;XYZwvVnH z?*He8|0}uS$B9HJi5i3>_sIcGO!5O7dW8y-J@vzQCRt1M0u@;*NNG4SQCn1dk&2g4 z4Ciy|DnVbSf-&G|1^;d#)T>myM#V@>OTo(rxbK&vJ#inI9}j2LkFd-qifL+?_-Dd_ z6owDs=J}I;`C&Twb34&4Qf%b34%EFSn;{(oqJfW5{2v02p2Ybev^&C$#KX}!$B_U8 zW?Wmf_HY3!BL|OknjeWgm1AXk|c+t zm?eayKGHG&&QRe9x7T1ERj1;0kj?wG(6EneK=s5(SEc_6xW-3${Eef!FgnQO<>uRS z8ks)OGIn*HaH8OksWrq|=5YxfI(yC>oSvu1Ata;R|3=79yO@}^9>r(m1^Se1FYGRq H7f$^L+QRhp literal 0 HcmV?d00001 diff --git 
a/classifier/__pycache__/cross_validation.cpython-36.pyc b/classifier/__pycache__/cross_validation.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41c64fa4df70193785724980741e95f58543a98e GIT binary patch literal 8938 zcmds7&66BQm9MX^uKt*BNn_bUZZKaBJ3S)|>>`Aakr@Nlb|`FNLt$@G)0sU}t?CbD zRZE_Z?gKF*Fb6>p>~UlFVh-F0f(TAT5FFrd*u3t^XD);xZ+9Mo?hCBH`|4Brr@Zwkw|P`8EcJHqh` zqTsv26~(yd7iF>IbxOiJF--%sQe5s<{7Sd#SGz0zN_W*?WqLVY>#qCjT(86%-A#Y9 zyX9|jyBeSCp7+mleI>ro-S)Rl<0V6^inWJ^Sd&kk*#1Sd*Tn|f8}cIBPouplw$R?< z_A_Xo6X(%B&+SX1_^MIAFxvR~Q4;o~5K*u8LUI_YNZT*nzBMXbY3w!jMqdhbWjz&>K$kF9nw{F*VYPX`ctlbKGhr{qt)^4kQ zOD0LwI~-j+O4C7db$2(82aUz|@4gGx8qJa2XzbPi62iBvw2%w{}`x3wZ3f=U?2L7TgRWH=b~RZ1`IhIeH)LA7Yjv&Tl|22#>Q zCJiWv0R=IkAeOKn8l)tmfU-bJg3=Y9D4{GuWL2^9(DXgA#o{&n62wM=HT|+ELuBXm z>P_15YhfITFzu_+r^+>Y?i|V53kM0<(@JYRWG5fPBU-d9dZ^q$x`~v$foyD!jldXN z$juaa$3DqQ=fo7yZea_D>4hoXotDLE3GEiV8QO>_i^{*P<6R|TV8PX<_QFxzV6S}zVx&=ABU*i3ujSIrI{ifgjZrJu zx?g6gCcz-MlGhdL)hWFlTv_ND1bYjk_D+r3^X;^!6meEpYM`5%p2T|G|$Q`_x4hqzNHRH<{N82|~X=r0MKi5DHuOLgx8u&+4ggLR19Hau0 z`_?8Hrbxw`9RcGw*80Qq- zQa_o_G4Y@eayULAxg z?ADHhR1M|Tvt$y&0$Is)sCrjxcQAUVClA5>`?3aGC-bSw0;k?JkwM@E1)D0h9m}3B zwBvr5);;x6^l3NyB$f0H-@0?#xAu|lG*LFS8L@6;*>jkoo<;T#k%gloFH=b-&Y%)x>gZDmWMG7Ebx)5Fx^XwVW zzJ~NyS3~b^YWE=Oh3dHFVhTNU0?J6ttdFBIW7ZhD3|)m@3FndbhBI*|Mfw}NV4*wu zb*>Fhs9%{j#^HgClLfWQ$Wv*Yp(kN?5X<0!j1G^|h2EL6!Y{UlF?6&>wyM?!GJxvV z*q-DIRiD82Sa;tmws+X9)?Mu!$TZY$wjlV0eiCqjlXG(N+SGdNgdeV+jbqFuGZHncJBh2@7hg zb$xB#v?Y$i&RqQXaoNikicca4kCFYkwzD7CGN1_f7- z?1}f-zAE>XB4bj94wS{pHDg>txr)+5xpvJ+*E<`4tnRqX-`(V#zHWE6I6pV0UASYn zZ66j-unXZqYt~1$jf_63u zS8F1Ifyc*=ceB&77_+ox2<$AA-TB9+03|0d{HPaoDPOfN^D=`&+%Bkn2wl~X0anX) zl$l$dF>daiSgh7COn8=(PhZJjj_7Fe+C?>hcnx!=QiA z(G?!V5)5>ubw8M%AJ}N_VTL9_y^Z_@0H%d8B#z*E&f6TOtnR+kv}VFO@$7P5eC zQm9ukPQ8N6FJ<2ZJC{zr-hej90Ky2SmQ(%|6OwDlj4D|HiexB8EnBPBIkP&AQaz?u zolA&roQ!6>s8Li)f1_tl+l@0{X+ofc{#lOi{x=?kFl1EJA!Xk{1_SpMs$Zv!;udv- zGWJ_-sd2GUu#RHb_uTo#dA!c7cn1yNzQ zwAzGtrO5r#NBMAM#4|pL`{q03JQQY2!$h;c_$*(>S7N( 
zo1$H-7bzoG)P$oVue0FCyljE3``XKv)4E?TE_pM*vak!kvasyT!4a%lP!QG)(wsjb zB@|m5_@}$T6iQH)XjU0lj$VjPgQI`7v^)fYyGmtjp zkLIku3EO5cg#a^*sokQK9r?8!t4Hklv^sp1b`+g zZ6$w*TQ`sr+S@4rJ-Z682mllT1-K*E zZ-UGu5BhTQ4zjs*B7jWSQ4bj=)6L?E`D5cpM%$zt?uTA#cO2+4(vaRr3+IPnd5K|pS^3?D3T;51!^Um_I`s2@o29=ho*O}SolQ28~V9u^nT0t>2vul&s zg&TMenqmFSzE@q=3}%-4EXHfIGv^{J1uw8T*M{P2$-DGx6IY%x-yICyimwWQTG%n4ST)@v|QBOOB z+j;RbCXJ7O|U^pueDX!GCE zIO~Wxj{o4%CPi`NmN^DRiJ0?|gE+E+I?NToS%G86YG;LGKpJOvvgj|12PxvTJL_j+ zKr&Iq{~s~n|L*cRI>Fh?{XP_8AF@I5Jc?$0V=4a10Km7=r(UCsej`(KZJE7-%$ItD zGQwP};|v4Tzfd!v?EA>-rFqPxLZTi}Mxjy$4FuIBGVM@|xO~vs=)pe{7|EwmF)DMY zFpGS$h^K6>13J*n2uC@eejm*}3nDnI0UTn?aEMQQ_z)+?>^~zVbPo6iZd(zp{t>$N z3x|l06|Vg%0ZwsC_7b`SL<7GU@sII=Ud81g4sD7AtRLu75J(W>Y#hSv4sHsv#shhf zz>MR%(8m|FGYHco^#E0WJGmPpV(v9?&nROApD`xk_s&eZWX(A+W@M+&4YU6oXfXbq zHdvPaTK*nH+)*pZ#7sxqzr57dnElM!;tM|h1;f9vJSdBiv)_kZMBP|cW-ec#H4(mI uZY?Ol$B0JuN_*F`Thr^%gv?>ie#+z0_N=P;6oTM&q^iAsw7$Q7<$nOh(NFjQ literal 0 HcmV?d00001 diff --git a/classifier/cross_validation.pyc b/classifier/cross_validation.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff777eb5e3775fc16d39849321b9f50d5849a34d GIT binary patch literal 9407 zcmdT~OP3o*6|R=%CC$S#w#W7mLzG~MNSKF#Oo;Ib34SDyL71}R#7dk_kGeE!x%F^Y z%g*F@7M$@mtNZ{~Y&a}{lP#=Z!-@^t`~bFWIS0P)mei6p9%7#ajv1*-RkvQZ>b`GJ z{vY$jYu6j^)>Qv5kKb4E*rKOYs8kiDr>dR`bE=vXNM40`Rh>}bgsM)ea8gyLREY1i zil5r#$u*R9sZ8Syi1?tvOYlQ>}SbomZ^|Rb6l+iYh*$T8pZ> z=-RU?UQ(^2s(Mtlj;ZP~2c1*#an)K@)n(V7SMdqeI;pBB(OyvIl&S;OBkI0V`%2xY zuBdQPnH7O9Wzesv@aO<~EQ3C!!s7$zat1xkgIz73;GHa?xYV#g+k|1%F1==Vf+R|L zWv^e^vXl5dySA~mVP8fqkj_`PO>)b`W_Qhmy;3JBnQZ!6e?-H?twtwKKU!^k6s?8E zehss?wyu;`OIuOhl(vHQPA}Lor7KCNW~`0cJ9Y&_8fn_K=hoNbxVtvG%X;)L{8cQ` zT3cH$PvAG5L-BeNg;!0G)EXDAr$+#|nkGRS)gu$WdASpZDUVl6I@bDkg4pjSCX8yS zj2uF3t&^BCctfSLC@x2_c`J?LDBUgR2wdbEop!xvqfXmz1!)p}n9c(TQ-=A;7t%}WI=JRucfQ?N6b6)bxnJU%0J2bmTKy6|<78G#g)zFIDF z$LCNKOZ>amFs0YFEv~ngmITq&3{x3VBXI`L1y24mihG{Ar<9LMPAZ;?7F8(T6VlG9O`HPXO`HK}Z8|yJgyaFw!PrR|Tg=*1(w@!Q)6$;H+9I~$JXTv!_-;a; 
z@ZE$Y;ky|wDSS5}LHKTl#}vMs#2Xg_o+=;5Nh-oM8M(m+;Gl6>MA!^Q2en!cSGAiK z(8w4fl}jbNS-V{Q)^~OO8s8!-P^d z#AHJ*u#d$?O76!V?kv|TI-Lf9cLJqRCcnuDYvQ`5meG{JG9FjhW2gZ;rvNBekm0rN zuI%CaQ9DX~pQ=Yxl=2Gr9P<{v>HcRCb$+J31y4AfMmm#bLu7FzzNv&!5#jT?cm7{Xai49m>k`~b-pmZ0CoTjx`Y#^)a@7eeVbA{Dc=co zavA5lD&(;5;&zk&lL0TrUGF;8aLrO%S)OZ+D z`bBg+!W}+?VPfGNHbSE9s2wD`HL6IGk=jZwjm6ld!C2gbz=*kkwV*6atFTCW0S?{I zO}#fkiG=LAFIf3%nM%k09!17kkIAc!v*Tdf#CC**9k@@p18^I(y0P)^nrNqyj`R)~ zAO1qE7lYqRREVWI+$)GdsTK#;W{8T$+cs$+Vj#EW&~VnKV|{6gq&QOX2JZ$*+sA^I zoKjyEvm)yG0i1-vMu#|#;{ro*5{}cWfNA=LvWGs8nn+DoO8*h){w$_k!ei-Tslr@e zb|^v1-j_{!FaUUij<1mZQ|y@C=#xcyXZsEzzWzQC7s9){KgTAXJ{5j|+9>&iEMJVM z`W37xG)*3XreDY4w~54mou*ErM``+7?E8e$w0`ypq-XsR=y?WrO8$B%Ju4zheP^m2 zw2beIPwM-vPS}H@^TjUteuz-X*W6AFrq~}%wJ*A?(rFt_WuVDDnp+MrliFc^oTfv3 z9FL$km7mHLmI}*-d?9}nf1=(b5WLV2t!fx5fArzRpzYf(B4_Ko^hU2WNZSb}C>7=eDDIAFCqBi=lZ;5?no!*@;|vD0s$cgQV=G75+M0*@u})bug$YjE{7 zO@SmA1YyKwy*RaEc^LU*PRA8^I3NgbU(!bO*KqqnBHS`7fZ*yu72G?#j~PdsY0@Uo z!vSLslRy>DGHnr>dDX;V_$jC(z%35OsGJ95-XGzhSW)I@8}H{m_%p~^I5TSN_rpthvWL!O!o;_?G~w=WKG zwuKRd3k>7R@1uPLRX11y|hzq8#GOl7=~#&?+C@%vKdi(Zg^xiIUOiJ24h zpK>v3$RWk$24yAvbN2G285yM|UM~*iu6TEl?n9tQZO=Q?Z=s^E4W>K1kP`fS{UMn| zFo^M?{QSNyHy>jqmpLe&@lNHIb5DEEd8gzvxoGw# zm;j`nw9lXrR(G2mFmTd0G-bleHEQ?4BlheD0Au!ysuZd!?}=4|#S(i)s|Iz7plx8) zX!FpEkPcdOtQROxLXYq|>@Nq1GFkZ;7A|VHO@grKVRo+5gQhv8Z`7V0P&%D%>LX_* zie%U+{65;HF9T^!V=qRmA}1L$i~7gt7O`@e*6v8dp;o)&Ea`JxVrU!1s1460ayN1o z*-SSuXUKVnG98Ef4v#RMDy+b4>eJkA3q|I+FsCP!-bdxJ4CTI>0#7>=Nkah=J3|NJ zIs*up2$?!kS+&p%lM>+o0d%Z zd|?}+sqS=zMP@Mc&)B?zLWIhZSEK2CZl~1QCFVt6XN_uZ$OpQKrZ8PwG=wuZqNg>J zbWSS?@5uhh(kuM|i-5g&TyZFGP8R3WfAZ&wfkPcyYzNq1N zVfO+|>zHDrqYmYU-JrUl)tLCDzc;CHCTOrG^qaitxOnqLdVzpK5jLsvkRd)T8DjWM zlj_!#O18w=LN53HoVvZGSiY>_++h-2xer@RY{GMe*M=>))6E-duPAFmii)a4e-4ST zoI3gR$%{8v*opSVn{(pq&8qkF>h`DV_OI0*+(j9?vNx~b@xdFt2hcnP3k!R=RW!4|U0Ay}1tM_th4>u0nfM-s9 zf?H$84@3G~okvZ=j5YUKAqqbf|39)F=H7-Q4`Y2x*W^6kCDuBv?kk_6C7INMi|8uO zli?sS^UghiPd4|6`^{v3Kx6Ve7EO&5Im 
z&`XN%VcZUy}b+@FkK!+Kyt42Y#9{0BNitKy%i(SY_B0FXkx@?F&4mUSVu_aezZ`DS|~k^qUZkmVoe;Ze#0qE zDuQe^gMY-R*+ZSnZCem6p5I z%u*kOb&^r1$KLW66zHXYM0+ZF=zXufH2o8d0_pF~ic5)Bj1&dB(mc-0&b*mt8cO|DaP(0rgcB{Ra?_$O08)3G#Ae2kX0eU?Ft1|>ek4rrR^#O6J$-8u{8?n)NPQ}NZ(CRFiG7hvZhFU;nm4&&`Ye` zAURFPSZ#*B@FvNdGQCqIXH4%bc~x3(O%G;1-_lMfJeQV&iJvq3%w)Uuby}9OZ%I2CA zcKS|7HXo_Tm0J5@XRx>x$34Bgv=jur_G$i0-#~!W63N7)r=JFXC+x~FMsw=PrtfY3 zO1g2R{DD*t(N}*U52cs(X|!vucI~bcEC0Y{F2kpU$|{O}3?iW%s09>iG5Ud8pe3jU zI(Z4@q)kwxoR?4_UR79TkS(jOM&5*}V?Z8)tS3|JaKS%6%T9t z165;EXVPH8LQ+*GlOTU2HH)9uAGg~}U1vwyXF2Jm;YI^r$73(sy~Ctp+kWWBwml6l znmJOTDu#daVz4l#ul8_)o9DrJ_!mx|YVGF&%7Vywk4E;e8$mlcQme*v@@x%pj<#Vm z#*BDyfsTc^cbw|DXy#*D$xPd4w}~TJ z1e;wL9A#Fy+;f!EZJwCsa&v!6##>S~nk~`4Ga#L~vJYLsU*ht-|+q-j>8S^St8r&MUz>(Xq8?{%0$aMmJ2bJM;Ytd1m z=kChRmLKeNH+wqXKiu0G9Q3241P_r;Y~{Vs`5z9X=CHe#Q1kqgQx~at8`|bj!opk+A-h=Yolkln^#1eU}w2 zQenUdLxG;_t@7Y@=EQKCCXk|eHa=vpm&ctxDL4ndR_ue{u%O(b@3XX6YV5MQ4RYf9 zAJDIJ?EXgxy@FyQoCRCjDU&KORGvVAaSg@-c2=mI9VZcfBKFW(M$F_;Ip#lic&QUK zSov<{o;})mxbUzwsjl#XjZMV77zVC40fQ#aCgPbj(?NvZ1UmGUv_q#WwPJKLkv7SN zy`%`dRWixzem5z{k)xDzn3U2;k7Kg!%dn5|@5B<|VC!M^2+x#kuW=8Vd)HuzHahb?6VhlqkC!UpL_Vmd9LR&2c7YDBP;Am0P{+jm) z)I$$gq7#*s-LL5>uZyf}d|iw<&XIUU^h%ga3YW170+Nju$U!XUVD}6J}eIfjh)H_Npw;IfHcoI>;JP0aA-I zltYhkv@=+jPm29s55cu)m?uROKTmRoL>!zlN@I`g!B5x-cy;7IOK;jumV}=Na%Xm* zA@$F|tYe_z`6mq1+3s2DH~^yAM@G=6UM+^H>{G1@T3kGHu$=lBOw~_7h8fUpQ%`E< zPGozy4vjD{1wTF=uKdH(J#0-IS4+K>cWYe?BF8hi#w{cVHUl&+s=P`vao-Q*FJnLO zV_aR#xM*V=FcTwnF{?3LCv$EPX zOp1B5>f(~PoO?evc;_u@&!C?}4h!uriar3z)Cv^?91x6=cO2#E;7bz~jDg;J{(z1Q zECCJJgRfEyG=buW_Zxf=IYN>H|1GkC1Aul4JS^+sAP??&+Pz9o0Z!|!;^1i(Aoza0 z7DYQIj&Ra(qL1dzb%e%j*vVbS*vja(@kS=%|1%E}a=QImtoRH6{`T8eX=Hg;%?~|! 
zaEo(Og@NgP*=itcn2#dsZlok8a6B3&p-#}3R{3O8>NC8t^7w`_?ueh@#cxnFD~(EZ zQNrH^XlM>`x-M#Cu{wqH_PMs#Dk_fn>PJjIV!}KXuW)MBQB3+VUi=hAGx}h`ImJD# z3mop8e!LYagu4mb4t!UJ_)K9Mds1n9JwRg;LDBK?HNg)hkF47G4B_}eIu0rMHnQ%t zu`f3?(skRm7H}I(KO(hzj<5E8@HJgVtk!#aCy=;SwTJnU$qropUeS)Z&RbQSpR@2o z?RVP-@3b;rUDvMPJ8Wwt9a^TTQVFhzeWd=+=Q~dnNo6IC?{^rKP3q5T$o_KEVj9W- I&CTEb4YB2!7^vOtN{9{F(z4xDVyjZMg_eP3H1WN5#u?8z z_l}d;az43IQy@tE3-%ZAC-?>Pl`rgHsF0TD+)13o4b}1iNB7KqJ@@6D^PF??@%+5| zr{>>(_oIu9{ga*g*{I(|QGW*E%wyc}3~qWR>K3;=o7-N27rY`b@={dlIi6!M)Sal@ zb3Iqr%h8-Shdwu|^ya;J-8UCidkfye(fFcw0ppcusdv%4Xs`#2&-3a_#;fAe#PTkq zy}%dIUKE$nzQW5-S^dK3!snYRhy~|iT)U?_febV2!Q+Rc!bgo;ja#En0^N6aL&z-= ziD5(VK`oIrFDKqtu~uc)Nh>T-~1ZS#Og*i_xSm}DD1>N5vOP_JZp!%{TtCr6B&+#e1yKr z7h))Q-lx!ZTdL*vf>efk#9%#)#IMsZ3e#bWm>&P861jyQ!5|F=X;iq)3oi}GWS%5} zn#~ru#zk%%ng&CwIAM^eiG8fmhi>iE@3SaSs@6(kmC6CV!gRE5 zW~G4=KHev@<3#oz)k~S9#I~Of`ywl&rjM(#!j_LCXA4q*N!UsS_qV8Hkwd(x@I|km z4*gaVWmP3oKcy%4ckt%fwUf3KeXNI6f;3Fxx|7*TL~U8YW#t@^c_LLJWg;$-1(2_p zgmJJpFPfW4Pc(y#U^57tq93M7U)%&!jw69(GsW?my`M%QLU*En9#vS>1X^raJP0saFO2!+YNfg0S{GUboC$*q?x zHMv59Kt{@dVGo3nu2v+&l0x8mgj8~@|vQl3rsR-kifv2UPi9!bR6h&PD>EOk`tnR}o zyD*!H@mF)(#>tJ0irr1Z82WBDZe`}4()yH>OIZ5GT_eS4CB6Qg4~ZW!FS60?S*f2W zGbwjtElOHJr0$?{yxdz3WXxOJqO%!BTfKH)rMttOjnUp9!7+-^OA(}*#pB?Azlpqx zW7Q3LjXtJa#z(FYA+0^RjeHNyAEF#x!?T05)yXXR|K9WFnLP*C({61B^4WUb(H8Bw zJ9w{+K@>=sTo4D*5P+P`#U1FAFM;+#8-3;~Shl~LNUpMSlg60t2wAdnmDsc1AB;1(*eU{m4;-N>d#==ZO4eNk$3-idi zSSewImGgha+J<&&PCAd?!o)bnDPDv{l}4XT?Ggr2gYnnS;_Y;Jb=Hn0Sr*x2uotSY z>88_b({<_E8iH%6XoJ4?dvo8|H^)ZG4gi*0C25b%+_bgE6E8A8$cpSCGs4W+L1zh` zi5$|5S=`4QMRG2(qA^voo+E|&sy6UU`?Jh=kzMG(6SiUP z11bDC=m{kWTeL^Z%($Nw0Gv`}mKyZ3Qkn!(2E(kFdsJB@&Sen~;AVqV!0NRs(&M1s zBu0c>@&X8j3yOwWVbQ3-ZQTI9gi{N>M_(%1kr7v@0I8qR0jNR+fw*;J z^z=l=xjRr|dv!()YIu)Y5ZA;WUZ;i$I+{3HIIA3xAw8(1@Jjv!1RzHTz!Qn>8Ia4C z=y}gd=>g_y%wvVojoEv@J-u!t09&t{>pJZ5wTyf}bA7*;@IfR{ulWA1 zQt(*LmQG*j^WMgjv+*NAAQfV&GLRH9JAk`K>}XS@I4)^WK#o8m2wXnGkVEsp^z8$) zYri&zD=ESTUO+ftPmHU`B(B2VUbE*cUNR6{Vx2Pqko=a#<_q@907R6y^U}fUGQq>X 
zvv*_c+-F~|?3c&o0|rao!R*eYu6?(1*Oo*Js$Za&5h>Y7|r|Fdu0+QrKG=(HG0|JW~ zNZN-nblC)eq~XZu8WVOJlFY0y==b5-3fgxax$4ZIln0zWT zTkk@snO$y0i9&d<^Ny*ckgWte!p8!yEP5L`3d@gZfyI;5bJ3}@)Xs~#tB>ndr(J#< zdEUFz53d}H-J?Ln*ws=4ND(sThbAk}?@rk87)5;_gxQM-0&I=NCcamI$%M$R;UGMy z8mAw3NGZ2$xsuo6Qtf?fd*wMB6TTEi&!@Odx#Pnm+0yV$&i&}=Yg;$rjHc6$ z(g3XvCxQA~9zXmSAW28AZ;Z9@+P}Z~rd~WzF3$~PF7|FytS1S(bSCJ{1Fdxu|Fo4z zfd%@ZDL+KS7DNN#m5vT2e~C`dLe75*iZu$8H|U}$YO$gNIoNo62c-h)%=pF}QXA*0 zS>1xCs2AkVz(}qU`ILzE2ei$ptxvqb9Sr;mMTZ-X;jT#Psb3`r6AaSLM8d($`F<3( zM2sw4H+FlI1WKG_&j@%MQXQpNDLyi#RoJ?1~MA0N3}uYsjYZ|9mw1 zK`3`a@YP-VZN2x^RwR(0G>*d(?U{A%+(@ZTU?;x^LK28o*lTFGQj#< value: + value = PWA.getAlignScore() + closest = w + return closest diff --git a/utility/__pycache__/__init__.cpython-34.pyc b/utility/__pycache__/__init__.cpython-34.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfd9945c5372af5a0588ca08a58293499bb380c3 GIT binary patch literal 105 zcmaFI!^`#VfL0U(5IhDEFu(|8H~?`m3y?@*UvzivR!s literal 0 HcmV?d00001 diff --git a/utility/__pycache__/__init__.cpython-35.pyc b/utility/__pycache__/__init__.cpython-35.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cef3c358872f8c176d62f519a3ebbdd2d986daca GIT binary patch literal 105 zcmWgR<>fkg=x`JR5IhDEFu(|8H~?`m3y?@*UhMA-yY2X1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnFAM#Q{M=Oi#H7TG z#7zCvg3OZqf>hmtqSTbkg`kf;Ro_(IEOUh=2h`Aj1KOi&=m~3PUi1CZpd2 QKczG$)edChXCP((0GuTviU0rr literal 0 HcmV?d00001 diff --git a/utility/__pycache__/file_utility.cpython-34.pyc b/utility/__pycache__/file_utility.cpython-34.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce1b7080dc7f39dec1aecebf0a3f20937993dabe GIT binary patch literal 9476 zcmd5?+ix6cRzKBOce{P-#BrQ!Z6-5IFO&An&d$tEHiTs3%#fXo6XFb+DaIWC`X!=&rIB9&&$n>-XEh?5$Uv!)p^=>!b7a)hey7MdCC^pzPE%)|jQP0VBzb43vp~i|{9YsPEOi#iSd8DN z$U8@!^JJWl->1pDK%I+ZT#Vmm$h$^b@z9GD|Vr$bQvH70YlQ4iN!_h=M{PTvv)6R&f!a`r-ce@+m;${%^{p(9hp4V@V 
z&cF1W<+u{&o6V($78M@$dLHh@RcGhk13XM_I9*4%Ez|B=c+I08w+e2LoK{cSc&U0l z%QgdR-E;6(^81$Zofc1<{z`bbh0A{od^o@dkbx4&5Qu6F3NDpeNUqqy|{2rZoT%`T#1mv}S^AAY__fjY>NC3_(1fYcfeyhRl&&PlrrO z4KQ*#t*J@PJlSW`nklJSAp2}uGc7fXWS>iGW~Amk*%#89S*f{5_NBB2;J}72ldY#U zb*TYXuB0`mq~;Z}uckGprRG(#UrTGSw*s*B^=R^a*KmT41yM_@2Xc2bVvM zA`s0K1tSd0t%MhOoIZ#L>b>v9H{2fQN$bc{@hepS}k zS)<)gxf%uxi!HBxNC%U&kG~3Smq>ldZFpTw=KHr~zNA*>#3tfCqw{`AJ73ZhO#dXn zN+6>~`!%+d&=4kW_iuPx>8d}XgQ>I~ltLXaDYO7o#(a|3#h6zc1)hN?NoI^kR7OT% zCCv8~27zPQG8i=dvOzmd?Fq|thQ$PRUBJ?ZIatvez&I8DXH`#rj~^^QxUP2wUf{yY zIC|G{Y+nz0`nsdH-Cf7ddR~XQu{x*(25naXy*;&e{lsQxH*mUs;P$%5+IwAZ&*VA$ zV@-Wn(PNbztKWxRH9c$H@vig6#7;tKbU&~Hz0GFZYHjM$LH8ZrW#D*w!;L8?7^sn* zMAhLY!qaUF^9WTwCL45Z-R*|+AFwgD0<5eDoAFY<9!|Ecz)w1d`giX=xbyI?e*5Fc z5A+Z2uH1ce`|;gnef9C}$M;qr-@CK=Les-)chG?s(0gtDg?2r>Y(2%a8(?!*?K?XI zrwjLib@lL)AK2XaW$jZ-9Vg%&2hCtNSZU0v3e-3{4Kre!LdsA!iZwjS`+T{}ZDX=G2>OFSHWd{G5@JQ;Vv4u0nBvD|PU5B4fzxG>Q)fcebAEJ{+iyLqN;s)9|`eBZC zO69I z6$;-RUn>V~(%kuo2ct}T&(KUgoR(ZO2Q6zL-8`1^K$3($vu^;0QC?*hG zJk)Y2J=nQxN0YYWx0KswYZZYU#XM6iBE>R7#jo=75(>cH**StI)kFi!Frh3e3Piz{ ziUTEPBjP9)MO1vI+`wrRjhX=3vTcQ!F)G3A_6NT3&8T4k)3wdMMydbFJeJ?XwjyYGyQMVGHO)}bfZ1cHdz0mH#AVlBXN zwJ{NXXK39=ypos=-|F-|XT*;jHUH`xpfb{SVoB|&xA6KxTYCOq_^I#UrBO}=3yhK}=LXZhJq%5D6QpRDzLN z)LzLgYPuF)8da0y!%mjsPchm5V<`p*bHuv+U&&H^8w9KOxL{eT>s+uPV`)x^G}R5R z7?-8`HLknK#XJhAI-5z6pSs0QV#YFCrp}`7Gh8Q0YRm;3msH|hTdZpQW3OGtB`)Bf z@c@&>akqrwhol`8Ql}u{_4mbeL(#+|6h*ux+Wn9fZwtDIcvxHkbPpjq90Ds%j?eZ8 z#CcLN=ZBiMymm&+WSJxKeY8!`0s{PT^EBRt_N->vCc!@!`p}jzJ?Jv z98JawE-pXr#0F8!Fb)i<86I~qfwJ7L+3F*xaMT8Zx|1ACRB3Gvy4xH|JZJMC6@y-2 zd7*ws47AP-5^lE+l@VJMNRVH>Qnf{^_Xt9)=?#k!Vy;?n_$%A=qO zfCq-TGt@y&r81GcKuHpd^V1W(N5;(&-!bA`hN0}S_Th12q$zPbItMTu(Ti;2bH?ks zg=2=LBOd1i4KN|~{OopZXE)oPV%0fWe>fgV^yk$?e-O&qF`sj^2#)i47#gmnDh2x{ zrjh+$Y#KE&e&Y%zf%*}}`U8yc3n(g-*TQcdb4oGpG6tTcU19DU%`r+C9umTx!9zkx zEurL>+JK>D5D<&zL-wiWXN&Lxyk}X2V|zD_f9HpBA{(+C{aGB}4(Cl{zSl5FeUFQA zM}VM~k5UrU!ZN2T|F0O6P0%Qc^NG)K(})-b%6Di!g068(ou$9N0UOe85fII#~;HKWomW4JMicIEDxwiQ?+Z*?Bke5~>t1*4KN 
z0Zxh2zee#Bvlk4EX`cPwviVV*B{{OYXhs5&;ST~V893D7zQxs3w)x=(GM2*D3BOv(J77|p4ddc`3Z{EMt&tMH~VgD+jFAavvq{c zg&vX&Q7Ne_*n>_VwG-{+^dA+UTHXL>`u0(pCM4^C^y!~tBVv>Y$Mo^6XWAvh1>4aK z$ZT*x0l*8u0?wcV+XYlPO&jGDwx>QuALKreZ2sk{!?bu!jd0vbd>%8nuo7v{ z!r~xkU;B2XRT~99SK54ZtKk;UqxIzk#jbB`3QQ}Wfm8xrFloF8@b7e$Pv-Crxsf4{i$M5j0 z%-E>H8pbR6FM&uUoAt-(y1>X72eTd-NSORg;aGN~AYuE8$SFR0gO31+SX?laA2?lI zNRvUx`N5z>WrP(jqVcGNhd%OwMSs)kJB^aMh{=s2%P>^nF{w>1U^PjB$DHY5+($)A zDQoWtH=`Mmef$~91k`?pu8BroI$~Om^vtlrlxA4rGh0R}fDR`E4dwHV#EyLa2ibDV z(_&jN#C7;WfTIlq=tLR{LZ$i)vnw`!MiqNSdXWuoN??hF;HYC1ifo1y{sO}j3TKW` zc%)}WA)B5GF3|>d;d|3eB$NOYu0HA3NYgZE$m*wEjbdw=TVYYik73=$)c7oFI;!`#=#uDa9$`CzwR58gAMc#Ot%Ol51lBm;M1kem2t68;#`4;2`YccI zA+Sj}pC+Oa)WLq@OHrHx`eHnpiSsCj0Ql2(&WPQDI|zR_^WBM?50r!b_X4E&k#Fx? z!6t{p{G5fAM~9#ja%t;cwaweC!=!0ps0*Q zBR=q8$Ka(8Krw%C`0VxY%|K3xY$ky)AGbKD{s>Qhhs$R(MCFopS*znLI1Z=rO=oOn z7%^J~@mw&RwDB~c@bs&s5ElWN*fUFV2PTWBe`z)cqNy-R;@DoA$r<}zf+TDF0p=eu zaiTmW#L4>0Bjs$!7|od-`)y1F6+ev0Mv_J>Hoz4}8p~_%O1Knz_K}=vl0}vD3nFFE zoz#?BeQt=`ALYe_;!tLUA*cD3vy^dWi$gU@T*GA@@H1y7)h#Z@{bF=h0>vbKkn#hG zX#Zc(IpIkKJb7CSuf7lVrg`O;4Qrg?Z_|<#HKVF&c6#;zLNTM7Y3>XxFK$uOm_4fT zw->jCuQ8iFTm1^}?p-c!bHOP$^&uChxOl{c!NnREIu~s&JTAIi^toX8s^8<{_qiBS zD^C&%uubL|+vgQ)S}mW~&c0SJ*JtW8wOU@Q&(&-7V!eQx0`5iJr|M-nr8TZFN!9@S zRUm27#OJ0KzIQTxc)=GwAKwy8)0hU~NIV6&9PU{$O$QYon$QmkGX5_B@yj!3-N)Af zbjtgCzEZqvPF6-`GY*j@B8>x&r`#yUU`D>%CYd&5q$KSWaoVZB7dyQL^fmI|b$fym zqlTyEZLhzHuc?82-!RHtwTkZ=3}0g^d3zLRuGupfg>@XkRGA-(e#^zw>_wwe{9hP^ oB`0w>nz#{*{U*CW|8*24B$Tp$IebpT75|%+`z&(b$-n%616V;tKmY&$ literal 0 HcmV?d00001 diff --git a/utility/__pycache__/file_utility.cpython-35.pyc b/utility/__pycache__/file_utility.cpython-35.pyc new file mode 100644 index 0000000000000000000000000000000000000000..984bf0bf9be89dfb42b29dd7a76146379aee386e GIT binary patch literal 9423 zcmd5?S#KOyc0N_TH=B!E3q?t$YHZ7scC4m79(%?bMX@Z(jxribBhn*J#X_OjRZSLI zz3^7GMRv#_00-mbB|)~AJOs!9K|C)(kYHZ&93U@2en7nrfC)_Mb$R(eOkT)n=1`1 z%0KS)eB2A`?%sn(sLX7+T~~Q6%jw#9%%L5(3T{u_R!=#2C;(j1kC`JkPg~#Jj3 
zwX~&74&*+^k7gyEJeGzglT>90&ZV?vR$3OxxtzAlNy`#B zSJIYwX}LG18X0w0wh{SJRgB(()QPucs{+q~!)VZ=@~Q zTOMfoW;FA$=ewVVp6`VREwEXBeAjW^!xhY-2u1fqg$Tp+wc#r~&KSf4jo#*t+X{`A za_!J{noS6_&{tkJL?s`%e*5R7W-!moD?msxXr+xAtmL8DKZwef<#oN#vaa#6ygq5e z3uF6hrX617@oueE;4b*_4y(9iyT8QdvE3bw4v7u{6jXFN*0s-dIx5gnkwTrq9EEv0 zEU-lJ2aAHv&@v7003{|g#7m;167By3g+EMt zh>O`Hk4rI&w(>j!Pm;_SkEo1|z)G0!YYYOTvSl!6?yNz3H?*fL%6XO!v~&?mAL}re zHDGXd_@7lH`E5K}eRRv{4E)f8IdP4y>pFoE_KZ!}XnXsvll8m>D`R)i2Icd+euU%ZDM$#ZDSsxD!^oeu48!J;l)qb>e?YzHp1QC->LxKe@Ycf7MvuxV!OSedEEs^_QA4tab+- zn0uqwHePDChS%*cFzpuDoHYmT-oWj`PheeRxEzEIcRuU=>`KQCdB*7tKxLBAS=y5fIcoZzj(&HfJY<8iA`I zCsbiyrCPQo^_4#+{>(hhQQ*tk+y5j^af#n5Xz>afhWR;ofZ|J{wwPO3)1QhBhV_NPSjBZDks4mW ztsds$raXlj?ZEQF%0azhUtoV7b}xmsR_cj(b`As1^D4iIf(ha0ko5 z%**PFSndzR@-J<{?u+$r>+~l&?G@Q|MOyO(&)(40RooQM+{i_G^X(|-Is1{e-_X<> z{DvKtVs^i*-e!QbdY%5A-@>!_rV0(;8RuB+`x5xFw41m6Ud#4_JE)|iC+K3(uk&=@ zL6OjPb3_*`9}@*<#qNtEpO`>!@mSMSda!fPiDn!(XeqDH)+z!yig~73M2cmEhBvw1 zL;={ldsBE)H}QgHm{1lK1fpO|#f1{H5pflZA}TyrUg$OoW=#NXJB~td7!_f5`-4FE zX4bHP)&d;Y8|C5mM%I1ID~Z_%>`vcz$Nb2&`Bx248EJd5r1sRe z@c2?&dGTNPsatqxloEkSSgkIjNuaLY;eiFe_uN%cA?PFSgP6r&Sz}A9_qbcNHBc-& zgy>UJ%iZ{;8O5D4KYLf{3?B``>59+kx&O z5*Akg-9uy!hrmjs^PwH_H%}_&{8-agpIs6&S>iza5bw&gh`4@l8*wj!?UM8_vhC++ zF^v1b1EE`pfK}wSh#Nf7WSdi=#p8V9g{o^&g*`p&8;on}koZ`_^ov8fhDu_%>tb-k z^}y{U)_KHxcpI_6Z(#(^L^H91i_4EYu|X6w+(qS72NNpW>sqZof(ln{A*ege!9HY`} zQ6-DQ5mzB?VAkX4C}9wLnIJYs`tCubeIeX}7vaI;oTGEuN-1M@@h+&NAia(lsHnY; zat{9F%9KYrF>)fhVg3oU{|FU88&L%y1%U772}<^59PgyiPJ$VUWPq(j_7Lcf$ft{= zIPjd~;OFdK03(#(7-wDp9vJ4%PzO1T%0%)2B}pvKSEqW9jGJS=W6ZgXLfI4VN5_e= zSBcv(IDp}ZUSt!WGu|?695ZYK@i-r7fC*{my4Q8w{cL-RRp)g5S;3n`e{LlDgHXQjpK`xp_h>Q*SH4ZnTDDaE+U7AB?FtQ8+V$poaKGpne5nh1zEQ@e#@5S-&#ZjEdhD@hF zi{snTylKpL1B289E+n2E4{G@+B|$ALbGq{XiZR&)jeT<}>+%t};BPD!#u=(fBOgUCw!99O_kRIY1pY zk@yg-mvQoZUlv(|64o zbW`5G<=CMuVpmr%IiIeAM&7Jsynt_#{)Nx1UN|u4c##LI)(_%j$Lzkn=fX(L@>dujuHkOP5)6k`go 
z_z`X=_97z0Uj7m<1q!53?j-GyF_U+Z?c_cl;+@3XV0Px1m;2XQ%~qd*vGEB@v`HteGdRB%Y0rm+LD;_e-AJo8@_fE@_z20{SKmev6TqhCY<oQ6iJ=g#Gpz_CC40dE zDv7O_$YC(_766!GyWFSrE1$4r$l7M zTp3f?EDaH9Dq%0sVPOhkY*b+t2K#)okq6>O#KgDGxC^CJ8BE0hJO{Ty8rja;6xEmJ)+tT3e+R`{rvQ3{~L z*+4`2oFg$L=K(;r+;X(k77TG4z7gPv!vH#!hP+Uzeh0Iw5(=}5y&}!X7B?lZ$U<<{ z2?|9vBMScx!xIYUrzo82nNi55r-DngfxY;_91{s80EO#MyEW1@4H~lkC2y^svWBke{Uf?0x|&1yj-c(zIKn48XK*WG6bpehE?{h9 zc{V~%CZw_Y>@IztBRCP*B%D(d(Fp2bKk=a`P6d5AnbgF<TghX?vH%ZowT4e>d~t ziMtP#i~aXPr23I}@7v)vhs9ja!prF)=!6_w_m+_k;U@{X`2;lTJ)H>3M8n`?Tl_B5 z{W=N=<=GX1ofs;p8Nz8G4CMVt+XLR%)M{&RRC*67a^9t6L=&4Mb`%oJZ`wg(dN>Gs zS-H)*;r+h4^Uu5zFI%MH&2h<0#(!f5C>@D7u#a(3{<-b%Mx}Na_-!F+-}Zcv9Tnx6 z;xvk8Ipt(jN*ENCkZ{C@9_$#r3;-zR4-TQd9=;vOX_3t&5ThBUvLB8{mo~jn!x0k$5Tg z>|?ppB%3N}7(~pVJ83Dg`rH<`KhBG?y2!j3G2}cya)cIVxcICn32eA5tbTTeE4nO^ zVvhsv7(w~?G0={ zexqhFdsO3ZFkTB^Wwv{c`X=x5eJ(!af>Uqmx4GbRjrt)MKjz|dE;u|KsvqbNXXtdw%@x{6A`v42gg<#O8PH|nMOe0{!F%W3s<^;*49&!Z)edja>^ zdPzn#t}{vY0Q*%SX~x25rxw0@GJSZ!m%aes6fDb}1K~(Og}7YqS+Oh^4L)9>9};E! 
zKLFx~=k8{JuLS6n3l0Lc$2*jypm!yzBS3@i8@&&l_fmo7VAJgHdbFCQnb|#5FqvLmf%i}((^yWmX}MI@23e>y&);fnt)$bA9%Zv4|1a{mn}S2lzI literal 0 HcmV?d00001 diff --git a/utility/__pycache__/file_utility.cpython-36.pyc b/utility/__pycache__/file_utility.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac311d4276c1b280e722262969440cd8af688538 GIT binary patch literal 8702 zcmcgxOOM>xb>_3){ZOm*=pK!R9?!_GiB&z5aTHtbDAwCD+92 z>PyzBnqdQmONhsdL68Inj4ZOt!y9kB2oQK_AiMkkZnKI0f)F72&LxZW=+*=Qf>eQ* zhxf&M&pG!y4}Q3`RQuW1KmXXetSJAk%=`?{-^CUF76nsmg{ij6G+P6$Gu<|rVVlgf zORU7oe%UVba<|g0vdUOh6+;^P8}hznFX3MGmwPMrO5S(XJ|o-K>^0nL z{@LETy)N78{<+?H`@EzZ{)OH}`=X?m{EgnGy(#Hs|Bc=y`;w~MSJ(<$eW9>b{^nS> zFSGI!rFkYkb9X;<`+i)I`%yFq zuWxPn{-8BG|JF;eRoRL4R%@%NrpDtS@NqYv@WTg>K-Bkmp9`<;u)d2&9rd_Xar=n3 z1Htf63J0zTd7CDh{0gc42Wq$ii7Aj64nme?}JS804ll^mwAHS{^d)?}YL=(B8H(hbn(*m+4Wfxf^lN_rXe z2HTYM3g|c3B}uP>ev@66^cm1^F-y{GpufW2mh@TBUuEw|dL8t;?24q%F#{6*T3Wy7 z`TXO^^S$V(t>)aIyN2sFuCRe3f)Y#hR4_=}iQlAA)-W4p1-o6|j;yxeZp2xu1s0nF z;q@aBM#%lnuM|OgSjwIeU+Z$&(ew(@DWI(UbZ zKePr%X~l88z85*pC0Z7(Pf_Fbnf13O9bS?&ciJX(!EYH?4VTXOVwvyniZz$bLQlnW8Z;fsxMz=avF#g(cJ*g+vgOd1ZVkGqq zjKIi$ppn@#M(5Pb3_pE_S)M6T4eMznjc$cuHL5cDNm9<|jgFUOeW_b#%D^j{EiNfj^F!NXgwltt9OYwi!to--% z(Va)vt=`a&Je)#q^*Lvu6$REVw>sX4v!ds998R}~N=To10-6IZ@ej2Ls@ab;`NS;ndH7gOgHQ^%j?JI?h{56=ws)AUd#TX=IdudLrDY%LaWolus(zw3c<4)emuF07~ zFXkcWqHAhUFT098gbuZ)>VaJkhS6Xc$wj56R0@f4SwWbYZcH@DAx`2U=)o)zN8dto z_&y4yzOJsT6}6`YYs-muH%P1T8Z)zpAhWdBV@;0t3YW(g!EINnl)@$c)>ohlh zgn)#F^c-G8@iVeRWYvIP36S6pP~jL}3E8xbv@w9Tqy3e3NG3;1)z(WgFilOoft#R- zntH0W-cNOpjZ$^gRK+{gLN-GXD__;NN%-xcH@NXVJWKE|De(uh+{nyNu9_st*0vwC zT|c}5VlsMe8)Z!Ogr=jn$!%Q9wh_@Lv`8Ck*a}SF7kaUH+JNSP6# zh#+nYVmB(jPV`k2I2QbHK{dqhq2U3pd?TqTwGCXTa-2uP11^Zssrg)Z5pSAyL+Yl> zn1F+nmVlvyVJO*TH}C?-W6r?!L}*v&38)#$cTQL-74pmp7Tu#Y(ApL4JQOXn>l5`l z(R+@0w9nf#e1u|8Ifg-Hr45B?2b$O(JP;gIJ#bbAAz&p`bmLW{3q?Wua0 z@&8cJk$k0Yz5Fly#JBL!Eazg74x_k;Dygqxiw2s0@SKa(3iWrHr^_b6H2?pyiOK{m|5x%6 
ze}GL00xu~a@rOi_a54#1q=dw`sbXF};yTrRhl+I+IF#E&lT5@PQ6cfA02y%sb)Vom zjr()Bdy@OPMQhWcpdZ;oLKlginV8IA<`;TYqFzw?(ie1;R)J9E<4RIWDi<*3@O$vR%B+lDC3tr5ydrAT8Cs(% zi2)v&I{(b70B0jd_S#NUwX0+-fh&NtNeDphHSLDbIVAQ;d~D#D1J?I)FrRV|U@j5w zJ@ka%Q_qf`w3b;X0w4){eVUMFU&QZ7xL)6B4`7eE*n>ZG8h*bdjHMN~_lNxhGRAl? zqabqq*t#u2(V|-EugMw7u_EWm3oW}svwv_cbZpb<2n2>TK`XIMfUYwTUjZ%7Q^m3e zpTHURab!876T7y@BPYjPVuo~h>}s*(%!pM5MHzPOGnk#-Z0~v)19H<~Y5&qQ#hWI~IY3*uhCOG|-2pfQ3EA{aUG@GZ0%83H%p_ceg|-ArJXU0Sp^P3z zf)FZmED|Vpjtc*DDry&en!)TDP@npEC)%fe_DoX__ZC@ya&;xwGY9+|mJ9#ewcx0d zuMcUsmTyam6$Yt zVx-f$a8iDP5h0zylA@{cH&5VyrbGqLPCG8K_Fijl@g!^M@~nc8i?7h)v!$MXoTs%? z7v&)y)6%j-Tue_>(`x1Pbc#H343{&Hiel}*kq+Q=x`~(cRrD1%sh9_3czqN{NU!fN z&^$qn{|zUK37V!f0$C8V3~M|Tp@*Pi&kuHyqzFVLKEONB4f#ApkcXbhgg_xJ=OKbz zAx70joe-MNYNMLQ#GK z&jz_p6Tdyt5?vlBJ$^LRSQ&;|kDh*PwSzta~H?a38JCBOfPwG z$N52)3s_iWx+3lVbRj>d1y@igWkhi5rW(KTCChHpaiJeEw8%ygGf3JLFfqg%f4O$7 zNl?XkA#45&sPaSQBjrc9!`;<);&N-?wGVurYR`A!a~jB?_0v*bXRu*!fZ9qYk8M-) zGuIy?V(l#MrXpi%r0#x!B}kZ25QNU>(^fd=^+$Bkq$KBA#NvDA*F^@{4E-#^c8J>4U-N9|K?5$xrKT7fKxKokG0We0(0}} z+A#$iI3G->P!!p>bp#MvZkmXf#!jj}kIiA!x%#bCtu+mbsu)M$MC%p5k0LeOeh~7e zA=eHSgCUXreJFOQ&uOasCK~<@SEd<=XI|zhDf2wm z;R7OXD0w^+jgwN5LdnxP@1}K?u`+|}@XAaiXuLsgcqre3%+oyNN^t*2%r`lR&DnKJ zQ4kMij$jd4r>e-A3E@A*6q1c;l@1NPDSV9vREt^ve)3+Zz#PAd9)%9jw#mPi{IWn~ zIzT}tE{T)4oA9^I+|Si^AYg+4AuYqgdvJHt5{LnER%W>G4tTR9i1W5d;)%tNK?@28 z#HUpJDGG3obT~C#A>5+{)}<>_tl6gp2U_$ibj{BdBI`M5Sm;@(0d1>L1In#r+agmC zL3JswLXj^y87TD)r9*W{F(Xi*Xt=a<>ax{r^@iM^c#L@kG0(1Hl}LUOR8F>5B3^TG zVsBCe)4lx~!}Gn>7WTH#v)CI!M-`V883k%bl!B59t-@P`m;fIVr zOM;-lcyV3Jj%;-T;lx}7zd@J$+-a0mQ{&B9{hjYzC==yBi0gEY!h0wXjA^6o3GV?W ztxxoyE64h=A*BGNo`L{*Q6JvJC;Y%lWijp8vYwb&TkFa}Qjcq%~hR zta+ZkFv{$mlhz!k7qB6C@1zaKIUKrv)*_ZLd)lBZzSl-xe?MU2U0UKdsklJ}<&gxj zO;B(k?o;uIiXT!z{(-Qm_%kYYsbEx)Qi=f;hg3`rv!v*#$>4=@<{{-;@U1}C)U_*( za$}{jQfX-F@|i}XVKxlZ7`U6bFEz??RP!y`k~_qDh2z+D2Olol_;!Z<(F48}g-B%M zVHq1o9wx%Ysb|%3I4bsW*Fg&UZs3r@w+4hS3 z^q1$Ekdu*#my{AOgQLtY$;69(%L}A(*bPvvoBm)QALJsLRkzDj^#s{;Qcim*fBGoP 
ztP+aDcDFsuR|cJyvd_|T`=@a;t!!r^zCorbBwx-z9;NsrifiD?{tbDjk23gc4gJ3X DzO^y< literal 0 HcmV?d00001 diff --git a/utility/__pycache__/file_utility.cpython-37.pyc b/utility/__pycache__/file_utility.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bded5bf6b34fbc29f3a465a7ff27ad7af2ef0d74 GIT binary patch literal 8681 zcmcgx+i%?1dFOTRx>%Mk(t5qjX4lDVy%f59lw{=P#FXGW5@C{T0; zJp6cW{LXj(KHi?2t10;XV(lM)VqH~~f2W7>&j9c}T+u(HVT!FV)mE8iYk+m8+Xgdi zlbLpjl~_3_+hwo3TNzbZh3-ZBqJ$g4rS4_>vV`Y?mF}v&D&hIyjqVluimE(N*aBO8p|C~o z%_H5u%F0ib=DGOXy`9MId5rlz>sGYwa^Kb;Jb4tGAGF?Yy&r$am3`ma^7x(?cn2+y z4Xlt`Ua_i`&)D&T?oJqp`1(%lx0n}See~#wb=`X8cRcHn+uI(v+n)7=hixy4{N8rF zvLi%4y0Nwv1pU_Z{%bEmR%IjBTdlRGni@~SFu>h>>g_*#44}U4^*rvk9oBR4sG}dZ zDsCTn?T|A(l%l@NBd<*xjeiB#{sTRp0*)!*82I)AT=R@=h3U-b=*(oL7wVB}o1VeS ztnxwuTmoEWH3^pi*I7ft6*kA_vA#;{JF28Gt*v3sIkqI{)B&Go%Mxw?zQ8U@cnJWdCP6Uwfd zD2bYY!(Zs1Xou!uNoYbBMq(b}piE1YBVAR5#&kNiFpreO^6+y}8kG|BSiP%!8Xs1K zGAbv~TCA@g))FHcG9xMPmHE!Ll9ZXTt|ZX)6=hUOYNXryw}-bAV^lp>u>QltdQwlS zdnNv}#7OEZSb>#)Lo2gqtj?jS9e(x#o?UnFOBM~ zoXoL`JQbi7Na~TgZ5*q#qL@?7=QKuhcpqjRpEavi%$h$vYd-}>%4i``Sp5=qKGq;8 z4G79y{GT-||7||L`}l^{9R$LM7J63C^H^kw(Ax5>jz9ERG4lqL)9s=Y+-E+AWQF|T z#+k$3P!k2fD%5AJQ;`{>r@y}Q=a&0CufpKd<9^YoQ!#kJm`3p;6r z9qW~PBYw;M4BNt{PP!v+f8h0CVDVlnz8VQelV3MJx7PIpVJuo=C^njlyaE+Um!ag$ zkw=+uhgWGxj|5Nk!0V+3>2qp`eb!A^@Or*UY!IiJqYN=CXMpC_Wg7YG@HxNOhOur&o$aiB~r>_uAF zbGx3CPqJ%rr^t`J2yihqHE5Py#Tg=pMpJdqu7?BB9|(z5YD%GyHZE((W~M7+337;% zcnG_2OGMGPFdV&)MyW5W%W6d}X^ZN-Y5=OJi`qQe2L7vBNv)y(9gMt<>zW$Bdml*W z=DPLT{o6Y2jUQP+y68Fj5X~>h50O`c^-7@WcVQI{5tWdiOLX~U2z8{v1}V%qh7FV^ z-`psC18r5tb)L3tp@GR#KD`Dbs2(E387V?d@QO>=_;b8sMcLMlVQ4$rue5z~Ia;c= zUXp`pYWxk{IBnF_Q?2!Ws{3r1s>7zr-=Pum8JtM@sCq|Nt|r02VvU{qMHE5t4|%{DQ!n@lgGG{9YfF|G@%`7I0{_f7kYs_9YFJ0T4!F= z=6;`yPgCb5?7)fAoLmn&zE1FUG*Am~|Ac7p-^0K|T=_v#Q%W1CQ1zgXWDh(}gig)p z+!tQcv>Q@3UB);9q_hMZ+8;!cOm+h=aD3+UU7tsGg`QwFBl*rTDWyQ3J4K@VgahHO zXcr-9nO`4^*Qwbv#G}LBq2)s~+sYvvDg(Efwx{v+;U*jc_2|bMoZaO^5OM?un`pbn 
zkvcMu;b!0vuRc?H?+w46&o#bu?hl?p&PFA2c=Vn`?P!puQvBVC0iJRkQm-So+Yh`c zQ-9Jzzx*%!QguJ0|2`*`e5I~6%Q^97n8+`qS4t^g0|aFcpL;wtqdsCLuuR$!XX=Ig zn>4M~9&q9VLe3N1PnS-c#2;cwL~)8zGNAcO(ETO#vKn8VmhjWd&gMvhZT|l`BFbb? z{tvjq{{Z-NvR3>LsUfcLZ=-2eBwzS}i_k-m6WDPCxQ}WDuo|{VM>ltcWjRHPk~x+FL}CFDmGRD!|&BU`=WG zq0o2r#Mp(M-8B=F8O;1b7bTj7__Oo{sm&s+QTecvl#-jAs1orJeW|n0tO`SH6vs2(yAySD8KC%3Oe3>n{Y+8}R+<(|UzeUM-ZUo89>$_2G>X zkMv^&;)PNMW*7<5r7Va@Al%XxRN}uo(?S36QY%(TD@F=~10y%|xq=K1 zQ_=UHU8EqgP+MyL9VkjCDz_+&EQRJeoFf; zqLFx42=5Hbvo(3rE5%Ui?-6a0>2I+;ZB5w9?MZ=rivQ-bz==}ozmRd>WV>-N^i|B| zx2T!5#R%>wU68@u&oMkU82=ke6dN>6Y5%iaWghN#z#|{I!*&pEp%xKx!9Tz|Fb!op zg;OA&@r3L_TFx^8i6IzBnM6hn(!S@oWHm&oG*nAp)hh7QvGqSOte_lGt%#b)sga|A zhJi7=6g$ge6O|z#V8$^<#^HIQCA!olJ$^h9R+(*Dk2gQI+F=iQuqcM|40_8CyE4}~ zttQ3P0y$zU83@T%3xH(nkWjzmjo77L<;RL7o9rXPg*Oz0g{?K8ra3x#$DmJ%~MMO=*t`_Hq(~$Gc zA3LEK*z@#~hj*PHW-0#(B+~`y@+XM=k`S(-QOZcv)KxWp<4e$Ple*B42uh&=ojD?6 z5Vt8WVn{3geCbe=_7wU;<&tMGC_hp@Qhtm(qFQ|;F1PxAdoS=(?fDjhOaq0oURuig z3^wTY(Oc={DQs$f<^}^Krk#_gsmRnB6}rD9JV*o;?qCXk2oRF+2)9#MNUi~kzs8{CzM{?IFrHA*Z$?`v7|gQ- zXvwjDs0~*Wc$!a_4v|y9FCm46$D*8K8F_-1dnKZ!v5~6JV{;%n*T0>rwWdMI6Z4QC z5x)HQ(WGWO2qUj)NZcV}@F6mwkN5`7IZKq^#K3QGWs-rEy++31!n9-*MT;4_l9EZ8 zpekmr^LegB3HL~&^57BCcq-{8p(>2-Y=kyaj+H69j)2TW9menHj)%%BC@{^MTxs0@ zmaxx^d)%DHTe5;^Fr|V;0-dTnCnlJ`i7g}<(<&(qy(xN)1XPP%e>;9JL|}$q#f(A* z2s;X`CA};Fmo<{_@O0R|gn5YjT7yN^gWEdl7GG-XCRZr^K` zI8ojX!GRS04pVczLIOQw8cxhC z#DI=fs27z`GSNPUD-aqTD@$;hMrkh7SCkI*p^}UoK_+c#$5#B(`T&YSPOx9LtmF@v%|vjQ$NP@}rk< zbQH(mxeqFitCz1^*1Sj`5@os0No$VN4cP#+chZLA><`=^8{zc5Fl|t=-fyFPzY{Y4 zE}{4>YHm_PB_vK{K=10^}e86pL9BN!@+SHIx@{pQ7H511yAyPG&yijR8 zqB09U3Fw--bgfZtEHoA>4NaXt*Jw1%hJhXfcN6!yMp>?EzC}lJ2f&v*j$L=~eWHy| zWH=u^;FC~b7QxUzpk M-s#&5{#rx-Zwq%p0RR91 literal 0 HcmV?d00001 diff --git a/utility/featurizer.py b/utility/featurizer.py new file mode 100644 index 0000000..e94c232 --- /dev/null +++ b/utility/featurizer.py @@ -0,0 +1,19 @@ +__author__ = "Ehsaneddin Asgari" +__license__ = "GPL" +__version__ = "1.0.0" +__maintainer__ = 
"Ehsaneddin Asgari" +__email__ = "asgari@berkeley.edu or ehsaneddin.asgari@helmholtz-hzi.de" +__project__ = "LLP - Life Language Processing" +__website__ = "https://llp.berkeley.edu/" + + +from sklearn.feature_extraction.text import TfidfVectorizer + +class TextFeature(object): + ''' + This class is to create feature matrix + ''' + def __init__(self, corpus, analyzer='word', ngram=(1,1), idf=False, norm=None, binary=False): + tfm = TfidfVectorizer(use_idf=idf, analyzer=analyzer, tokenizer=str.split, ngram_range=ngram, norm=norm, stop_words=[], lowercase=False, binary=binary) + self.tf_vec = tfm.fit_transform(corpus) + self.feature_names = tfm.get_feature_names() diff --git a/utility/visualization_utility.py b/utility/visualization_utility.py new file mode 100644 index 0000000..ff4f1c0 --- /dev/null +++ b/utility/visualization_utility.py @@ -0,0 +1,130 @@ +__author__ = "Ehsaneddin Asgari" +__license__ = "GPL" +__version__ = "1.0.0" +__maintainer__ = "Ehsaneddin Asgari" +__email__ = "asgari@berkeley.edu or ehsaneddin.asgari@helmholtz-hzi.de" +__project__ = "LLP - Life Language Processing" +__website__ = "https://llp.berkeley.edu/" + + +import matplotlib +import matplotlib.pyplot as plt +import seaborn as sns; sns.set() +import sys +sys.path.append('../') +from sklearn.decomposition import PCA +from sklearn.manifold import TSNE +from utility.file_utility import FileUtility +from utility.visualization_utility import plot_scatter +import numpy as np +import matplotlib +import matplotlib.pyplot as plt + +global color_schemes +color_schemes=[['green','blue','red','gold', 'cyan'], ['#ff0505', '#f2a041', '#cdff05', '#04d9cb', '#45a8ff', '#8503a6', '#590202', '#734d02', '#4ab304', '#025359', '#0454cc', '#ff45da', '#993829', '#ffda45', '#1c661c', '#05cdff', '#1c2f66', '#731f57', '#b24a04', '#778003', '#0e3322', '#024566', '#0404d9', '#e5057d', '#66391c', '#31330e', '#3ee697', '#2d7da6', '#20024d', '#33011c']+list(({'aliceblue': '#F0F8FF','antiquewhite': '#FAEBD7','aqua': 
'#00FFFF','aquamarine': '#7FFFD4','azure': '#F0FFFF','beige': '#F5F5DC','bisque': '#FFE4C4','black': '#000000','blanchedalmond': '#FFEBCD','blue': '#0000FF','blueviolet': '#8A2BE2','brown': '#A52A2A','burlywood': '#DEB887','cadetblue': '#5F9EA0','chartreuse': '#7FFF00','chocolate': '#D2691E','coral': '#FF7F50','cornflowerblue': '#6495ED','cornsilk': '#FFF8DC','crimson': '#DC143C','cyan': '#00FFFF','darkblue': '#00008B','darkcyan': '#008B8B','darkgoldenrod': '#B8860B','darkgray': '#A9A9A9','darkgreen': '#006400','darkkhaki': '#BDB76B','darkmagenta': '#8B008B','darkolivegreen': '#556B2F','darkorange': '#FF8C00','darkorchid': '#9932CC','darkred': '#8B0000','darksalmon': '#E9967A','darkseagreen': '#8FBC8F','darkslateblue': '#483D8B','darkslategray': '#2F4F4F','darkturquoise': '#00CED1','darkviolet': '#9400D3','deeppink': '#FF1493','deepskyblue': '#00BFFF','dimgray': '#696969','dodgerblue': '#1E90FF','firebrick': '#B22222','floralwhite': '#FFFAF0','forestgreen': '#228B22','fuchsia': '#FF00FF','gainsboro': '#DCDCDC','ghostwhite': '#F8F8FF','gold': '#FFD700','goldenrod': '#DAA520','gray': '#808080','green': '#008000','greenyellow': '#ADFF2F','honeydew': '#F0FFF0','hotpink': '#FF69B4','indianred': '#CD5C5C','indigo': '#4B0082','ivory': '#FFFFF0','khaki': '#F0E68C','lavender': '#E6E6FA','lavenderblush': '#FFF0F5','lawngreen': '#7CFC00','lemonchiffon': '#FFFACD','lightblue': '#ADD8E6','lightcoral': '#F08080','lightcyan': '#E0FFFF','lightgoldenrodyellow': '#FAFAD2','lightgreen': '#90EE90','lightgray': '#D3D3D3','lightpink': '#FFB6C1','lightsalmon': '#FFA07A','lightseagreen': '#20B2AA','lightskyblue': '#87CEFA','lightslategray': '#778899','lightsteelblue': '#B0C4DE','lightyellow': '#FFFFE0','lime': '#00FF00','limegreen': '#32CD32','linen': '#FAF0E6','magenta': '#FF00FF','maroon': '#800000','mediumaquamarine': '#66CDAA','mediumblue': '#0000CD','mediumorchid': '#BA55D3','mediumpurple': '#9370DB','mediumseagreen': '#3CB371','mediumslateblue': '#7B68EE','mediumspringgreen': 
'#00FA9A','mediumturquoise': '#48D1CC','mediumvioletred': '#C71585','midnightblue': '#191970','mintcream': '#F5FFFA','mistyrose': '#FFE4E1','moccasin': '#FFE4B5','navajowhite': '#FFDEAD','navy': '#000080','oldlace': '#FDF5E6','olive': '#808000','olivedrab': '#6B8E23','orange': '#FFA500','orangered': '#FF4500','orchid': '#DA70D6','palegoldenrod': '#EEE8AA','palegreen': '#98FB98','paleturquoise': '#AFEEEE','palevioletred': '#DB7093','papayawhip': '#FFEFD5','peachpuff': '#FFDAB9','peru': '#CD853F','pink': '#FFC0CB','plum': '#DDA0DD','powderblue': '#B0E0E6','purple': '#800080','red': '#FF0000','rosybrown': '#BC8F8F','royalblue': '#4169E1','saddlebrown': '#8B4513','salmon': '#FA8072','sandybrown': '#FAA460','seagreen': '#2E8B57','seashell': '#FFF5EE','sienna': '#A0522D','silver': '#C0C0C0','skyblue': '#87CEEB','slateblue': '#6A5ACD','slategray': '#708090','snow': '#FFFAFA','springgreen': '#00FF7F','steelblue': '#4682B4','tan': '#D2B48C','teal': '#008080','thistle': '#D8BFD8','tomato': '#FF6347','turquoise': '#40E0D0','violet': '#EE82EE','wheat': '#F5DEB3','white': '#FFFFFF','whitesmoke': '#F5F5F5','yellow': '#FFFF00','yellowgreen': '#9ACD32'}).keys()),['#ff0505', '#f2a041', '#cdff05', '#04d9cb', '#45a8ff', '#8503a6', '#590202', '#734d02', '#4ab304', '#025359', '#0454cc', '#ff45da', '#993829', '#ffda45', '#1c661c', '#05cdff', '#1c2f66', '#731f57', '#b24a04', '#778003', '#0e3322', '#024566', '#0404d9', '#e5057d', '#66391c', '#31330e', '#3ee697', '#2d7da6', '#20024d', '#33011c']] + +def create_mat_plot(mat, axis_names, title, filename, xlab, ylab, cmap='inferno', filetype='pdf', rx=0, ry=0, font_s=10, annot=True): + ''' + :param mat: divergence matrix + :param axis_names: axis_names + :param title + :param filename: where to be saved + :return: + ''' + plt.rc('text', usetex=True) + if len(axis_names)==0: + ax = sns.heatmap(mat,annot=annot, cmap=cmap,fmt="d") + else: + # removed fmt="d", + ax = sns.heatmap(mat,annot=annot, yticklabels=axis_names, xticklabels=axis_names, 
cmap=cmap) + plt.title(title) + params = { + 'legend.fontsize': font_s, + 'xtick.labelsize': font_s, + 'ytick.labelsize': font_s, + 'text.usetex': True, + } + matplotlib.rcParams['mathtext.fontset'] = 'stix' + matplotlib.rcParams['font.family'] = 'STIXGeneral' + matplotlib.rcParams['mathtext.fontset'] = 'custom' + matplotlib.rcParams['mathtext.rm'] = 'Bitstream Vera Sans' + matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic' + matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold' + plt.xlabel(xlab) + plt.ylabel(ylab) + plt.xticks(rotation=rx) + plt.yticks(rotation=ry) + plt.rcParams.update(params) + plt.tight_layout() + plt.savefig(filename + '.'+filetype) + plt.show() + plt.clf() + +def plot_scatter(ax, X, Y, x_label, y_label, title,legend_hide=True, legend_loc=4, label_dict=False, legend_size=7, legend_col=1, color_schemes_idx=1): + plt.rc('text', usetex=True) + global color_schemes + + target=list(set(Y)) + target.sort() + color_idx=[target.index(x) for x in Y] + color_list=color_schemes[color_schemes_idx] + + for current_color in range(len(target)): + color=color_list + current_idxs=[idx for idx,v in enumerate(color_idx) if v==current_color] + if label_dict: + ax.scatter(X[current_idxs, 0], X[current_idxs, 1], c=color[current_color], label=label_dict[target[current_color]], cmap='viridis', alpha=0.4, edgecolors=None) + else: + ax.scatter(X[current_idxs, 0], X[current_idxs, 1], c=color[current_color], label=target[current_color], cmap='viridis', alpha=0.4, edgecolors=None) + plt.xlabel(x_label) + plt.ylabel(y_label) + plt.xticks([]) + plt.yticks([]) + ax.set_title(title) + if not legend_hide: + ax.legend(loc=legend_loc, bbox_to_anchor=(0.5, -0.1), prop={'size': legend_size},ncol=legend_col, edgecolor='black', facecolor='white', frameon=True) + matplotlib.rcParams['mathtext.fontset'] = 'stix' + matplotlib.rcParams['font.family'] = 'STIXGeneral' + matplotlib.rcParams['mathtext.fontset'] = 'custom' + matplotlib.rcParams['mathtext.rm'] = 
'Bitstream Vera Sans' + matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic' + matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold' + matplotlib.rcParams["axes.edgecolor"] = "black" + matplotlib.rcParams["axes.linewidth"] = 0.6 + rect = ax.patch + rect.set_facecolor('white') + + + +def plot_pca_tsne_nn(X_pca, X_tsne, X_tsne_NN, Y, filename=False, legend_size=7, label_dic=False, color_idx=2, loc=[(4,1),(4,1),(4,1)]): + myplot=figure(figsize=(12, 12)) + ax=subplot(221) + plot_scatter(ax, X_pca, Y, 'PCA_1', 'PCA_0', '(i) PCA over 6-mer representations',legend_hide=True, legend_loc=loc[0][0], legend_col=loc[0][1], legend_size=legend_size, label_dict=label_dic, color_schemes_idx=color_idx) + ax=subplot(222) + plot_scatter(ax, X_tsne, Y, 't-SNE_1', 't-SNE_0', '(ii) t-SNE over 6-mer representations',legend_hide=False, legend_loc=9, legend_col=loc[1][1], legend_size=legend_size,label_dict=label_dic, color_schemes_idx=color_idx) + ax=subplot(223) + plot_scatter(ax, X_tsne_NN, Y, 't-SNE_1', 't-SNE_0', '(iii) t-SNE / activation function of the last layer of neural network',legend_hide=True, legend_loc=loc[2][0], legend_col=loc[2][1], legend_size=legend_size, label_dict=label_dic, color_schemes_idx=color_idx) + myplot.tight_layout() + if filename: + plt.savefig(filename+'.pdf') + else: + plt.show() + +def get_pca_tsne(X, X_NN): + X_pca = PCA(n_components=50).fit_transform(X.toarray()) + X_tsne = TSNE(n_components=2, perplexity=40, verbose=2, learning_rate=10).fit_transform(X.toarray()) + X_tsne_NN = TSNE(n_components=2, perplexity=40, verbose=2, learning_rate=10).fit_transform(X_NN) + return X_pca, X_tsne, X_tsne_NN + +def create_tsne_web(X, Y, tsne_file_coor, tsne_file_label): + classes=list(set(Y)) + classes.sort() + L=[classes.index(y) for y in Y] + tsne_res = np.hstack((X, np.array([L]).T)) + tsne_res[:,0:2]=np.round(tsne_res[:,0:2],2) + tsne_lines=[] + for l in tsne_res: + tsne_lines.append('\t'.join([str(l[0]), str(l[1]), str(int(l[2]))])) + 
FileUtility.save_list(tsne_file_coor,tsne_lines) + FileUtility.save_list(tsne_file_label,Y) +