?!0BO7kj)A{v=N&bh)`~E;+dPq4d_{LV
z#OijjnYIdF$t(blitLJxxY*7+OR!y?xmj7GY{&d2+VYX!3d57YgZx?RE4hlE!*jH_
zPLSq>6KWSLt+FZ@c?Rfc9^=ZHK4^_z5}bvWJ3p?~Fv==dkk`|FKlA(+*KMmO_g5CJ
zmo*kL>2J*_>NrJEAI<04PrY-hxGCR%qWjE(PDsfr9RF0>_HfE=lvpivYxv3$NBeHhz#wSg-^Diu>6T^m=d+{^3!j(
zb1Jr|Sm4YG8!W(=&V^5lpAHP#=ii1sfmS(!qWM01RM!a!OAwz11n*Mf@9j1#5eCW|Pbv517;T9*U%8N}
z8y)pTA-B`lS_fk^X7{q9{)%xQ-)jBsi2HDpK3GMb%$%v
zxIm9;Oy5SU(Os|o$%1cd)HkLZVHpL30-3>sTO5uT7a%#q%Uj)ka=Z*PTLRp!m#Bp8
zEA#s!ZHv9SB2|mM`XVnG^5-`#`1TraB1Cy?>mITnlq^_BbDO=Keh{5}(;lWXNF?I82K+*RwVC!)vF3OrRG!fB
7V#E~B-7bb=-lWtCZQl5yNV=YrlI3!_P+F3>CI@?`Aj<^AG$pB-h-=JQji7Z
z9997#BwE)sKe0R_*=&Yo*`x5X^yVJ)``77**c7%a-C55G&e$M9!!Q=7yie{q9*jnX
zA0xG63{MG^>OU|xeXQ5Kv@P*%=~d%LpSRAB_v@?V%je&ooNql?Z^j%wu=DSpD+`!y
z`N2-`q^X*>?+C@x3=n4tH_(;e0&e85Qd9eze0Q|x2AtY@XOofK%gb;qPB6Pl?LLb|
zlcgB^e0M`oKh{PmhS5as#gy)OqWXDS=Y@(q>sHD0T}YW8o#rsX^S2p$A%F;plsXb7
zi6#1(h*%|WsR-&vNO~{eCIOq+%X+=ke4;KG&}5kh%xq@ubVVXzWBpWx6M^
z`rD?Uz%}zCoez_JExw<6_fl|-Cm!0Ql&(DCI~S2&sUq>&=IJ+@6(p<61`VkWQOl9&
z2&v}qchohC>Md@MuN-OQ;QT`4aQYcf^095fVIaWua?8@THque=%Xgd8AKb-EGgOMr
z#Y^8+BpEv=%j{N+Htq&IZLTlfolD^XD3`h%e&~--m2n(?L>uP0Qs?6N^zL`xSSgKw
zpR=(-1p8anQm^Z(F1R&F&2D)s}{!3h(H|zB;&eH$GfYr>zx_hxu9oD@T3PcVhzVqpT9?AW3UJoS87CX~U##dX8XRZTI*cy;oVM^^Y{_C@lf5i9;l7MCcehyZ>HYR}+OGgp
zDpy9Qx#LX@Qbo~FJ16*jvn*%81P+Hj?z=jKC%GY?(Rc0V!cZFwcP?5sux
zHp*&f=pkkTJ1{U(^zPn$+D@@jmv%wTL=||7Zcgb-wo7Xav7dZ=A8yafHx}XsbG=lJ
z9d6h5n*I1-qQuEWaD+LFdOlA-D4H`*zj&%RsC8fsp!~(aC#rwCsQ8!&DYW7?z)V-y
z2y3}bqoEZ#7JMboqm*St8Uwz(G#_-Wn
za!zK;$RTzAiO;^WNBP!s7cC-i4=RpihXTGeAMQ!W?96--;=vL3+Jjt+Dyy{1?+jN)1Q|eKPpssI=?}oZg?&fEk
z_*T!?qy!#yX@BvK>(xA{*;qaK$di_E7{XiTYEl1W?J%%TKJfFH8O&mBGighDzG3~<
z7129i$FJ7^kd@`tSfr0n{{9wDTT}$L*GlfeCW3S{4tPDzl3wJJ3IW(jCu*u5jgJAt
zUxlciY0JMAmIQV#IJ|Vw1Fe0F5?!0d^o|)P3jyZlSjHp`T@*A-j0v4#@9S=`Rp*R<
zYdUPUXpl_nJUGA7`R?a%9n7~wyO#R#{jq4@jkSn(JK2bi`oz2q$=|zq+yJ!^YIbxO`g_Zeeua%$2k}K|#+qny=NCoL7RZTy~aVLZ4C+46b=6(_sX?XqHLGH*WGH|BSAD7uwi>;3c9
z(#zrAqz0d-0{a`~dDrBKHRBRXN{rjh$`$ud$EmBAbZHHUnfnb1CJC*$ee+5A%zJM&YSMcr?TGoyoV>zvm-5%d0O;Eqv6!?HRn}t
zc4QOgN8=OMi>5gi#VsnXR!1=+rVQQtF1OkQEz?HCj!Yj5I@p%K;Gw#}iAgmCI
zR;{mS^F-rQQ={otBlMy10K8>{m%qZntYu7#1hoG`*ATtzX8HGq5j%b%Q3edQeA6sG
zV)2cRgNctA%V|~YZIf!#Y2x9Snn(#SW}A_5ds%`A;*2c_y?0MNv?czrjzihKvyJdG
z7unP97Qo`!n62D!)dA1UH$tKDUKqOe)|S2m%b$B*PGT*0t>X0sm0KdTk`(LRq}VmX
zt*j!eNI$T&Z@VC=5i3!2
zsI~YxjnLOcmijM8jM5E402ld{+&t$=BJt5W4=om!YK7IP=ANWo)mppM8%dI3p%>-3
zyy73Pj^*n}jA%0#$k|kmJkDsFzlM*G6Po72YSZPprm{S85O`hqC`nE?4HnfL`Wj0=
zEg+=C^&Fa98fOE2qs`NM^y6Sz$-(h;5p3Ifbj!Q$U9h$Cs;Wjg6fvs08YfzkVe`Wv
z;q1yH>-R)`UJ#8?(Q%KKX}i*=7LQ)#LSHAOk^H5Q#yM)*Uf=8e`=}t~zvfEw;%9=v
zbfhr4)68&MdbJiRHOOOOU7GJ0IHorLSWMrszbWyM0|bN8!kkx~<*M4!E3lg~V1gGr-{
zVS^2Wqk~`VSM6uQC+1474rYRf!6}(Niz+pG}BcUnRDq
z4NEp@Rr%-X=*_BXn`>&HzBV4yhZYxvAo%TOviw9RdWH5b9bPtnG%@>>#MI~g+Owxa
zz2Q)?tbAg0eR2MpL}ol+y~OjEvNf``S?il0(6zNlsTi{!iEG1-X6vN-iDcA3Xxu=~
zF4y{H5Xjq6#{7-R;@8rJm#0lK#9D3OOd09Xt=!lL88=6)OuNBZnS(V?ruEN<7NR6_;ih6*9tzWV8xaV32B
zTH{MA0daJZJPAA5aAD3+;|x9Qi@_(7g_*VBT)*Bx>KCd{h5t*DlTyBLY~
zPSjsjPJM4zJP6}LASvVBt%&xnBo}fOm;8AZ=Zps_Do`F#EqhBm5l6CjA?V?W_BH@Y
znb+1G2q#a-oQjGrUMAuQI1DHb1H*t2C<+afL_^I$>Li?#y_EvN)(H;;13(JyR(Jvl
zh?ax^K$OiWAL3955&%-dxvJsqZS6>8Ir5?&3GZwOM3Y4*>dBR5JQPT&%VGiKD(k-r
zwdlWG__GiVMT7sJGPGIzTS6=FjLNPb&07*Pm$VyC0{*fG`vxi&y53$8Q{Ego@@C@U{tgDx{yh
zY&F}!WD4ZH8t)m=8^7DxLH?6ib2I+5dZMbFAY-<>#t-OM$!*<0b^jq5K{;gt3#xj<
zzoOdb-Hs582*{3IJWVwPdQU{XADUp){DUnSl@9hqx
zbZM}^9Tti)luLlD=--CA1O8zh;4kyY0>A8|m;@w;T+`m#-30i{2g;|Ciw7l2zik2e
ztuyp%4<#J=_`BGW<6%p7|5rEwplhypf`S$0CNcR#7s$x?29V(4-mdsyNB
z#RpgVg9l0G{l!n_gOlZv$Uk4QJQ7BJ$#gOc7)BXY$RnJr>(`cKyOqfeE@5RUD1`$J
z1w){4I2~^a7qUMgN8zqv-BS{
zN#sB5Bgg+Ad}IWZbN+W(6dd{+ABl(~U+qK+DD~`p@Z?Z|ur4lSRQ*CP0HjW^aREXo
zStlQEb!DIlPzDagAfS@UP$U`(0h5h`E25z=3>u+G#sE?YjgkZW|11>vD!W)I+2O4m
z-94OvASI--l7a#ljRZqbN)U{)G7P4SQBi?Q!XaoF2B|^;p*smjB>hS$3L%LE2neWL
H!vOvdT9)am
literal 0
HcmV?d00001
diff --git a/orangecontrib/text/widgets/tests/data/sample_txt.txt b/orangecontrib/text/widgets/tests/data/sample_txt.txt
new file mode 100644
index 000000000..2e500dafc
--- /dev/null
+++ b/orangecontrib/text/widgets/tests/data/sample_txt.txt
@@ -0,0 +1 @@
+This is a test txt file
\ No newline at end of file
diff --git a/orangecontrib/text/widgets/tests/test_owimportdocuments.py b/orangecontrib/text/widgets/tests/test_owimportdocuments.py
new file mode 100644
index 000000000..3966772d3
--- /dev/null
+++ b/orangecontrib/text/widgets/tests/test_owimportdocuments.py
@@ -0,0 +1,75 @@
+import os
+import unittest
+
+from Orange.widgets.tests.base import WidgetTest
+from orangecontrib.text.widgets.owimportdocuments import OWImportDocuments
+
+
+class TestOWImportDocuments(WidgetTest):
+    """Tests for OWImportDocuments run on the bundled sample files."""
+
+    def setUp(self) -> None:
+        """Create the widget, set the data folder, reload and wait."""
+        data_dir = os.path.join(os.path.dirname(__file__), "data")
+        self.widget: OWImportDocuments = self.create_widget(OWImportDocuments)
+        self.widget.setCurrentPath(data_dir)
+        self.widget.reload()
+        self.wait_until_finished()
+
+    def test_current_path(self):
+        """The folder passed to setCurrentPath is kept in currentPath."""
+        expected = os.path.join(os.path.dirname(__file__), "data")
+        self.assertEqual(expected, self.widget.currentPath)
+
+    def test_output(self):
+        """Four samples are output as data; the corrupted PDF is skipped."""
+        kinds = ["docx", "odt", "pdf", "txt"]
+        corpus = self.get_output(self.widget.Outputs.data)
+        self.assertEqual(4, len(corpus))
+        self.assertEqual(3, len(corpus.domain.metas))
+        loaded_names = corpus.get_column_view("name")[0].tolist()
+        self.assertListEqual(
+            ["sample_docx", "sample_odt", "sample_pdf", "sample_txt"],
+            sorted(loaded_names),
+        )
+        expected_texts = [f"This is a test {x} file" for x in kinds]
+        contents = corpus.get_column_view("content")[0].tolist()
+        self.assertListEqual(
+            expected_texts, sorted(text.strip() for text in contents)
+        )
+        self.assertEqual("content", corpus.text_features[0].name)
+
+        skipped = self.get_output(self.widget.Outputs.skipped_documents)
+        self.assertEqual(1, len(skipped))
+        self.assertEqual(2, len(skipped.domain.metas))
+        skipped_names = skipped.get_column_view("name")[0].tolist()
+        self.assertListEqual(
+            ["sample_pdf_corrupted.pdf"], sorted(skipped_names)
+        )
+
+    def test_could_not_be_read_warning(self):
+        """
+        sample_pdf_corrupted.pdf is a corrupted file that cannot be loaded
+        correctly, so the widget must show the read-error warning.
+        """
+        warning = self.widget.Warning.read_error
+        self.assertTrue(warning.is_shown())
+        self.assertEqual("One file couldn't be read.", str(warning))
+
+    def test_send_report(self):
+        """Smoke test: send_report is called and must not raise."""
+        self.widget.send_report()
+
+    def test_info_box(self):
+        """The info area text reflects loaded and skipped document counts."""
+        loaded_info = self.widget.info_area.text()
+        self.assertEqual("4 documents, 1 skipped", loaded_info)
+
+        # a freshly created widget, before any folder has been set
+        self.widget: OWImportDocuments = self.create_widget(OWImportDocuments)
+        empty_info = self.widget.info_area.text()
+        self.assertEqual("No document set selected", empty_info)
+
+
+if __name__ == "__main__":  # run the tests when executed as a script
+ unittest.main()