diff --git a/Pipfile b/Pipfile index 04c5e39..c348e37 100644 --- a/Pipfile +++ b/Pipfile @@ -12,11 +12,12 @@ llama-index-embeddings-huggingface = "*" pycryptodome = "*" nbconvert = "*" pyexiftool = "*" -numba = "*" llama-index-readers-web = "*" html2text = "*" streamlit-tags = "*" streamlit-extras = "*" +black = "*" +torch = "*" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index 2692225..9e18da1 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "40f9cc9fdad40fdb5824441996a0245e49984cf7d722efcbdb054bd0d6da09b8" + "sha256": "b5612eaac8cb8f3d6d1619df0d596c425a484484586a81db970f3d9eda506b3c" }, "pipfile-spec": 6, "requires": { @@ -203,6 +203,35 @@ "markers": "python_full_version >= '3.6.0'", "version": "==4.12.3" }, + "black": { + "hashes": [ + "sha256:057c3dc602eaa6fdc451069bd027a1b2635028b575a6c3acfd63193ced20d9c8", + "sha256:08654d0797e65f2423f850fc8e16a0ce50925f9337fb4a4a176a7aa4026e63f8", + "sha256:163baf4ef40e6897a2a9b83890e59141cc8c2a98f2dda5080dc15c00ee1e62cd", + "sha256:1e08fb9a15c914b81dd734ddd7fb10513016e5ce7e6704bdd5e1251ceee51ac9", + "sha256:4dd76e9468d5536abd40ffbc7a247f83b2324f0c050556d9c371c2b9a9a95e31", + "sha256:4f9de21bafcba9683853f6c96c2d515e364aee631b178eaa5145fc1c61a3cc92", + "sha256:61a0391772490ddfb8a693c067df1ef5227257e72b0e4108482b8d41b5aee13f", + "sha256:6981eae48b3b33399c8757036c7f5d48a535b962a7c2310d19361edeef64ce29", + "sha256:7e53a8c630f71db01b28cd9602a1ada68c937cbf2c333e6ed041390d6968faf4", + "sha256:810d445ae6069ce64030c78ff6127cd9cd178a9ac3361435708b907d8a04c693", + "sha256:93601c2deb321b4bad8f95df408e3fb3943d85012dddb6121336b8e24a0d1218", + "sha256:992e451b04667116680cb88f63449267c13e1ad134f30087dec8527242e9862a", + "sha256:9db528bccb9e8e20c08e716b3b09c6bdd64da0dd129b11e160bf082d4642ac23", + "sha256:a0057f800de6acc4407fe75bb147b0c2b5cbb7c3ed110d3e5999cd01184d53b0", + "sha256:ba15742a13de85e9b8f3239c8f807723991fbfae24bad92d34a2b12e81904982", + "sha256:bce4f25c27c3435e4dace4815bcb2008b87e167e3bf4ee47ccdc5ce906eb4894", + "sha256:ca610d29415ee1a30a3f30fab7a8f4144e9d34c89a235d81292a1edb2b55f540", + "sha256:d533d5e3259720fdbc1b37444491b024003e012c5173f7d06825a77508085430", + "sha256:d84f29eb3ee44859052073b7636533ec995bd0f64e2fb43aeceefc70090e752b", + "sha256:e37c99f89929af50ffaf912454b3e3b47fd64109659026b678c091a4cd450fb2", + "sha256:e8a6ae970537e67830776488bca52000eaa37fa63b9988e8c487458d9cd5ace6", + "sha256:faf2ee02e6612577ba0181f4347bcbcf591eb122f7841ae5ba233d12c39dcb4d" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==24.2.0" + }, "bleach": { "hashes": [ "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe", @@ -531,11 +560,11 @@ }, "faker": { "hashes": [ - "sha256:117ce1a2805c1bc5ca753b3dc6f9d567732893b2294b827d3164261ee8f20267", - "sha256:458d93580de34403a8dec1e8d5e6be2fee96c4deca63b95d71df7a6a80a690de" + "sha256:2456d674f40bd51eb3acbf85221277027822e529a90cc826453d9a25dff932b1", + "sha256:ea6f784c40730de0f77067e49e78cdd590efb00bec3d33f577492262206c17fc" ], "markers": "python_version >= '3.8'", - "version": "==23.3.0" + "version": "==24.0.0" }, "fastapi": { "hashes": [ @@ -970,11 +999,11 @@ "inference" ], "hashes": [ - "sha256:26a15b604e4fc7bad37c467b76456543ec849386cbca9cd7e1e135f53e500423", - "sha256:b183144336fdf2810a8c109822e0bb6ef1fd61c65da6fb60e8c3f658b7144016" + "sha256:df37c2c37fc6c82163cdd8a67ede261687d80d1e262526d6c0ce73b6b3630a7b", + "sha256:e1f4968c93726565a80edf6dc309763c7b546d0cfe79aa221206034d50155531" ], "markers": "python_full_version >= 
'3.8.0'", - "version": "==0.21.3" + "version": "==0.21.4" }, "humanfriendly": { "hashes": [ @@ -1190,12 +1219,12 @@ }, "llama-index": { "hashes": [ - "sha256:b2572d407c9b1070dca2ee9a3f518ab40c7aca9e9212c008cc024c9e0bb2f60d", - "sha256:db1ea0da807b20b819849b1bff2aaf5d49073e1ea36801ff40242a62fb4676eb" + "sha256:a2302e5fff779e4b1a8c5fb2cf97f002a2d9859b945095e40317fb977d11535a", + "sha256:b5463e57b3bcaa9f9ca19695d3e0f66c21a319d32d50c0e35a9176a69725a87c" ], "index": "pypi", "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.10.15" + "version": "==0.10.17" }, "llama-index-agent-openai": { "hashes": [ @@ -1207,19 +1236,19 @@ }, "llama-index-cli": { "hashes": [ - "sha256:48a81fc33d4005dbe91b77ebe840ac69e0102e7e2a59770308f94fae1b792544", - "sha256:55a77e3c370eb760c42cb74a0df6f650e41ec17928b72b07ff8b927cb94b15b4" + "sha256:4e300f06206862d6d7eedde95632c6b61a5ebb5162454f1ac7a3c3c9b3ebb05f", + "sha256:776a96917965d0df6e7e272d6278394a4f7c922e57973a75e2645609727fa4b1" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.7" + "version": "==0.1.8" }, "llama-index-core": { "hashes": [ - "sha256:37618f227583e643aee2efd3c7e2914541184136410af2fcf618b855f75b432c", - "sha256:a1482c9af67b5b254215267c02fce08ac6bb52a19a0859797552950b023fb98d" + "sha256:bbc1a0f5e457e6f44769ad70e6398a4f1d233ddf70bfa5dc41a925b360445b55", + "sha256:da13e609f015e87fbe985a4d607a8368d74e84b002d3c7ba4125ddea54dde452" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.10.15" + "version": "==0.10.17" }, "llama-index-embeddings-huggingface": { "hashes": [ @@ -1297,11 +1326,11 @@ }, "llama-index-readers-file": { "hashes": [ - "sha256:d9fc0ca84926d04bd757c57fe87841cd9dbc2606aab5f2ce927deec14aaa1a74", - "sha256:f583bd90353a0c0985213af02c97aa2f2f22e702d4311fe719de91382c9ad8dd" + "sha256:f23417a2afc8461a32f08f057e85c8d09b1c687ba16ca6a6a08f08f319eca26a", + "sha256:f58c72e2c2ed9f36b5308b4b9ee3142f3848156f0e3b85e813db0a26b8d03290" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.1.6" + "version": "==0.1.8" }, "llama-index-readers-llama-parse": { "hashes": [ @@ -1330,11 +1359,11 @@ }, "llama-parse": { "hashes": [ - "sha256:736a80e4fc5970b9cbef1048171908021ebd26be43f07b806889f0d1bb3875fe", - "sha256:8e6e7a0986ad30cb82c5c67a29b7e2c3892620dd2a422afc909654a9d0f1c82c" + "sha256:31706a610d28729c2b4741455c9a9c1edf471171b4b9d2a7138aa064656712a6", + "sha256:7d866940fa0604e0cef496a7c160fb9ef8fcd6bd3a11a2292d100f43cc6ad469" ], "markers": "python_version < '4.0' and python_full_version >= '3.8.1'", - "version": "==0.3.5" + "version": "==0.3.7" }, "llamaindex-py-client": { "hashes": [ @@ -1344,33 +1373,6 @@ "markers": "python_version >= '3.8' and python_version < '4.0'", "version": "==0.1.13" }, - "llvmlite": { - "hashes": [ - "sha256:05cb7e9b6ce69165ce4d1b994fbdedca0c62492e537b0cc86141b6e2c78d5888", - "sha256:08fa9ab02b0d0179c688a4216b8939138266519aaa0aa94f1195a8542faedb56", - "sha256:3366938e1bf63d26c34fbfb4c8e8d2ded57d11e0567d5bb243d89aab1eb56098", - "sha256:43d65cc4e206c2e902c1004dd5418417c4efa6c1d04df05c6c5675a27e8ca90e", - "sha256:70f44ccc3c6220bd23e0ba698a63ec2a7d3205da0d848804807f37fc243e3f77", - "sha256:763f8d8717a9073b9e0246998de89929071d15b47f254c10eef2310b9aac033d", - "sha256:7e0c4c11c8c2aa9b0701f91b799cb9134a6a6de51444eff5a9087fc7c1384275", - "sha256:81e674c2fe85576e6c4474e8c7e7aba7901ac0196e864fe7985492b737dbab65", - 
"sha256:8d90edf400b4ceb3a0e776b6c6e4656d05c7187c439587e06f86afceb66d2be5", - "sha256:a78ab89f1924fc11482209f6799a7a3fc74ddc80425a7a3e0e8174af0e9e2301", - "sha256:ae511caed28beaf1252dbaf5f40e663f533b79ceb408c874c01754cafabb9cbf", - "sha256:b2fce7d355068494d1e42202c7aff25d50c462584233013eb4470c33b995e3ee", - "sha256:bb3975787f13eb97629052edb5017f6c170eebc1c14a0433e8089e5db43bcce6", - "sha256:bdd3888544538a94d7ec99e7c62a0cdd8833609c85f0c23fcb6c5c591aec60ad", - "sha256:c35da49666a21185d21b551fc3caf46a935d54d66969d32d72af109b5e7d2b6f", - "sha256:c5bece0cdf77f22379f19b1959ccd7aee518afa4afbd3656c6365865f84903f9", - "sha256:d0936c2067a67fb8816c908d5457d63eba3e2b17e515c5fe00e5ee2bace06040", - "sha256:d47494552559e00d81bfb836cf1c4d5a5062e54102cc5767d5aa1e77ccd2505c", - "sha256:d7599b65c7af7abbc978dbf345712c60fd596aa5670496561cc10e8a71cebfb2", - "sha256:ebe66a86dc44634b59a3bc860c7b20d26d9aaffcd30364ebe8ba79161a9121f4", - "sha256:f92b09243c0cc3f457da8b983f67bd8e1295d0f5b3746c7a1861d7a99403854a" - ], - "markers": "python_version >= '3.9'", - "version": "==0.42.0" - }, "lxml": { "hashes": [ "sha256:13521a321a25c641b9ea127ef478b580b5ec82aa2e9fc076c86169d161798b01", @@ -1547,11 +1549,11 @@ }, "marshmallow": { "hashes": [ - "sha256:20f53be28c6e374a711a16165fb22a8dc6003e3f7cda1285e3ca777b9193885b", - "sha256:e7997f83571c7fd476042c2c188e4ee8a78900ca5e74bd9c8097afa56624e9bd" + "sha256:4e65e9e0d80fc9e609574b9983cf32579f305c718afb30d7233ab818571768c3", + "sha256:f085493f79efb0644f270a9bf2892843142d80d7174bbbd2f3713f2a589dc633" ], "markers": "python_version >= '3.8'", - "version": "==3.21.0" + "version": "==3.21.1" }, "matplotlib": { "hashes": [ @@ -1824,12 +1826,12 @@ }, "nbconvert": { "hashes": [ - "sha256:3188727dffadfdc9c6a1c7250729063d7bc78b355ad7aa023138afa030d1cd07", - "sha256:e79e6a074f49ba3ed29428ed86487bf51509d9aab613bd8522ac08f6d28fd7fd" + "sha256:0c01c23981a8de0220255706822c40b751438e32467d6a686e26be08ba784382", + "sha256:8310edd41e1c43947e4ecf16614c61469ebc024898eb808cce0999860fc9fb16" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==7.16.1" + "version": "==7.16.2" }, "nbformat": { "hashes": [ @@ -1870,34 +1872,6 @@ "markers": "python_version >= '3.7'", "version": "==3.8.1" }, - "numba": { - "hashes": [ - "sha256:0307ee91b24500bb7e64d8a109848baf3a3905df48ce142b8ac60aaa406a0400", - "sha256:1192d6b2906bf3ff72b1d97458724d98860ab86a91abdd4cfd9328432b661e31", - "sha256:12b9b064a3e4ad00e2371fc5212ef0396c80f41caec9b5ec391c8b04b6eaf2a8", - "sha256:32bd0a41525ec0b1b853da244808f4e5333867df3c43c30c33f89cf20b9c2b63", - "sha256:4981659220b61a03c1e557654027d271f56f3087448967a55c79a0e5f926de62", - "sha256:5516a469514bfae52a9d7989db4940653a5cbfac106f44cb9c50133b7ad6224b", - "sha256:6feb1504bb432280f900deaf4b1dadcee68812209500ed3f81c375cbceab24dc", - "sha256:70d26ba589f764be45ea8c272caa467dbe882b9676f6749fe6f42678091f5f21", - "sha256:753dc601a159861808cc3207bad5c17724d3b69552fd22768fddbf302a817a4c", - "sha256:8cbef55b73741b5eea2dbaf1b0590b14977ca95a13a07d200b794f8f6833a01c", - "sha256:8d061d800473fb8fef76a455221f4ad649a53f5e0f96e3f6c8b8553ee6fa98fa", - "sha256:90efb436d3413809fcd15298c6d395cb7d98184350472588356ccf19db9e37c8", - "sha256:944faad25ee23ea9dda582bfb0189fb9f4fc232359a80ab2a028b94c14ce2b1d", - "sha256:9e20736bf62e61f8353fb71b0d3a1efba636c7a303d511600fc57648b55823ed", - "sha256:c086a434e7d3891ce5dfd3d1e7ee8102ac1e733962098578b507864120559ceb", - "sha256:cd3dac45e25d927dcb65d44fb3a973994f5add2b15add13337844afe669dd1ba", - 
"sha256:ce62bc0e6dd5264e7ff7f34f41786889fa81a6b860662f824aa7532537a7bee0", - "sha256:d540f69a8245fb714419c2209e9af6104e568eb97623adc8943642e61f5d6d8e", - "sha256:e125f7d69968118c28ec0eed9fbedd75440e64214b8d2eac033c22c04db48492", - "sha256:e86e6786aec31d2002122199486e10bbc0dc40f78d76364cded375912b13614c", - "sha256:fe4d7562d1eed754a7511ed7ba962067f198f86909741c5c6e18c4f1819b1f47" - ], - "index": "pypi", - "markers": "python_version >= '3.9'", - "version": "==0.59.0" - }, "numpy": { "hashes": [ "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", @@ -2020,11 +1994,11 @@ }, "nvidia-nvjitlink-cu12": { "hashes": [ - "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1", - "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c" + "sha256:991905ffa2144cb603d8ca7962d75c35334ae82bf92820b6ba78157277da1ad2", + "sha256:c6428836d20fe7e327191c175791d38570e10762edc588fb46749217cd444c74" ], "markers": "python_version >= '3'", - "version": "==12.3.101" + "version": "==12.4.99" }, "nvidia-nvtx-cu12": { "hashes": [ @@ -2292,6 +2266,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.5.1" }, + "pathspec": { + "hashes": [ + "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", + "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" + ], + "markers": "python_version >= '3.8'", + "version": "==0.12.1" + }, "pillow": { "hashes": [ "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8", @@ -2376,23 +2358,23 @@ }, "playwright": { "hashes": [ - "sha256:0608717cbf291a625ba6f751061af0fc0cc9bdace217e69d87b1eb1383b03406", - "sha256:431e3a05f8c99147995e2b3e8475d07818745294fd99f1510b61756e73bdcf68", - "sha256:4bf214d812092cf5b9b9648ba84611aa35e28685519911342a7da3a3031f9ed6", - "sha256:71ead0f33e00f5a8533c037c647938b99f219436a1b27d4ba4de4e6bf0567278", - "sha256:cf68335a5dfa4038fa797a4ba0105faee0094ebbb372547d7a27feec5b23c672", - "sha256:eaa17ab44622c447de26ed8f7d99912719568d8dbc3a9db0e07f0ae1487709d9", - "sha256:edb210a015e70bb0d328bf1c9b65fa3a08361f33e4d7c4ddd1ad2adb6d9b4479" + "sha256:283887f0bdd0039c3d720e32fbc73a045c24fa800599a6ad60fb199c29580534", + "sha256:313f2551a772f57c9ccca017c4dd4661f2277166f9e1d84bbf5a2e316f0f892c", + "sha256:4e1fc1c049a0af64626ddd50814d14a01f316bcbb4d1aa83c3416fe420add558", + "sha256:b2a46a24641e5d468046cde567c98fdb8d85e32df901630b14dfb288cbd1ed4f", + "sha256:dbf473496808d4c2c816902c1dee2aabc029648e56ce8514b643f5a1a6fc8e22", + "sha256:e092c6cfbf797bff03fbdfc53c3e6a9e29fbcf6b82f9e43113d37494aee0561b", + "sha256:e2b293f077efeaa45253fde31cea4bc6b0ae8be6b5e65e8ce8b4aa3b9f0d55b6" ], "markers": "python_version >= '3.8'", - "version": "==1.41.2" + "version": "==1.42.0" }, "posthog": { "hashes": [ - "sha256:c7e79b2e585d16e93749874bcbcdad78d857037398ce0d8d6c474a04d0bd3bbe", - "sha256:f0eafa663fbc4a942b49b6168a62a890635407044bbc7593051dcb9cc1208873" + "sha256:3c672be7ba6f95d555ea207d4486c171d06657eb34b3ce25eb043bfe7b6b5b76", + "sha256:8f7e3b2c6e8714d0c0c542a2109b83a7549f63b7113a133ab2763a89245ef2ef" ], - "version": "==3.4.2" + "version": "==3.5.0" }, "prometheus-client": { "hashes": [ @@ -2678,11 +2660,11 @@ }, "pymdown-extensions": { "hashes": [ - "sha256:6ca215bc57bc12bf32b414887a68b810637d039124ed9b2e5bd3325cbb2c050c", - "sha256:c0d64d5cf62566f59e6b2b690a4095c931107c250a8c8e1351c1de5f6b036deb" + "sha256:c70e146bdd83c744ffc766b4671999796aba18842b268510a329f7f64700d584", + 
"sha256:f5cc7000d7ff0d1ce9395d216017fa4df3dde800afb1fb72d1c7d3fd35e710f4" ], "markers": "python_version >= '3.8'", - "version": "==10.7" + "version": "==10.7.1" }, "pymupdf": { "hashes": [ @@ -2735,19 +2717,19 @@ }, "pyparsing": { "hashes": [ - "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb", - "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db" + "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad", + "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742" ], "markers": "python_full_version >= '3.6.8'", - "version": "==3.1.1" + "version": "==3.1.2" }, "pypdf": { "hashes": [ - "sha256:3316d9ddfcff5df67ae3cdfe8b945c432aa43e7f970bae7c2a4ab4fe129cd937", - "sha256:a62daa2a24d5a608ba1b6284dde185317ce3644f89b9ebe5314d0c5d1c9f257d" + "sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc", + "sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89" ], "markers": "python_version >= '3.6'", - "version": "==4.0.2" + "version": "==4.1.0" }, "pypika": { "hashes": [ @@ -3382,58 +3364,58 @@ "asyncio" ], "hashes": [ - "sha256:03f448ffb731b48323bda68bcc93152f751436ad6037f18a42b7e16af9e91c07", - "sha256:0de1263aac858f288a80b2071990f02082c51d88335a1db0d589237a3435fe71", - "sha256:0fb3bffc0ced37e5aa4ac2416f56d6d858f46d4da70c09bb731a246e70bff4d5", - "sha256:120af1e49d614d2525ac247f6123841589b029c318b9afbfc9e2b70e22e1827d", - "sha256:1306102f6d9e625cebaca3d4c9c8f10588735ef877f0360b5cdb4fdfd3fd7131", - "sha256:15e19a84b84528f52a68143439d0c7a3a69befcd4f50b8ef9b7b69d2628ae7c4", - "sha256:1ab4e0448018d01b142c916cc7119ca573803a4745cfe341b8f95657812700ac", - "sha256:1fc19ae2e07a067663dd24fca55f8ed06a288384f0e6e3910420bf4b1270cc51", - "sha256:2f5c9dfb0b9ab5e3a8a00249534bdd838d943ec4cfb9abe176a6c33408430230", - "sha256:30d81cc1192dc693d49d5671cd40cdec596b885b0ce3b72f323888ab1c3863d5", - "sha256:33e8bde8fff203de50399b9039c4e14e42d4d227759155c21f8da4a47fc8053c", - "sha256:4535c49d961fe9a77392e3a630a626af5baa967172d42732b7a43496c8b28876", - "sha256:48217be1de7d29a5600b5c513f3f7664b21d32e596d69582be0a94e36b8309cb", - "sha256:5ada0438f5b74c3952d916c199367c29ee4d6858edff18eab783b3978d0db16d", - "sha256:5b78aa9f4f68212248aaf8943d84c0ff0f74efc65a661c2fc68b82d498311fd5", - "sha256:5cd20f58c29bbf2680039ff9f569fa6d21453fbd2fa84dbdb4092f006424c2e6", - "sha256:611068511b5531304137bcd7fe8117c985d1b828eb86043bd944cebb7fae3910", - "sha256:680b9a36029b30cf063698755d277885d4a0eab70a2c7c6e71aab601323cba45", - "sha256:6c5bad7c60a392850d2f0fee8f355953abaec878c483dd7c3836e0089f046bf6", - "sha256:6c7a596d0be71b7baa037f4ac10d5e057d276f65a9a611c46970f012752ebf2d", - "sha256:7f470327d06400a0aa7926b375b8e8c3c31d335e0884f509fe272b3c700a7254", - "sha256:86a6ed69a71fe6b88bf9331594fa390a2adda4a49b5c06f98e47bf0d392534f8", - "sha256:8dfc936870507da96aebb43e664ae3a71a7b96278382bcfe84d277b88e379b18", - "sha256:954d9735ee9c3fa74874c830d089a815b7b48df6f6b6e357a74130e478dbd951", - "sha256:9e56afce6431450442f3ab5973156289bd5ec33dd618941283847c9fd5ff06bf", - "sha256:a3012ab65ea42de1be81fff5fb28d6db893ef978950afc8130ba707179b4284a", - "sha256:ad862295ad3f644e3c2c0d8b10a988e1600d3123ecb48702d2c0f26771f1c396", - "sha256:b1d9d1bfd96eef3c3faedb73f486c89e44e64e40e5bfec304ee163de01cf996f", - "sha256:b86abba762ecfeea359112b2bb4490802b340850bbee1948f785141a5e020de8", - "sha256:b90053be91973a6fb6020a6e44382c97739736a5a9d74e08cc29b196639eb979", - "sha256:c4fbe6a766301f2e8a4519f4500fe74ef0a8509a59e07a4085458f26228cd7cc", - 
"sha256:ca891af9f3289d24a490a5fde664ea04fe2f4984cd97e26de7442a4251bd4b7c", - "sha256:cb0845e934647232b6ff5150df37ceffd0b67b754b9fdbb095233deebcddbd4a", - "sha256:ce850db091bf7d2a1f2fdb615220b968aeff3849007b1204bf6e3e50a57b3d32", - "sha256:d04e579e911562f1055d26dab1868d3e0bb905db3bccf664ee8ad109f035618a", - "sha256:d07ee7793f2aeb9b80ec8ceb96bc8cc08a2aec8a1b152da1955d64e4825fcbac", - "sha256:d177b7e82f6dd5e1aebd24d9c3297c70ce09cd1d5d37b43e53f39514379c029c", - "sha256:d7b5a3e2120982b8b6bd1d5d99e3025339f7fb8b8267551c679afb39e9c7c7f1", - "sha256:d873c21b356bfaf1589b89090a4011e6532582b3a8ea568a00e0c3aab09399dd", - "sha256:d997c5938a08b5e172c30583ba6b8aad657ed9901fc24caf3a7152eeccb2f1b4", - "sha256:dbcd77c4d94b23e0753c5ed8deba8c69f331d4fd83f68bfc9db58bc8983f49cd", - "sha256:e36aa62b765cf9f43a003233a8c2d7ffdeb55bc62eaa0a0380475b228663a38f", - "sha256:e97cf143d74a7a5a0f143aa34039b4fecf11343eed66538610debc438685db4a", - "sha256:eb15ef40b833f5b2f19eeae65d65e191f039e71790dd565c2af2a3783f72262f", - "sha256:ec1f5a328464daf7a1e4e385e4f5652dd9b1d12405075ccba1df842f7774b4fc", - "sha256:f9374e270e2553653d710ece397df67db9d19c60d2647bcd35bfc616f1622dcd", - "sha256:fa67d821c1fd268a5a87922ef4940442513b4e6c377553506b9db3b83beebbd8", - "sha256:fd8aafda7cdff03b905d4426b714601c0978725a19efc39f5f207b86d188ba01", - "sha256:ff2f1b7c963961d41403b650842dc2039175b906ab2093635d8319bef0b7d620" + "sha256:0315d9125a38026227f559488fe7f7cee1bd2fbc19f9fd637739dc50bb6380b2", + "sha256:0d3dd67b5d69794cfe82862c002512683b3db038b99002171f624712fa71aeaa", + "sha256:124202b4e0edea7f08a4db8c81cc7859012f90a0d14ba2bf07c099aff6e96462", + "sha256:1ee8bd6d68578e517943f5ebff3afbd93fc65f7ef8f23becab9fa8fb315afb1d", + "sha256:243feb6882b06a2af68ecf4bec8813d99452a1b62ba2be917ce6283852cf701b", + "sha256:2858bbab1681ee5406650202950dc8f00e83b06a198741b7c656e63818633526", + "sha256:2f60843068e432311c886c5f03c4664acaef507cf716f6c60d5fde7265be9d7b", + "sha256:328529f7c7f90adcd65aed06a161851f83f475c2f664a898af574893f55d9e53", + "sha256:33157920b233bc542ce497a81a2e1452e685a11834c5763933b440fedd1d8e2d", + "sha256:3eba73ef2c30695cb7eabcdb33bb3d0b878595737479e152468f3ba97a9c22a4", + "sha256:426f2fa71331a64f5132369ede5171c52fd1df1bd9727ce621f38b5b24f48750", + "sha256:45c7b78dfc7278329f27be02c44abc0d69fe235495bb8e16ec7ef1b1a17952db", + "sha256:46a3d4e7a472bfff2d28db838669fc437964e8af8df8ee1e4548e92710929adc", + "sha256:4a5adf383c73f2d49ad15ff363a8748319ff84c371eed59ffd0127355d6ea1da", + "sha256:4b6303bfd78fb3221847723104d152e5972c22367ff66edf09120fcde5ddc2e2", + "sha256:56856b871146bfead25fbcaed098269d90b744eea5cb32a952df00d542cdd368", + "sha256:5da98815f82dce0cb31fd1e873a0cb30934971d15b74e0d78cf21f9e1b05953f", + "sha256:5df5d1dafb8eee89384fb7a1f79128118bc0ba50ce0db27a40750f6f91aa99d5", + "sha256:68722e6a550f5de2e3cfe9da6afb9a7dd15ef7032afa5651b0f0c6b3adb8815d", + "sha256:78bb7e8da0183a8301352d569900d9d3594c48ac21dc1c2ec6b3121ed8b6c986", + "sha256:81ba314a08c7ab701e621b7ad079c0c933c58cdef88593c59b90b996e8b58fa5", + "sha256:843a882cadebecc655a68bd9a5b8aa39b3c52f4a9a5572a3036fb1bb2ccdc197", + "sha256:87724e7ed2a936fdda2c05dbd99d395c91ea3c96f029a033a4a20e008dd876bf", + "sha256:8c7f10720fc34d14abad5b647bc8202202f4948498927d9f1b4df0fb1cf391b7", + "sha256:8e91b5e341f8c7f1e5020db8e5602f3ed045a29f8e27f7f565e0bdee3338f2c7", + "sha256:943aa74a11f5806ab68278284a4ddd282d3fb348a0e96db9b42cb81bf731acdc", + "sha256:9461802f2e965de5cff80c5a13bc945abea7edaa1d29360b485c3d2b56cdb075", + "sha256:9b66fcd38659cab5d29e8de5409cdf91e9986817703e1078b2fdaad731ea66f5", + 
"sha256:a6bec1c010a6d65b3ed88c863d56b9ea5eeefdf62b5e39cafd08c65f5ce5198b", + "sha256:a921002be69ac3ab2cf0c3017c4e6a3377f800f1fca7f254c13b5f1a2f10022c", + "sha256:aca7b6d99a4541b2ebab4494f6c8c2f947e0df4ac859ced575238e1d6ca5716b", + "sha256:ad7acbe95bac70e4e687a4dc9ae3f7a2f467aa6597049eeb6d4a662ecd990bb6", + "sha256:af8ce2d31679006e7b747d30a89cd3ac1ec304c3d4c20973f0f4ad58e2d1c4c9", + "sha256:b4a2cf92995635b64876dc141af0ef089c6eea7e05898d8d8865e71a326c0385", + "sha256:bbda76961eb8f27e6ad3c84d1dc56d5bc61ba8f02bd20fcf3450bd421c2fcc9c", + "sha256:bd7e4baf9161d076b9a7e432fce06217b9bd90cfb8f1d543d6e8c4595627edb9", + "sha256:bea30da1e76cb1acc5b72e204a920a3a7678d9d52f688f087dc08e54e2754c67", + "sha256:c61e2e41656a673b777e2f0cbbe545323dbe0d32312f590b1bc09da1de6c2a02", + "sha256:c6c4da4843e0dabde41b8f2e8147438330924114f541949e6318358a56d1875a", + "sha256:d3499008ddec83127ab286c6f6ec82a34f39c9817f020f75eca96155f9765097", + "sha256:dbb990612c36163c6072723523d2be7c3eb1517bbdd63fe50449f56afafd1133", + "sha256:dd53b6c4e6d960600fd6532b79ee28e2da489322fcf6648738134587faf767b6", + "sha256:df40c16a7e8be7413b885c9bf900d402918cc848be08a59b022478804ea076b8", + "sha256:e0a5354cb4de9b64bccb6ea33162cb83e03dbefa0d892db88a672f5aad638a75", + "sha256:e0b148ab0438f72ad21cb004ce3bdaafd28465c4276af66df3b9ecd2037bf252", + "sha256:e23b88c69497a6322b5796c0781400692eca1ae5532821b39ce81a48c395aae9", + "sha256:fc4974d3684f28b61b9a90fcb4c41fb340fd4b6a50c04365704a4da5a9603b05", + "sha256:feea693c452d85ea0015ebe3bb9cd15b6f49acc1a31c28b3c50f4db0f8fb1e71", + "sha256:fffcc8edc508801ed2e6a4e7b0d150a62196fd28b4e16ab9f65192e8186102b6" ], "markers": "python_version >= '3.7'", - "version": "==2.0.27" + "version": "==2.0.28" }, "st-annotated-text": { "hashes": [ @@ -3790,6 +3772,7 @@ "sha256:ed9e29eb94cd493b36bca9cb0b1fd7f06a0688215ad1e4b3ab4931726e0ec092", "sha256:f1b90ac61f862634039265cd0f746cc9879feee03ff962c803486301b778714b" ], + "index": "pypi", "markers": "python_full_version >= '3.8.0'", "version": "==2.2.1" }, diff --git a/README.md b/README.md index 23180dd..76df43f 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ ![local-rag-demo](demo.gif) +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8588/badge)](https://www.bestpractices.dev/projects/8588) ![GitHub commit activity](https://img.shields.io/github/commit-activity/t/jonfairbanks/local-rag) ![GitHub last commit](https://img.shields.io/github/last-commit/jonfairbanks/local-rag) ![GitHub License](https://img.shields.io/github/license/jonfairbanks/local-rag) @@ -13,8 +14,12 @@ Ingest files for retrieval augmented generation (RAG) with open-source Large Lan Features: - Offline Embeddings & LLMs Support (No OpenAI!) 
+- Support for Multiple Sources + - Local Files + - GitHub Repos + - Websites - Streaming Responses -- Conversation Memory +- Conversational Memory - Chat Export Learn More: diff --git a/components/page_config.py b/components/page_config.py index 489436d..fce389b 100644 --- a/components/page_config.py +++ b/components/page_config.py @@ -6,20 +6,21 @@ def set_page_config(): page_title="Local RAG", page_icon="📚", layout="wide", - initial_sidebar_state=st.session_state['sidebar_state'], + initial_sidebar_state=st.session_state["sidebar_state"], menu_items={ - 'Get Help': 'https://github.com/jonfairbanks/local-rag/discussions', - 'Report a bug': "https://github.com/jonfairbanks/local-rag/issues", - } + "Get Help": "https://github.com/jonfairbanks/local-rag/discussions", + "Report a bug": "https://github.com/jonfairbanks/local-rag/issues", + }, ) # Remove the Streamlit `Deploy` button from the Header st.markdown( - r""" + r""" - """, unsafe_allow_html=True + """, + unsafe_allow_html=True, ) diff --git a/components/page_state.py b/components/page_state.py index 850b4da..0022905 100644 --- a/components/page_state.py +++ b/components/page_state.py @@ -11,8 +11,8 @@ def set_initial_state(): # General # ########### - if 'sidebar_state' not in st.session_state: - st.session_state['sidebar_state'] = 'expanded' + if "sidebar_state" not in st.session_state: + st.session_state["sidebar_state"] = "expanded" if "ollama_endpoint" not in st.session_state: st.session_state["ollama_endpoint"] = "http://localhost:11434" @@ -30,10 +30,14 @@ def set_initial_state(): if "selected_model" not in st.session_state: try: - if("llama2:7b" in st.session_state["ollama_models"]): - st.session_state["selected_model"] = "llama2:7b" # Default to llama2:7b on initial load + if "llama2:7b" in st.session_state["ollama_models"]: + st.session_state["selected_model"] = ( + "llama2:7b" # Default to llama2:7b on initial load + ) else: - st.session_state["selected_model"] = st.session_state["ollama_models"][0] # If llama2:7b is not present, select the first model available + st.session_state["selected_model"] = st.session_state["ollama_models"][ + 0 + ] # If llama2:7b is not present, select the first model available except Exception: st.session_state["selected_model"] = None pass diff --git a/components/tabs/local_files.py b/components/tabs/local_files.py index d9a50c9..8ea175d 100644 --- a/components/tabs/local_files.py +++ b/components/tabs/local_files.py @@ -22,6 +22,7 @@ "txt", ) + def local_files(): # Force users to confirm Settings before uploading files if st.session_state["selected_model"] is not None: diff --git a/components/tabs/website.py b/components/tabs/website.py index 562686b..a97ca38 100644 --- a/components/tabs/website.py +++ b/components/tabs/website.py @@ -5,6 +5,7 @@ from llama_index.readers.web import SimpleWebPageReader from urllib.parse import urlparse + def ensure_https(url): parsed = urlparse(url) @@ -13,6 +14,7 @@ def ensure_https(url): return url + def website(): # if st.session_state["selected_model"] is not None: # st.text_input( @@ -44,31 +46,29 @@ def website(): # st.write(css_example, unsafe_allow_html=True) - - st.write("Enter a Website") - col1, col2 = st.columns([1,.2]) + col1, col2 = st.columns([1, 0.2]) with col1: new_website = st.text_input("Enter a Website", label_visibility="collapsed") with col2: - add_button = st.button(u"➕") + add_button = st.button("➕") # If the add button is clicked, append the new website to our list - if add_button and new_website != '': - 
st.session_state['websites'].append(ensure_https(new_website)) - st.session_state['websites'] = sorted(set(st.session_state['websites'])) - - if st.session_state['websites'] != []: + if add_button and new_website != "": + st.session_state["websites"].append(ensure_https(new_website)) + st.session_state["websites"] = sorted(set(st.session_state["websites"])) + + if st.session_state["websites"] != []: st.markdown(f"
Website(s)
", unsafe_allow_html=True) - for site in st.session_state['websites']: + for site in st.session_state["websites"]: st.caption(f"- {site}") st.write("") process_button = st.button("Process", key="process_website") - + if process_button: documents = SimpleWebPageReader(html_to_text=True).load_data( - st.session_state['websites'] + st.session_state["websites"] ) if len(documents) > 0: diff --git a/docs/setup.md b/docs/setup.md index 307b24f..6882234 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -13,3 +13,11 @@ Before you get started with Local RAG, ensure you have: ### Docker - `docker compose up -d` + +#### Note: + +If you are running Ollama as a service, you may need to add an additional configuration to your docker-compose.yml file: +``` +extra_hosts: +- 'host.docker.internal:host-gateway' +``` \ No newline at end of file diff --git a/docs/todo.md b/docs/todo.md index f1e2ead..65dfef1 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -19,7 +19,7 @@ Although not final, items are generally sorted from highest to lowest priority. - [x] Docker Support - [x] Windows Support - [ ] Extract Metadata and Load into Index -- [ ] Parallelize Document Embeddings +- [x] Faster Document Embeddings (Cuda, Batch Size, ...) - [ ] Swap to OpenAI compatible endpoints - [ ] Allow Usage of Ollama hosted embeddings - [ ] Enable support for additional LLM backends @@ -33,7 +33,6 @@ Although not final, items are generally sorted from highest to lowest priority. - [x] View and Manage Imported Files - [x] About Tab in Sidebar w/ Resources - [x] Enable Caching -- [ ] Swap Repo & Website input to [Streamlit-Tags](https://gagan3012-streamlit-tags-examplesapp-7aiy65.streamlit.app) - [ ] Allow Users to Set LLM Settings - [ ] System Prompt (needs more work) - [x] Chat Mode @@ -55,7 +54,7 @@ Although not final, items are generally sorted from highest to lowest priority. 
- [x] Refactor README - [x] Implement Log Library - [x] Improve Logging -- [ ] Re-write Docstrings +- [x] Re-write Docstrings - [ ] Tests ### Known Issues & Bugs diff --git a/main.py b/main.py index fdec4f4..cabe030 100644 --- a/main.py +++ b/main.py @@ -15,6 +15,7 @@ def generate_welcome_message(msg): time.sleep(0.025) # This is blocking :( yield char + ### Setup Initial State set_initial_state() diff --git a/utils/helpers.py b/utils/helpers.py index 11f81bc..be08e72 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -111,7 +111,7 @@ def clone_github_repo(repo: str): # Extract File Metadata # ################################### - + def get_file_metadata(file_path): """ @@ -131,4 +131,4 @@ def get_file_metadata(file_path): for d in et.get_metadata(file_path): return json.dumps(d, indent=2) except Exception: - pass \ No newline at end of file + pass diff --git a/utils/llama_index.py b/utils/llama_index.py index fbaafb1..81bb145 100644 --- a/utils/llama_index.py +++ b/utils/llama_index.py @@ -4,10 +4,10 @@ import utils.logs as logs -from numba import cuda -from llama_index.embeddings.huggingface import ( HuggingFaceEmbedding ) +from torch import cuda +from llama_index.embeddings.huggingface import HuggingFaceEmbedding -# This is not used but required by llama-index and must be imported FIRST +# This is not used but required by llama-index and must be set FIRST os.environ["OPENAI_API_KEY"] = "sk-abc123" from llama_index.core import ( @@ -24,6 +24,7 @@ # ################################### + @st.cache_resource(show_spinner=False) def setup_embedding_model( model: str, @@ -43,11 +44,12 @@ def setup_embedding_model( Notes: The `device` parameter can be set to 'cpu' or 'cuda' to specify the device to use for the embedding computations. If 'cuda' is used and CUDA is available, the embedding model will be run on the GPU. Otherwise, it will be run on the CPU. 
""" - device = 'cpu' if not cuda.is_available() else 'cuda' + device = "cpu" if not cuda.is_available() else "cuda" + logs.log.info(f"Using {device} to generate embeddings") embed_model = HuggingFaceEmbedding( model_name=model, # embed_batch_size=25, // TODO: Turning this on creates chaos, but has the potential to improve performance - device=device + device=device, ) logs.log.info(f"Embedding model created successfully") return embed_model @@ -61,6 +63,7 @@ def setup_embedding_model( # TODO: Migrate to LlamaIndex.Settings: https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/service_context_migration.html + def create_service_context( llm, # TODO: Determine type system_prompt: str = None, @@ -95,13 +98,13 @@ def create_service_context( service_context = ServiceContext.from_defaults( llm=llm, system_prompt=system_prompt, - embed_model=formatted_embed_model, + embed_model=embedding_model, chunk_size=int(chunk_size), # chunk_overlap=int(chunk_overlap), ) logs.log.info(f"Service Context created successfully") st.session_state["service_context"] = service_context - + # Note: this may be redundant since service_context is returned set_global_service_context(service_context) @@ -143,7 +146,9 @@ def load_documents(data_dir: str): logs.log.error(f"Error creating data index: {err}") finally: for file in os.scandir(data_dir): - if file.is_file() and not file.name.startswith(".gitkeep"): # TODO: Confirm syntax here + if file.is_file() and not file.name.startswith( + ".gitkeep" + ): # TODO: Confirm syntax here os.remove(file.path) logs.log.info(f"Document loading complete; removing local file(s)") @@ -154,6 +159,7 @@ def load_documents(data_dir: str): # ################################### + @st.cache_data(show_spinner=False) def create_index(_documents, _service_context): """ @@ -172,7 +178,7 @@ def create_index(_documents, _service_context): Notes: The `documents` parameter should be a list of strings representing the content of the documents to be indexed. The `service_context` parameter should be an instance of `ServiceContext`, providing information about the Llama model and other configuration settings for the index. """ - + try: index = VectorStoreIndex.from_documents( documents=_documents, service_context=_service_context, show_progress=True @@ -193,7 +199,7 @@ def create_index(_documents, _service_context): ################################### -@st.cache_data(show_spinner=False) +@st.cache_resource(show_spinner=False) def create_query_engine(_documents, _service_context): """ Creates a query engine from the provided documents and service context. diff --git a/utils/logs.py b/utils/logs.py index bb033d1..119f12f 100644 --- a/utils/logs.py +++ b/utils/logs.py @@ -4,7 +4,9 @@ from typing import Union -def setup_logger(log_file: str = "local-rag.log", level: Union[int, str] = logging.INFO): +def setup_logger( + log_file: str = "local-rag.log", level: Union[int, str] = logging.INFO +): """ Sets up a logger for this module. diff --git a/utils/ollama.py b/utils/ollama.py index 9c8b13f..f61f61a 100644 --- a/utils/ollama.py +++ b/utils/ollama.py @@ -61,8 +61,8 @@ def get_models(): - Exception: If there is an error retrieving the list of models. Notes: - This function retrieves a list of available language models from the Ollama server using the `ollama` library. It takes no parameters and returns a list of available language model names. - + This function retrieves a list of available language models from the Ollama server using the `ollama` library. 
It takes no parameters and returns a list of available language model names. + The function raises an exception if there is an error retrieving the list of models. Side Effects: @@ -80,8 +80,10 @@ def get_models(): if len(models) > 0: logs.log.info("Ollama models loaded successfully") else: - logs.log.warn("Ollama did not return any models. Make sure to download some!") - + logs.log.warn( + "Ollama did not return any models. Make sure to download some!" + ) + return models except Exception as err: logs.log.error(f"Failed to retrieve Ollama model list: {err}") @@ -167,12 +169,12 @@ def context_chat(prompt: str, query_engine: RetrieverQueryEngine): - Exception: If there is an error retrieving answers from the Llama-Index model. Notes: - This function initiates a chat with context using the Llama-Index language model and index. - - It takes two parameters, `prompt` and `query_engine`, which should be the starting prompt for the conversation and the Llama-Index query engine to use for retrieving answers, respectively. - - The function returns an iterable yielding successive chunks of conversation from the Llama-Index index with context. - + This function initiates a chat with context using the Llama-Index language model and index. + + It takes two parameters, `prompt` and `query_engine`, which should be the starting prompt for the conversation and the Llama-Index query engine to use for retrieving answers, respectively. + + The function returns an iterable yielding successive chunks of conversation from the Llama-Index index with context. + If there is an error retrieving answers from the Llama-Index instance, the function raises an exception. Side Effects: diff --git a/utils/rag_pipeline.py b/utils/rag_pipeline.py index 6007af6..80b3d42 100644 --- a/utils/rag_pipeline.py +++ b/utils/rag_pipeline.py @@ -10,6 +10,7 @@ import utils.llama_index as llama_index import utils.logs as logs + def rag_pipeline(uploaded_files: list = None): """ RAG pipeline for Llama-based chatbots. @@ -49,7 +50,6 @@ def rag_pipeline(uploaded_files: list = None): st.caption("✔️ Files Uploaded") - ###################################### # Create Llama-Index service-context # # to use local LLMs and embeddings # @@ -110,7 +110,10 @@ def rag_pipeline(uploaded_files: list = None): ####################################### # if documents already exists in state - if st.session_state["documents"] is not None and len(st.session_state["documents"]) > 0: + if ( + st.session_state["documents"] is not None + and len(st.session_state["documents"]) > 0 + ): logs.log.info("Documents are already available; skipping document loading") st.caption("✔️ Processed File Data") else: @@ -145,13 +148,15 @@ def rag_pipeline(uploaded_files: list = None): # Remove data files # ##################### - if len(st.session_state['file_list']) > 0: + if len(st.session_state["file_list"]) > 0: try: save_dir = os.getcwd() + "/data" shutil.rmtree(save_dir) st.caption("✔️ Removed Temp Files") except Exception as err: - logs.log.warning(f"Unable to delete data files, you may want to clean-up manually: {str(err)}") + logs.log.warning( + f"Unable to delete data files, you may want to clean-up manually: {str(err)}" + ) pass - - return error # If no errors occurred, None is returned \ No newline at end of file + + return error # If no errors occurred, None is returned
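
A note on the `extra_hosts` snippet added to docs/setup.md above: in a Compose file it belongs under the service that runs Local RAG, not at the top level. A minimal sketch, assuming a service name, image, and port mapping that are illustrative placeholders rather than values taken from the repository's docker-compose.yml:

```yaml
# Sketch only: service name, image, and port are placeholders.
# The point is where `extra_hosts` sits, letting the container reach an
# Ollama instance running as a service on the Docker host.
services:
  local-rag:
    image: local-rag:latest
    ports:
      - "8501:8501"
    extra_hosts:
      - "host.docker.internal:host-gateway"
```

With that mapping in place, the app inside the container can point its `ollama_endpoint` at `http://host.docker.internal:11434`.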
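
The utils/llama_index.py hunk swaps the CUDA probe from `numba.cuda` to `torch.cuda`, which is what lets the Pipfile drop `numba` (and its `llvmlite` hashes) while pinning `torch`, already required by the HuggingFace embedding stack. A standalone sketch of the same pattern:

```python
import torch

# Mirrors the device selection now used in setup_embedding_model():
# fall back to CPU when no CUDA device is visible to torch.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Generating embeddings on: {device}")
```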
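
utils/llama_index.py also moves `create_query_engine` from `@st.cache_data` to `@st.cache_resource`. The distinction matters here: `st.cache_data` pickles and copies return values, which does not suit a stateful, unpicklable object like a query engine, while `st.cache_resource` hands back one shared instance across reruns. A minimal sketch; `build_engine` is a hypothetical stand-in, not project code:

```python
import streamlit as st

# cache_resource keeps a single shared object across reruns instead of
# serializing it, which is what a query engine or DB client needs.
@st.cache_resource(show_spinner=False)
def build_engine(model_name: str):
    return {"model": model_name}  # placeholder for a real RetrieverQueryEngine
```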