On MacBook CPU inference, running in Docker, Chinese input causes an error.
GGML model:
https://huggingface.co/LinkSoul/Chinese-Llama-2-7b-ggml/blob/main/Chinese-Llama-2-7b.ggmlv3.q4_1.bin
llama.cpp: loading model from /app/model/llama-2-7b-chat.ggmlv3.q4_1.bin
llama_model_load_internal: format = ggjt v3 (latest)
llama_model_load_internal: n_vocab = 32000
llama_model_load_internal: n_ctx = 4000
llama_model_load_internal: n_embd = 4096
llama_model_load_internal: n_mult = 256
llama_model_load_internal: n_head = 32
llama_model_load_internal: n_head_kv = 32
llama_model_load_internal: n_layer = 32
llama_model_load_internal: n_rot = 128
llama_model_load_internal: n_gqa = 1
llama_model_load_internal: rnorm_eps = 1.0e-06
llama_model_load_internal: n_ff = 11008
llama_model_load_internal: freq_base = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype = 3 (mostly Q4_1)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size = 0.08 MB
llama_model_load_internal: mem required = 4537.35 MB (+ 2000.00 MB per state)
llama_new_context_with_model: kv self size = 2000.00 MB
AVX = 1 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Running on CPU with llama.cpp.
Caching examples at: '/app/gradio_cached_examples/19'
Caching example 1/5
Caching example 2/5
Caching example 3/5
Caching example 4/5
Caching example 5/5
Caching complete
Running on local URL: http://0.0.0.0:7860
Could not create share link. Missing file: /usr/local/lib/python3.9/site-packages/gradio/frpc_linux_amd64_v0.2.
Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps:
Rename the downloaded file to: frpc_linux_amd64_v0.2
Move the file to this location: /usr/local/lib/python3.9/site-packages/gradio
Keyboard interruption in main thread... closing server.
llama.cpp: loading model from /app/model/llama-2-7b-chat.ggmlv3.q4_1.bin
llama_model_load_internal: format = ggjt v3 (latest)
llama_model_load_internal: n_vocab = 32000
llama_model_load_internal: n_ctx = 4000
llama_model_load_internal: n_embd = 4096
llama_model_load_internal: n_mult = 256
llama_model_load_internal: n_head = 32
llama_model_load_internal: n_head_kv = 32
llama_model_load_internal: n_layer = 32
llama_model_load_internal: n_rot = 128
llama_model_load_internal: n_gqa = 1
llama_model_load_internal: rnorm_eps = 1.0e-06
llama_model_load_internal: n_ff = 11008
llama_model_load_internal: freq_base = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype = 3 (mostly Q4_1)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size = 0.08 MB
llama_model_load_internal: mem required = 4561.77 MB (+ 2000.00 MB per state)
llama_new_context_with_model: kv self size = 2000.00 MB
AVX = 1 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
Llama.generate: prefix-match hit
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/gradio/routes.py", line 439, in run_predict
output = await app.get_blocks().process_api(
File "/usr/local/lib/python3.9/site-packages/gradio/blocks.py", line 1389, in process_api
result = await self.call_function(
File "/usr/local/lib/python3.9/site-packages/gradio/blocks.py", line 1108, in call_function
prediction = await utils.async_iteration(iterator)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 347, in async_iteration
return await iterator.__anext__()
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 340, in __anext__
return await anyio.to_thread.run_sync(
File "/usr/local/lib/python3.9/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 323, in run_sync_iterator_async
return next(iterator)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 692, in gen_wrapper
yield from f(*args, **kwargs)
File "/app/app.py", line 115, in generate
for response in generator:
File "/app/model.py", line 105, in run
text = str(b_text, encoding="utf-8")
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 0: unexpected end of data
Llama.generate: prefix-match hit
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/gradio/routes.py", line 439, in run_predict
output = await app.get_blocks().process_api(
File "/usr/local/lib/python3.9/site-packages/gradio/blocks.py", line 1389, in process_api
result = await self.call_function(
File "/usr/local/lib/python3.9/site-packages/gradio/blocks.py", line 1108, in call_function
prediction = await utils.async_iteration(iterator)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 347, in async_iteration
return await iterator.__anext__()
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 340, in __anext__
return await anyio.to_thread.run_sync(
File "/usr/local/lib/python3.9/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 323, in run_sync_iterator_async
return next(iterator)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 692, in gen_wrapper
yield from f(*args, **kwargs)
File "/app/app.py", line 115, in generate
for response in generator:
File "/app/model.py", line 105, in run
text = str(b_text, encoding="utf-8")
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 0: unexpected end of data
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/gradio/routes.py", line 439, in run_predict
output = await app.get_blocks().process_api(
File "/usr/local/lib/python3.9/site-packages/gradio/blocks.py", line 1389, in process_api
result = await self.call_function(
File "/usr/local/lib/python3.9/site-packages/gradio/blocks.py", line 1108, in call_function
prediction = await utils.async_iteration(iterator)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 347, in async_iteration
return await iterator.__anext__()
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 340, in __anext__
return await anyio.to_thread.run_sync(
File "/usr/local/lib/python3.9/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/usr/local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 323, in run_sync_iterator_async
return next(iterator)
File "/usr/local/lib/python3.9/site-packages/gradio/utils.py", line 692, in gen_wrapper
yield from f(*args, **kwargs)
File "/app/app.py", line 115, in generate
for response in generator:
File "/app/model.py", line 105, in run
text = str(b_text, encoding="utf-8")
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe5 in position 0: unexpected end of data
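Bytes 0xe4 and 0xe5 in the tracebacks above are lead bytes of three-byte UTF-8 sequences, the range that covers most CJK characters. When a streamed chunk ends after the lead byte but before its continuation bytes arrive, decoding the fragment on its own reproduces the error exactly, as a quick Python session shows (the character 你 is just an arbitrary example):

```python
>>> "你".encode("utf-8")     # one CJK character spans three bytes
b'\xe4\xbd\xa0'
>>> b"\xe4".decode("utf-8")  # a lead byte stranded at a chunk boundary
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 0: unexpected end of data
```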
I have tested some Chinese Llama 2 models in GGML format and found that the issue above does occur in streaming mode; everything works fine when I turn off streaming while generating the answer. I don't know why, but I hope it's a useful clue. :)
btw, my machine is Win10 + WSL2 + Ubuntu 22.04 + NVIDIA RTX 2080 8GB.
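That observation matches the byte-splitting explanation: with streaming off, the whole output is decoded in one piece, so no character is ever cut in half. Below is a minimal sketch of a fix for the streaming path, assuming model.py's run() consumes an iterable of raw byte chunks (which the `text = str(b_text, encoding="utf-8")` line in the traceback suggests); the `byte_chunks` name is hypothetical, not from the original code:

```python
import codecs

def decode_stream(byte_chunks):
    """Decode streamed UTF-8 bytes without choking on split characters.

    An incremental decoder buffers a dangling lead byte until its
    continuation bytes arrive in the next chunk, instead of raising.
    """
    decoder = codecs.getincrementaldecoder("utf-8")()
    for b_text in byte_chunks:
        text = decoder.decode(b_text)  # yields only completed characters
        if text:
            yield text
    # final=True flushes the buffer and raises if the stream ended mid-character
    tail = decoder.decode(b"", final=True)
    if tail:
        yield tail
```

A cruder workaround is `b_text.decode("utf-8", errors="ignore")`, but that silently drops the split character instead of stitching it back together on the next chunk.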