When I try to start the llama-gpt API using docker-compose-gguf.yml, I get a bunch of errors (see below) from the api container.
I've also tried different .gguf models, but I still see the same errors.
Any idea what's causing them?
Thanks
```
llama-gpt-api_llama-gpt-api_1 exited with code 1
llama-gpt-ui_1 | [INFO wait] Host [llama-gpt-api:8000] not yet available...
llama-gpt-ui_1 | [INFO wait] Host [llama-gpt-api:8000] not yet available...
llama-gpt-ui_1 | [INFO wait] Host [llama-gpt-api:8000] not yet available...
llama-gpt-ui_1 | [INFO wait] Host [llama-gpt-api:8000] not yet available...
llama-gpt-api_1 | /usr/local/lib/python3.11/site-packages/setuptools/command/develop.py:40: EasyInstallDeprecationWarning: easy_install command is deprecated.
llama-gpt-api_1 | !!
llama-gpt-api_1 |
llama-gpt-api_1 | ********************************************************************************
llama-gpt-api_1 | Please avoid running ``setup.py`` and ``easy_install``.
llama-gpt-api_1 | Instead, use pypa/build, pypa/installer or other
llama-gpt-api_1 | standards-based tools.
llama-gpt-api_1 |
llama-gpt-api_1 | See https://github.com/pypa/setuptools/issues/917 for details.
llama-gpt-api_1 | ********************************************************************************
llama-gpt-api_1 |
llama-gpt-api_1 | !!
llama-gpt-api_1 | easy_install.initialize_options(self)
llama-gpt-ui_1 | [INFO wait] Host [llama-gpt-api:8000] not yet available...
llama-gpt-api_1 | [1/2] Generating /app/vendor/llama.cpp/libllama.so
llama-gpt-api_1 | FAILED: /app/vendor/llama.cpp/libllama.so
llama-gpt-api_1 | cd /app/vendor/llama.cpp && make libllama.so
llama-gpt-api_1 | make[1]: Entering directory '/app/vendor/llama.cpp'
llama-gpt-api_1 | I llama.cpp build info:
llama-gpt-api_1 | I UNAME_S: Linux
llama-gpt-api_1 | I UNAME_P: unknown
llama-gpt-api_1 | I UNAME_M: x86_64
llama-gpt-api_1 | I CFLAGS: -I. -O3 -std=c11 -fPIC -DNDEBUG -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -pthread -march=native -mtune=native -DGGML_USE_K_QUANTS
llama-gpt-api_1 | I CXXFLAGS: -I. -I./common -O3 -std=c++11 -fPIC -DNDEBUG -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar -pthread -march=native -mtune=native -DGGML_USE_K_QUANTS
llama-gpt-api_1 | I LDFLAGS:
llama-gpt-api_1 | I CC: cc (Debian 10.2.1-6) 10.2.1 20210110
llama-gpt-api_1 | I CXX: g++ (Debian 10.2.1-6) 10.2.1 20210110
llama-gpt-api_1 |
llama-gpt-api_1 | cc -I. -O3 -std=c11 -fPIC -DNDEBUG -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -pthread -march=native -mtune=native -DGGML_USE_K_QUANTS -c ggml.c -o ggml.o
llama-gpt-api_1 | In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:111,
llama-gpt-api_1 | from ggml.c:302:
llama-gpt-api_1 | ggml.c: In function ‘ggml_vec_dot_q4_0_q8_0’:
llama-gpt-api_1 | /usr/lib/gcc/x86_64-linux-gnu/10/include/fmaintrin.h:63:1: error: inlining failed in call to ‘always_inline’ ‘_mm256_fmadd_ps’: target specific option mismatch
llama-gpt-api_1 | 63 | _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~
llama-gpt-api_1 | ggml.c:2527:15: note: called from here
llama-gpt-api_1 | 2527 | acc = _mm256_fmadd_ps( d, q, acc );
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
llama-gpt-api_1 | In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:111,
llama-gpt-api_1 | from ggml.c:302:
llama-gpt-api_1 | /usr/lib/gcc/x86_64-linux-gnu/10/include/fmaintrin.h:63:1: error: inlining failed in call to ‘always_inline’ ‘_mm256_fmadd_ps’: target specific option mismatch
llama-gpt-api_1 | 63 | _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~
llama-gpt-api_1 | ggml.c:2527:15: note: called from here
llama-gpt-api_1 | 2527 | acc = _mm256_fmadd_ps( d, q, acc );
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
llama-gpt-api_1 | In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:111,
llama-gpt-api_1 | from ggml.c:302:
llama-gpt-api_1 | /usr/lib/gcc/x86_64-linux-gnu/10/include/fmaintrin.h:63:1: error: inlining failed in call to ‘always_inline’ ‘_mm256_fmadd_ps’: target specific option mismatch
llama-gpt-api_1 | 63 | _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~
llama-gpt-api_1 | ggml.c:2527:15: note: called from here
llama-gpt-api_1 | 2527 | acc = _mm256_fmadd_ps( d, q, acc );
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
llama-gpt-api_1 | In file included from /usr/lib/gcc/x86_64-linux-gnu/10/include/immintrin.h:111,
llama-gpt-api_1 | from ggml.c:302:
llama-gpt-api_1 | /usr/lib/gcc/x86_64-linux-gnu/10/include/fmaintrin.h:63:1: error: inlining failed in call to ‘always_inline’ ‘_mm256_fmadd_ps’: target specific option mismatch
llama-gpt-api_1 | 63 | _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~
llama-gpt-api_1 | ggml.c:2527:15: note: called from here
llama-gpt-api_1 | 2527 | acc = _mm256_fmadd_ps( d, q, acc );
llama-gpt-api_1 | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
llama-gpt-api_1 | make[1]: *** [Makefile:349: ggml.o] Error 1
llama-gpt-api_1 | make[1]: Leaving directory '/app/vendor/llama.cpp'
llama-gpt-api_1 | ninja: build stopped: subcommand failed.
llama-gpt-api_1 | Traceback (most recent call last):
llama-gpt-api_1 | File "/usr/local/lib/python3.11/site-packages/skbuild/setuptools_wrap.py", line 674, in setup
llama-gpt-api_1 | cmkr.make(make_args, install_target=cmake_install_target, env=env)
llama-gpt-api_1 | File "/usr/local/lib/python3.11/site-packages/skbuild/cmaker.py", line 697, in make
llama-gpt-api_1 | self.make_impl(clargs=clargs, config=config, source_dir=source_dir, install_target=install_target, env=env)
llama-gpt-api_1 | File "/usr/local/lib/python3.11/site-packages/skbuild/cmaker.py", line 742, in make_impl
llama-gpt-api_1 | raise SKBuildError(msg)
llama-gpt-api_1 |
llama-gpt-api_1 | An error occurred while building with CMake.
llama-gpt-api_1 | Command:
llama-gpt-api_1 | /usr/local/lib/python3.11/site-packages/cmake/data/bin/cmake --build . --target install --config Release --
llama-gpt-api_1 | Install target:
llama-gpt-api_1 | install
llama-gpt-api_1 | Source directory:
llama-gpt-api_1 | /app
llama-gpt-api_1 | Working directory:
llama-gpt-api_1 | /app/_skbuild/linux-x86_64-3.11/cmake-build
llama-gpt-api_1 | Please check the install target is valid and see CMake's output for more information.
llama-gpt-api_1 |
llama-gpt-api_1 | make: *** [Makefile:9: build] Error 1
llama-gpt-api_1 | Initializing server with:
llama-gpt-api_1 | Batch size: 2096
llama-gpt-api_1 | Number of CPU threads: 4
llama-gpt-api_1 | Number of GPU layers: 0
llama-gpt-api_1 | Context window: 4096
llama-gpt-api_1 | Traceback (most recent call last):
llama-gpt-api_1 | File "<frozen runpy>", line 189, in _run_module_as_main
llama-gpt-api_1 | File "<frozen runpy>", line 112, in _get_module_details
llama-gpt-api_1 | File "/app/llama_cpp/__init__.py", line 1, in <module>
llama-gpt-api_1 | from .llama_cpp import *
llama-gpt-api_1 | File "/app/llama_cpp/llama_cpp.py", line 80, in <module>
llama-gpt-api_1 | _lib = _load_shared_library(_lib_base_name)
llama-gpt-api_1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llama-gpt-api_1 | File "/app/llama_cpp/llama_cpp.py", line 71, in _load_shared_library
llama-gpt-api_1 | raise FileNotFoundError(
llama-gpt-api_1 | FileNotFoundError: Shared library with base name 'llama' not found
llama-gpt-api_llama-gpt-api_1 exited with code 1
```
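For what it's worth, every compile error in that log is the same one: `_mm256_fmadd_ps` failing to inline ("target specific option mismatch") while ggml.c is built with `-march=native -mtune=native`. I believe that usually means the CPU the build sees (the host, or a VM's virtual CPU) doesn't advertise FMA/AVX2. This is how I'd check the flags on the Docker host — a plain-Linux sketch, nothing llama-gpt-specific:

```
# Print the instruction-set flags the host CPU exposes; fma/avx2 would need to
# appear here for llama.cpp's default x86 path to compile under -march=native.
grep -m1 '^flags' /proc/cpuinfo | tr ' ' '\n' | grep -E '^(sse4_2|avx|avx2|fma|f16c)$'
```

On the machine where this fails, I'd expect `fma` and/or `avx2` to be missing from that output.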
docker-compose-gguf.yml:
```
version: '3.6'

services:
  llama-gpt-api:
    # Pin to llama-cpp-python 0.1.80 with GGUF support
    image: ghcr.io/abetlen/llama-cpp-python:latest@sha256:de0fd227f348b5e43d4b5b7300f1344e712c14132914d1332182e9ecfde502b2
    restart: on-failure
    volumes:
      - './models:/models'
      - './api:/api'
    ports:
      - 3001:8000
    environment:
      MODEL: '/models/${MODEL_NAME:-code-llama-2-7b-chat.gguf}'
      MODEL_DOWNLOAD_URL: '${MODEL_DOWNLOAD_URL:-https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf}'
      N_GQA: '${N_GQA:-1}'
      USE_MLOCK: 1
    cap_add:
      - IPC_LOCK
    command: '/bin/sh /api/run.sh'

  llama-gpt-ui:
    # TODO: Use this image instead of building from source after the next release
    # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    build:
      context: ./ui
      dockerfile: Dockerfile
    ports:
      - 3002:3000
    restart: on-failure
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
      - 'DEFAULT_MODEL=/models/${MODEL_NAME:-llama-2-7b-chat.bin}'
      - 'NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=${DEFAULT_SYSTEM_PROMPT:-"You are a helpful and friendly AI assistant. Respond very concisely."}'
      - 'WAIT_HOSTS=llama-gpt-api:8000'
      - 'WAIT_TIMEOUT=${WAIT_TIMEOUT:-3600}'
```
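To narrow it down further, it might also help to ask the compiler inside the api container which of those instruction sets `-march=native` actually enables there (the image does ship `cc`, per the build log above; the service name comes from the compose file, and the `-f` argument is whichever compose file you start the stack with):

```
# Dump the macros cc predefines under -march=native inside the api container;
# if __AVX2__ / __FMA__ are missing, that would explain the always_inline errors.
docker-compose -f docker-compose-gguf.yml run --rm llama-gpt-api \
  sh -c 'cc -march=native -dM -E - </dev/null | grep -E "__(AVX2|FMA|AVX)__"'
```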