From d3d3187e512d7ce93d1184207d6b3a5f2bae762e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 18 Jun 2023 08:27:29 +0200 Subject: [PATCH] feat: fix CUDA images and update go-llama to use full GPU offloading (#618) Signed-off-by: mudler Co-authored-by: mudler --- .github/workflows/test.yml | 2 +- Dockerfile | 6 +++--- Makefile | 2 +- go.mod | 4 ++-- go.sum | 44 ++++---------------------------------- 5 files changed, 11 insertions(+), 47 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 347333e..a18cd20 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -41,4 +41,4 @@ jobs: - name: Test run: | - make test \ No newline at end of file + CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 2b84b69..854186b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.20 +ARG GO_VERSION=1.20-bullseye FROM golang:$GO_VERSION as requirements @@ -9,7 +9,7 @@ ARG CUDA_MINOR_VERSION=7 ENV BUILD_TYPE=${BUILD_TYPE} RUN apt-get update && \ - apt-get install -y ca-certificates cmake curl + apt-get install -y ca-certificates cmake curl patch # CuBLAS requirements RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ @@ -67,4 +67,4 @@ HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1 EXPOSE 8080 -ENTRYPOINT [ "/build/entrypoint.sh" ] +ENTRYPOINT [ "/build/entrypoint.sh" ] \ No newline at end of file diff --git a/Makefile b/Makefile index 75cfaca..ec1760a 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet BINARY_NAME=local-ai -GOLLAMA_VERSION?=5f1620443a59c5531b5a15a16cd68f600a8437e9 +GOLLAMA_VERSION?=7ad833b67070fd3ec46d838f5e38d21111013f98 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all GPT4ALL_VERSION?=b004c53a7bba182cd4483d95ba9e1f68d8e56da3 GOGGMLTRANSFORMERS_VERSION?=01b8436f44294d0e1267430f9eda4460458cec54 diff --git a/go.mod b/go.mod index d036847..ca0bc00 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/imdario/mergo v0.3.16 github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230614000846-8953b7f6a6d0 - github.com/onsi/ginkgo/v2 v2.10.0 + github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.8 github.com/otiai10/openaigo v1.1.1 github.com/rs/zerolog v1.29.1 @@ -63,7 +63,7 @@ require ( github.com/valyala/tcplisten v1.0.0 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect golang.org/x/net v0.10.0 // indirect - golang.org/x/sys v0.8.0 // indirect + golang.org/x/sys v0.9.0 // indirect golang.org/x/text v0.9.0 // indirect golang.org/x/tools v0.9.3 // indirect ) diff --git a/go.sum b/go.sum index 66c3a02..69b2db7 100644 --- a/go.sum +++ b/go.sum @@ -16,12 +16,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/donomii/go-rwkv.cpp v0.0.0-20230606181754-d5f48f6d607a h1:ye/xhplHFjnTd4O9QDDhM/QmuiGiSZaKARug6wNYgWg= -github.com/donomii/go-rwkv.cpp v0.0.0-20230606181754-d5f48f6d607a/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= -github.com/donomii/go-rwkv.cpp v0.0.0-20230608182638-fb8b955b0c35 h1:tfN6kA+k4O/oPEcCcZ+XZqyqOk9Z2L8NSHjmR4i6rhc= -github.com/donomii/go-rwkv.cpp v0.0.0-20230608182638-fb8b955b0c35/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= -github.com/donomii/go-rwkv.cpp v0.0.0-20230609132458-d2b25a4bb148 h1:rC8T9CXqzB4Gw2RGo7uA7r0bFE8Qvc7ZXOBwIyhQuLM= -github.com/donomii/go-rwkv.cpp v0.0.0-20230609132458-d2b25a4bb148/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/donomii/go-rwkv.cpp v0.0.0-20230614130248-a57bca3031fb h1:ekua5AlHdmz8LaCOyX2bMp+a1cOEzReUEDFr5A1NOjg= github.com/donomii/go-rwkv.cpp v0.0.0-20230614130248-a57bca3031fb/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27 h1:boeMTUUBtnLU8JElZJHXrsUzROJar9/t6vGOFjkrhhI= @@ -46,26 +40,10 @@ github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyr github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa h1:gxr68r/6EWroay4iI81jxqGCDbKotY4+CiwdUkBz2NQ= github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA= -github.com/go-skynet/go-bert.cpp v0.0.0-20230531070950-0548994371f7 h1:hm5rOxRf2Y8zmQTBgtDabLoprYHHQHmZ8ui8i4KQSgU= -github.com/go-skynet/go-bert.cpp v0.0.0-20230531070950-0548994371f7/go.mod h1:55l02IF2kD+LGEH4yXzmPPygeuWiUIo8Nbh/+ZU9cb0= github.com/go-skynet/go-bert.cpp v0.0.0-20230607105116-6069103f54b9 h1:wRGbDwNwPmSzoXVw/HLzXY4blpRvPWg7QW2OA0WKezA= github.com/go-skynet/go-bert.cpp v0.0.0-20230607105116-6069103f54b9/go.mod h1:pXKCpYYXujMeAvgJHU6WoMfvYbr84563+J8+Ebkyr5U= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230606131358-bd765bb6f3b3 h1:xgDRCrBU2YmwerI6CY6woFyEsBrzV/4a2/atOBm0aXE= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230606131358-bd765bb6f3b3/go.mod h1:/JbU8HZU+tUOp+1bQAeXf3AyRXm+p3UwhccoJwCTI9A= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230607102637-dabd6cd7b789 h1:63wTAm/9STwy2LJ7N/F+1jyp/uRoCK6EkBhAan2WLdI= -github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230607102637-dabd6cd7b789/go.mod h1:oNCRtfr+ZHodMQnzRXcwrMKY6RSLMRXThW9WGIR2+FA= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230610083154-01b8436f4429 h1:9w0Fy1C8fVbqKZO7Pr9NcdiN4/PSKo2OVvIdQ6meRbA= github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230610083154-01b8436f4429/go.mod h1:k/mT/SdGC6UQnNbSzeJDRyJV6kw0GfEFTpH44uTqflA= -github.com/go-skynet/go-llama.cpp v0.0.0-20230607123950-351aa714672f h1:c16pf8uTyaRRQLxR0QKp4q7XDeHXrXGVBHLOgdBtEgc= -github.com/go-skynet/go-llama.cpp v0.0.0-20230607123950-351aa714672f/go.mod h1:ffURxv+McO1SK7mWrNSaWPgTLqEukZNGTU6dn+ocMHg= -github.com/go-skynet/go-llama.cpp v0.0.0-20230608215450-672fb056081d h1:wYy8wst1Z0qP0kDWW5GrJsk89u39lbLvRgTx8uh7ijA= -github.com/go-skynet/go-llama.cpp v0.0.0-20230608215450-672fb056081d/go.mod h1:dUZekEbjnGUjk35v9iTIdmSst/NIDQ9s9Pyo4t1aBQg= -github.com/go-skynet/go-llama.cpp v0.0.0-20230609063927-a9211738b733 h1:k//Emr/uHqZvgghxWdedDVJtTbKz2uz0O/GEqzeU7Kk= -github.com/go-skynet/go-llama.cpp v0.0.0-20230609063927-a9211738b733/go.mod h1:dUZekEbjnGUjk35v9iTIdmSst/NIDQ9s9Pyo4t1aBQg= -github.com/go-skynet/go-llama.cpp v0.0.0-20230609233637-a12ce511c063 h1:TvBL5ppxuRpXfYJkAEjYgy27+aRD8/ls2JUc3JnNNio= -github.com/go-skynet/go-llama.cpp v0.0.0-20230609233637-a12ce511c063/go.mod h1:dUZekEbjnGUjk35v9iTIdmSst/NIDQ9s9Pyo4t1aBQg= -github.com/go-skynet/go-llama.cpp v0.0.0-20230613134423-5f1620443a59 h1:4RnJRwEQ/2Z8i6hhFxyhc9ef5hZP8XDh6/3saCJx+Rs= -github.com/go-skynet/go-llama.cpp v0.0.0-20230613134423-5f1620443a59/go.mod h1:dUZekEbjnGUjk35v9iTIdmSst/NIDQ9s9Pyo4t1aBQg= github.com/go-skynet/go-llama.cpp v0.0.0-20230614112429-a7960253c209 h1:4JNmUNjb1lo7hHZ+Ro680PVoeZ5qvOSofXBfrWMOdQo= github.com/go-skynet/go-llama.cpp v0.0.0-20230614112429-a7960253c209/go.mod h1:dUZekEbjnGUjk35v9iTIdmSst/NIDQ9s9Pyo4t1aBQg= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= @@ -113,26 +91,16 @@ github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6 github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230605194130-266f13aee9d8 h1:7SqRnb44CN9QQtZxdFTTgaSqsWVbtFRrHLbKhrTEXlM= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230605194130-266f13aee9d8/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230608180830-47fbc0e3092d h1:CtekEOIingzlwzlBySFEuR5PlCKmHylBr7F42y2erUQ= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230608180830-47fbc0e3092d/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230609204846-d3ba1295a764 h1:graL+iULNQEEGS1GK6n6nIPNVzmbQYF42VsNWadFRcY= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230609204846-d3ba1295a764/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230610141538-a9c2f473032f h1:Byfiqc4T4Tn0TT5QAazz59Tw7FwTsfOjKCsY+P3+CTc= -github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230610141538-a9c2f473032f/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230614000846-8953b7f6a6d0 h1:+QXKRNwzKyJvcJoH6tcCF0KhcG5aBbpLUquSJxdTRCU= github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230614000846-8953b7f6a6d0/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI= -github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= -github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= github.com/onsi/ginkgo/v2 v2.10.0 h1:sfUl4qgLdvkChZrWCYndY2EAu9BRIw1YphNAzy1VNWs= github.com/onsi/ginkgo/v2 v2.10.0/go.mod h1:UDQOh5wbQUlMnkLfVaIUMtQ1Vus92oM+P2JX1aulgcE= +github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= +github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= -github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg= -github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg= github.com/otiai10/openaigo v1.1.1 h1:ZGL13vSYA1WF/9g18JEzfEuyC+MscGyrtPMsilebOY0= github.com/otiai10/openaigo v1.1.1/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg= github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= @@ -148,8 +116,6 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc= github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sashabaranov/go-openai v1.10.0 h1:uUD3EOKDdGa6geMVbe2Trj9/ckF9sCV5jpQM19f7GM8= -github.com/sashabaranov/go-openai v1.10.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/sashabaranov/go-openai v1.10.1 h1:6WyHJaNzF266VaEEuW6R4YW+Ei0wpMnqRYPGK7fhuhQ= github.com/sashabaranov/go-openai v1.10.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4= @@ -167,8 +133,6 @@ github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKik github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw= github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= -github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a h1:YtKJTKbM3qu60+ZxLtyeCl0RvdG7LKbyF8TT7nzV6Gg= -github.com/tmc/langchaingo v0.0.0-20230605114752-4afed6d7be4a/go.mod h1:6l1WoyqVDwkv7cFlY3gfcTv8yVowVyuutKv8PGlQCWI= github.com/tmc/langchaingo v0.0.0-20230610024316-06cb7b57ea80 h1:Y+a76dNVbdWduw3gznOr2O2OSZkdwDRYPKTDpG/vM9I= github.com/tmc/langchaingo v0.0.0-20230610024316-06cb7b57ea80/go.mod h1:6l1WoyqVDwkv7cFlY3gfcTv8yVowVyuutKv8PGlQCWI= github.com/urfave/cli/v2 v2.25.5 h1:d0NIAyhh5shGscroL7ek/Ya9QYQE0KNabJgiUinIQkc= @@ -220,6 +184,8 @@ golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= +golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= @@ -235,8 +201,6 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=