From 9f426578cfe7b7bb6b2fb52bb21f6762783672ad Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 9 May 2023 11:43:50 +0200 Subject: [PATCH] feat: add transcript endpoint (#211) --- .github/workflows/bump_deps.yaml | 3 ++ Makefile | 19 +++++-- README.md | 25 +++++++++ api/api.go | 2 + api/openai.go | 50 +++++++++++++++++ go.mod | 5 ++ go.sum | 23 ++++---- pkg/whisper/whisper.go | 93 ++++++++++++++++++++++++++++++++ 8 files changed, 205 insertions(+), 15 deletions(-) create mode 100644 pkg/whisper/whisper.go diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index c75b0a9..aaa5671 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -21,6 +21,9 @@ jobs: - repository: "donomii/go-rwkv.cpp" variable: "RWKV_VERSION" branch: "main" + - repository: "ggerganov/whisper.cpp" + variable: "WHISPER_CPP_VERSION" + branch: "master" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 diff --git a/Makefile b/Makefile index c4b1f56..bffb161 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,8 @@ GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109 GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58 +WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993 + GREEN := $(shell tput -Txterm setaf 2) YELLOW := $(shell tput -Txterm setaf 3) @@ -15,8 +17,8 @@ WHITE := $(shell tput -Txterm setaf 7) CYAN := $(shell tput -Txterm setaf 6) RESET := $(shell tput -Txterm sgr0) -C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv -LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv +C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp +LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp # Use this if you want to set the default behavior ifndef BUILD_TYPE @@ -76,6 +78,13 @@ go-gpt2: go-gpt2/libgpt2.a: go-gpt2 $(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a +whisper.cpp: + git clone https://github.com/ggerganov/whisper.cpp.git + cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1 + +whisper.cpp/libwhisper.a: whisper.cpp + cd whisper.cpp && make libwhisper.a + go-llama: git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1 @@ -88,8 +97,9 @@ replace: $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j $(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2 $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv + $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp -prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv +prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp $(GOCMD) mod download ## GENERIC @@ -98,9 +108,10 @@ rebuild: ## Rebuilds the project $(MAKE) -C go-gpt4all-j clean $(MAKE) -C go-gpt2 clean $(MAKE) -C go-rwkv clean + $(MAKE) -C whisper.cpp clean $(MAKE) build -prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a replace ## Prepares for building +prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a replace ## Prepares for building clean: ## Remove build related file rm -fr ./go-llama diff --git a/README.md b/README.md index 1f40d95..d252b53 100644 --- a/README.md +++ b/README.md @@ -549,6 +549,31 @@ Note: embeddings is supported only with `llama.cpp` compatible models. (doesn't +### Transcriptions endpoint + +
+ +Note: requires ffmpeg in the container image, which is currently not shipped due to licensing issues. We will prepare separated images with ffmpeg. (stay tuned!) + +Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tree/main in the `models` folder, and create a YAML file for your model: + +```yaml +name: whisper-1 +parameters: + model: whisper-en +``` + +The transcriptions endpoint then can be tested like so: +``` +wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg + +curl http://localhost:8080/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@$PWD/gb1.ogg" -F model="whisper-1" + +{"text":"My fellow Americans, this day has brought terrible news and great sadness to our country.At nine o'clock this morning, Mission Control in Houston lost contact with our Space ShuttleColumbia.A short time later, debris was seen falling from the skies above Texas.The Columbia's lost.There are no survivors.One board was a crew of seven.Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain DavidBrown, Commander William McCool, Dr. Kultna Shavla, and Elon Ramon, a colonel in the IsraeliAir Force.These men and women assumed great risk in the service to all humanity.In an age when spaceflight has come to seem almost routine, it is easy to overlook thedangers of travel by rocket and the difficulties of navigating the fierce outer atmosphere ofthe Earth.These astronauts knew the dangers, and they faced them willingly, knowing they had a highand noble purpose in life.Because of their courage and daring and idealism, we will miss them all the more.All Americans today are thinking as well of the families of these men and women who havebeen given this sudden shock and grief.You're not alone.Our entire nation agrees with you, and those you loved will always have the respect andgratitude of this country.The cause in which they died will continue.Mankind has led into the darkness beyond our world by the inspiration of discovery andthe longing to understand.Our journey into space will go on.In the skies today, we saw destruction and tragedy.As farther than we can see, there is comfort and hope.In the words of the prophet Isaiah, \"Lift your eyes and look to the heavens who createdall these, he who brings out the starry hosts one by one and calls them each by name.\"Because of his great power and mighty strength, not one of them is missing.The same creator who names the stars also knows the names of the seven souls we mourntoday.The crew of the shuttle Columbia did not return safely to Earth yet we can pray that all aresafely home.May God bless the grieving families and may God continue to bless America.[BLANK_AUDIO]"} +``` + +
+ ## Frequently asked questions Here are answers to some of the most common questions. diff --git a/api/api.go b/api/api.go index 7994ff2..a855534 100644 --- a/api/api.go +++ b/api/api.go @@ -84,6 +84,8 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16)) + app.Get("/v1/models", listModels(loader, cm)) app.Get("/models", listModels(loader, cm)) diff --git a/api/openai.go b/api/openai.go index c472b68..6279a69 100644 --- a/api/openai.go +++ b/api/openai.go @@ -5,10 +5,16 @@ import ( "bytes" "encoding/json" "fmt" + "net/http" + "os" + "path" + "path/filepath" "strings" model "github.com/go-skynet/LocalAI/pkg/model" + "github.com/go-skynet/LocalAI/pkg/whisper" "github.com/gofiber/fiber/v2" + "github.com/otiai10/copy" "github.com/rs/zerolog/log" "github.com/valyala/fasthttp" ) @@ -69,6 +75,11 @@ type OpenAIModel struct { type OpenAIRequest struct { Model string `json:"model" yaml:"model"` + // whisper + File string `json:"file" validate:"required"` + ResponseFormat string `json:"response_format"` + Language string `json:"language"` + // Prompt is read only by completion API calls Prompt interface{} `json:"prompt" yaml:"prompt"` @@ -385,6 +396,45 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread } } +// https://platform.openai.com/docs/api-reference/audio/create +func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16) + if err != nil { + return fmt.Errorf("failed reading parameters from request:%w", err) + } + + // retrieve the file data from the request + file, err := c.FormFile("file") + if err != nil { + return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()}) + } + log.Debug().Msgf("Audio file: %+v", file) + + dir, err := os.MkdirTemp("", "whisper") + if err != nil { + return err + } + defer os.RemoveAll(dir) + + dst := filepath.Join(dir, path.Base(file.Filename)) + if err := copy.Copy(file.Filename, dst); err != nil { + return err + } + + log.Debug().Msgf("Audio file copied to: %+v", dst) + + tr, err := whisper.Transcript(filepath.Join(loader.ModelPath, config.Model), dst, input.Language) + if err != nil { + return err + } + + log.Debug().Msgf("Trascribed: %+v", tr) + // TODO: handle different outputs here + return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr}) + } +} + func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error { return func(c *fiber.Ctx) error { models, err := loader.ListModels() diff --git a/go.mod b/go.mod index 3529a21..0f15231 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,8 @@ go 1.19 require ( github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be + github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 + github.com/go-audio/wav v1.1.0 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c @@ -11,6 +13,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/onsi/ginkgo/v2 v2.9.4 github.com/onsi/gomega v1.27.6 + github.com/otiai10/copy v1.11.0 github.com/otiai10/openaigo v1.1.0 github.com/rs/zerolog v1.29.1 github.com/sashabaranov/go-openai v1.9.3 @@ -26,6 +29,8 @@ require ( github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/andybalholm/brotli v1.0.5 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/go-audio/audio v1.0.0 // indirect + github.com/go-audio/riff v1.0.0 // indirect github.com/go-logr/logr v1.2.4 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect github.com/go-openapi/jsonreference v0.19.6 // indirect diff --git a/go.sum b/go.sum index 01ce408..55087d0 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,14 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ= github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g= +github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= +github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= +github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= +github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= +github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498= +github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g= +github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= @@ -34,18 +42,11 @@ github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM= github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis= github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI= -github.com/go-skynet/go-llama.cpp v0.0.0-20230504223241-67ff6a4db244/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY= -github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675 h1:plXywr95RghidIHPHl+O/zpcNXenEeS6w/6WftFNr9E= -github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY= -github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 h1:+7UXkGG+LeqJ5oPBEJo5D73Y2drKOVzrlB8D+iG2PHw= -github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I= github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c h1:JoW2+LKrSemoV32QRwrEC5f53erym96NCsUSM3wSVbM= github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8= -github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w= github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s= github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= @@ -88,7 +89,9 @@ github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE= github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM= github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= -github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y= +github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc= +github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= +github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg= github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg= github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= @@ -114,7 +117,7 @@ github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg= github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto= github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw= @@ -167,8 +170,6 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= diff --git a/pkg/whisper/whisper.go b/pkg/whisper/whisper.go new file mode 100644 index 0000000..4077f86 --- /dev/null +++ b/pkg/whisper/whisper.go @@ -0,0 +1,93 @@ +package whisper + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" + wav "github.com/go-audio/wav" +) + +func sh(c string) (string, error) { + cmd := exec.Command("/bin/sh", "-c", c) + cmd.Env = os.Environ() + o, err := cmd.CombinedOutput() + return string(o), err +} + +// AudioToWav converts audio to wav for transcribe. It bashes out to ffmpeg +// TODO: use https://github.com/mccoyst/ogg? +func audioToWav(src, dst string) error { + out, err := sh(fmt.Sprintf("ffmpeg -i %s -format s16le -ar 16000 -ac 1 -acodec pcm_s16le %s", src, dst)) + if err != nil { + return fmt.Errorf("error: %w out: %s", err, out) + } + + return nil +} + +func Transcript(modelpath, audiopath, language string) (string, error) { + + dir, err := os.MkdirTemp("", "whisper") + if err != nil { + return "", err + } + defer os.RemoveAll(dir) + + convertedPath := filepath.Join(dir, "converted.wav") + + if err := audioToWav(audiopath, convertedPath); err != nil { + return "", err + } + + // Open samples + fh, err := os.Open(convertedPath) + if err != nil { + return "", err + } + defer fh.Close() + + // Read samples + d := wav.NewDecoder(fh) + buf, err := d.FullPCMBuffer() + if err != nil { + return "", err + } + + data := buf.AsFloat32Buffer().Data + + // Load the model + model, err := whisper.New(modelpath) + if err != nil { + return "", err + } + defer model.Close() + + // Process samples + context, err := model.NewContext() + if err != nil { + return "", err + + } + + if language != "" { + context.SetLanguage(language) + } + + if err := context.Process(data, nil); err != nil { + return "", err + } + + text := "" + for { + segment, err := context.NextSegment() + if err != nil { + break + } + text += segment.Text + } + + return text, nil +}