From 356ead8aa66e939d78bf38b4e5515545bbdf5a91 Mon Sep 17 00:00:00 2001
From: juodumas
Date: Mon, 18 Sep 2023 13:59:51 +0300
Subject: Add support for base_url option to use local models

For example, you can start llama-cpp-python like this (it emulates the
OpenAI API):

```sh
CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install 'llama-cpp-python[server]'
wget https://huggingface.co/TheBloke/CodeLlama-13B-Instruct-GGUF/resolve/main/codellama-13b-instruct.Q5_K_M.gguf
python3 -m llama_cpp.server --n_gpu_layers 100 --model codellama-13b-instruct.Q5_K_M.gguf
```

Then set the API URL in your `.vimrc`:

```vim
let g:vim_ai_chat = {
\ "engine": "chat",
\ "options": {
\   "base_url": "http://127.0.0.1:8000",
\ },
\ }
```

And chat with the locally hosted AI using `:AIChat`.

The change in utils.py was needed because llama-cpp-python adds a trailing
carriage return to the final response line: `[DONE]^M`.
---
 py/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py/utils.py b/py/utils.py
index 76ae1e4..3b34517 100644
--- a/py/utils.py
+++ b/py/utils.py
@@ -138,7 +138,7 @@ def openai_request(url, data, options):
             line = line_bytes.decode("utf-8", errors="replace")
             if line.startswith(OPENAI_RESP_DATA_PREFIX):
                 line_data = line[len(OPENAI_RESP_DATA_PREFIX):-1]
-                if line_data == OPENAI_RESP_DONE:
+                if line_data.strip() == OPENAI_RESP_DONE:
                     pass
                 else:
                     openai_obj = json.loads(line_data)
--
cgit v1.2.3
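
For reference, a minimal Python sketch (not part of the patch) of the failure mode the commit message describes, assuming the two constants hold the usual OpenAI streaming values `'data: '` and `'[DONE]'`:

```python
# Sketch only: assumes OPENAI_RESP_DATA_PREFIX = 'data: ' and
# OPENAI_RESP_DONE = '[DONE]', as in the OpenAI SSE streaming format.
OPENAI_RESP_DATA_PREFIX = 'data: '
OPENAI_RESP_DONE = '[DONE]'

# llama-cpp-python terminates the stream with a CRLF-ended line.
line = 'data: [DONE]\r\n'

# Dropping only the prefix and the trailing '\n' leaves '[DONE]\r' behind.
line_data = line[len(OPENAI_RESP_DATA_PREFIX):-1]

print(line_data == OPENAI_RESP_DONE)          # False: the leftover '\r' breaks the comparison
print(line_data.strip() == OPENAI_RESP_DONE)  # True: .strip() removes the '\r'
```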