diff options
| author | juodumas <juodumas@gmail.com> | 2023-09-18 13:59:51 +0300 |
|---|---|---|
| committer | juodumas <juodumas@gmail.com> | 2023-09-18 14:12:51 +0300 |
| commit | 356ead8aa66e939d78bf38b4e5515545bbdf5a91 (patch) | |
| tree | e73e4d970a0c00f737c78196f39bd26c96003434 /py | |
| parent | 924e3a390f043e979f16113f6b0a55f8c54b1f5e (diff) | |
| download | vim-ai-356ead8aa66e939d78bf38b4e5515545bbdf5a91.tar.gz | |
Add support for base_url option to use local models
For example, you can start llama-cpp-python like this (it emulates
the OpenAI API):
```sh
CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install 'llama-cpp-python[server]'
wget https://huggingface.co/TheBloke/CodeLlama-13B-Instruct-GGUF/resolve/main/codellama-13b-instruct.Q5_K_M.gguf
python3 -m llama_cpp.server --n_gpu_layers 100 --model codellama-13b-instruct.Q5_K_M.gguf
```
Then set the API URL in your `.vimrc`:
```vim
let g:vim_ai_chat = {
\ "engine": "chat",
\ "options": {
\ "base_url": "http://127.0.0.1:8000",
\ },
\ }
```
And chat with the locally hosted AI using `:AIChat`.
The change in utils.py was needed because llama-cpp-python appends a
trailing carriage return to the final response line: `[DONE]^M`.
Diffstat (limited to 'py')
| -rw-r--r-- | py/chat.py | 5 | ||||
| -rw-r--r-- | py/complete.py | 9 | ||||
| -rw-r--r-- | py/utils.py | 2 |
3 files changed, 12 insertions, 4 deletions
@@ -1,3 +1,4 @@ +from urllib.parse import urljoin # import utils plugin_root = vim.eval("s:plugin_root") vim.command(f"py3file {plugin_root}/py/utils.py") @@ -69,7 +70,9 @@ try: **openai_options } printDebug("[chat] request: {}", request) - response = openai_request('https://api.openai.com/v1/chat/completions', request, http_options) + base_url = options.get('base_url', 'https://api.openai.com') + url = urljoin(base_url, 'v1/chat/completions') + response = openai_request(url, request, http_options) def map_chunk(resp): printDebug("[chat] response: {}", resp) return resp['choices'][0]['delta'].get('content', '') diff --git a/py/complete.py b/py/complete.py index c8d45fe..a5c4711 100644 --- a/py/complete.py +++ b/py/complete.py @@ -1,3 +1,4 @@ +from urllib.parse import urljoin # import utils plugin_root = vim.eval("s:plugin_root") vim.command(f"py3file {plugin_root}/py/utils.py") @@ -17,7 +18,9 @@ def complete_engine(prompt): **openai_options } printDebug("[engine-complete] request: {}", request) - response = openai_request('https://api.openai.com/v1/completions', request, http_options) + base_url = config_options.get('base_url', 'https://api.openai.com') + url = urljoin(base_url, 'v1/completions') + response = openai_request(url, request, http_options) def map_chunk(resp): printDebug("[engine-complete] response: {}", resp) return resp['choices'][0].get('text', '') @@ -35,7 +38,9 @@ def chat_engine(prompt): **openai_options } printDebug("[engine-chat] request: {}", request) - response = openai_request('https://api.openai.com/v1/chat/completions', request, http_options) + base_url = config_options.get('base_url', 'https://api.openai.com') + url = urljoin(base_url, 'v1/chat/completions') + response = openai_request(url, request, http_options) def map_chunk(resp): printDebug("[engine-chat] response: {}", resp) return resp['choices'][0]['delta'].get('content', '') diff --git a/py/utils.py b/py/utils.py index 76ae1e4..3b34517 100644 --- a/py/utils.py +++ b/py/utils.py @@ 
-138,7 +138,7 @@ def openai_request(url, data, options): line = line_bytes.decode("utf-8", errors="replace") if line.startswith(OPENAI_RESP_DATA_PREFIX): line_data = line[len(OPENAI_RESP_DATA_PREFIX):-1] - if line_data == OPENAI_RESP_DONE: + if line_data.strip() == OPENAI_RESP_DONE: pass else: openai_obj = json.loads(line_data) |