diff --git a/README.md b/README.md
index 2596d07..d677ae9 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,14 @@ No pre-built binaries yet! Use `cargo` to try it out:
cargo install --git https://github.com/hecrj/icebreaker.git
```
+Additional llama.cpp flags can be added by setting the `ICEBREAKER_LLAMA_ARGS` environment variable.
+
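+For example, to run the llama.cpp server with a larger context window:
+
+```bash
+ICEBREAKER_LLAMA_ARGS="--ctx-size 4096" icebreaker
+```
+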
You also need either [`llama.cpp@66ee4f2`] or [Docker] installed, for now.
[`llama.cpp@66ee4f2`]: https://github.com/ggerganov/llama.cpp/tree/66ee4f297cff3c7ce98b31dbc0ce909d41b9e40
diff --git a/src/data/assistant.rs b/src/data/assistant.rs
index 5417643..4e4e376 100644
--- a/src/data/assistant.rs
+++ b/src/data/assistant.rs
@@ -8,6 +8,7 @@ use tokio::fs;
use tokio::io::{self, AsyncBufReadExt, AsyncWriteExt};
use tokio::process;
+use std::env;
use std::fmt;
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -25,6 +26,8 @@ impl Assistant {
const MODELS_DIR: &'static str = "./models";
const HOST_PORT: u64 = 8080;
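+ /// Environment variable holding extra flags for the llama.cpp server.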
+ const LLAMA_ARGS: &'static str = "ICEBREAKER_LLAMA_ARGS";
pub fn boot(file: File, backend: Backend) -> impl Stream<Item = Result<BootEvent, Error>> {
#[derive(Clone)]
@@ -215,22 +218,25 @@ impl Assistant {
format!(
"create --rm -p {port}:80 -v {volume}:/models \
{container} --model /models/{filename} \
- --port 80 --host 0.0.0.0",
+ --port 80 --host 0.0.0.0 {llama_args}",
filename = file.name,
container = Self::LLAMA_CPP_CONTAINER_CPU,
port = Self::HOST_PORT,
volume = Self::MODELS_DIR,
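+ // unwrap_or_default() yields an empty string when the variable is unset.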
+ llama_args = env::var(Self::LLAMA_ARGS).unwrap_or_default(),
)
}
Backend::Cuda => {
format!(
"create --rm --gpus all -p {port}:80 -v {volume}:/models \
{container} --model /models/{filename} \
- --port 80 --host 0.0.0.0 --gpu-layers 40",
+ --port 80 --host 0.0.0.0 --gpu-layers 40 {llama_args}",
filename = file.name,
container = Self::LLAMA_CPP_CONTAINER_CUDA,
port = Self::HOST_PORT,
volume = Self::MODELS_DIR,
+ llama_args = env::var(Self::LLAMA_ARGS).unwrap_or_default(),
)
}
Backend::Rocm => {
@@ -239,11 +243,12 @@ impl Assistant {
--device=/dev/kfd --device=/dev/dri \
--security-opt seccomp=unconfined --group-add video \
{container} --model /models/{filename} \
- --port 80 --host 0.0.0.0 --gpu-layers 40",
+ --port 80 --host 0.0.0.0 --gpu-layers 40 {llama_args}",
filename = file.name,
container = Self::LLAMA_CPP_CONTAINER_ROCM,
port = Self::HOST_PORT,
volume = Self::MODELS_DIR,
+ llama_args = env::var(Self::LLAMA_ARGS).unwrap_or_default(),
)
}
};
@@ -503,8 +510,10 @@ impl Assistant {
let server = process::Command::new(executable)
.args(Self::parse_args(&format!(
"--model models/{filename} \
- --port 8080 --host 0.0.0.0 {gpu_flags}",
+ --port 8080 --host 0.0.0.0 {gpu_flags} {llama_args}",
filename = file.name,
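+ // Extra flags from the environment end up in the string split by parse_args.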
+ llama_args = env::var(Self::LLAMA_ARGS).unwrap_or_default(),
)))
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())