Skip to content

Commit 5a4ab9a

Browse files
dm4hydai
authored and committed
[CI] llama: add llava v1.6
Signed-off-by: dm4 <dm4@secondstate.io>
1 parent 341f109 commit 5a4ab9a

File tree

3 files changed

+121
-182
lines changed

3 files changed

+121
-182
lines changed

‎.github/workflows/llama.yml‎

Lines changed: 89 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -21,109 +21,97 @@ on:
2121
- "wasmedge-ggml/**"
2222

2323
jobs:
24-
ubuntu:
25-
runs-on: ubuntu-20.04
26-
steps:
27-
- uses: actions/checkout@v4
28-
29-
- name: Install apt-get packages
30-
run: |
31-
echo RESET grub-efi/install_devices | sudo debconf-communicate grub-pc
32-
sudo ACCEPT_EULA=Y apt-get update
33-
sudo ACCEPT_EULA=Y apt-get upgrade
34-
sudo apt-get install wget git curl software-properties-common build-essential libopenblas-dev
35-
36-
- name: Install Rust target for wasm
37-
run: |
38-
rustup target add wasm32-wasi
39-
40-
- name: Install WasmEdge + WASI-NN + GGML
41-
run: |
42-
VERSION=0.13.5
43-
curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | sudo bash -s -- -v $VERSION --plugins wasi_nn-ggml -p /usr/local
44-
45-
- name: Tiny Llama
46-
run: |
47-
cd wasmedge-ggml/llama
48-
curl -LO https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf
49-
cargo build --target wasm32-wasi --release
50-
wasmedge --dir .:. \
51-
--nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
52-
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
53-
default \
54-
$'<|im_start|>system\nYou are an AI assistant, always answer as short as possible<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
55-
56-
- name: Gemma 2B
57-
run: |
58-
cd wasmedge-ggml/gemma
59-
curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
60-
cargo build --target wasm32-wasi --release
61-
wasmedge --dir .:. \
62-
--nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
63-
target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
64-
default \
65-
'<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
66-
67-
- name: Llava v1.5 7B
68-
run: |
69-
cd wasmedge-ggml/llava
70-
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
71-
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
72-
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
73-
cargo build --target wasm32-wasi --release
74-
wasmedge --dir .:. \
75-
--env mmproj=mmproj-model-f16.gguf \
76-
--env image=monalisa.jpg \
77-
--nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
78-
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
79-
default \
80-
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
81-
82-
- name: llama2 7b
83-
run: |
84-
cd wasmedge-ggml/llama
85-
curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
86-
87-
cargo build --target wasm32-wasi --release
88-
wasmedge --dir .:. \
89-
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
90-
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
91-
default \
92-
$'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
93-
94-
- name: Build llama-stream
95-
run: |
96-
cd wasmedge-ggml/llama-stream
97-
cargo build --target wasm32-wasi --release
98-
99-
- name: Build chatml
100-
run: |
101-
cd wasmedge-ggml/chatml
102-
cargo build --target wasm32-wasi --release
103-
104-
- name: Build embedding
105-
run: |
106-
cd wasmedge-ggml/embedding
107-
cargo build --target wasm32-wasi --release
108-
109-
- name: Build llava
110-
run: |
111-
cd wasmedge-ggml/llava
112-
cargo build --target wasm32-wasi --release
113-
114-
macos:
24+
build:
11525
strategy:
11626
matrix:
117-
include:
118-
- name: MacOS-13
119-
host_runner: macos-13
120-
- name: MacOS-14
121-
host_runner: macos-14
122-
name: ${{ matrix.name }}
123-
runs-on: ${{ matrix.host_runner }}
27+
runner: [ubuntu-20.04, macos-13, macos-14]
28+
job:
29+
- name: "Tiny Llama"
30+
run: |
31+
cd wasmedge-ggml/llama
32+
curl -LO https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf
33+
cargo build --target wasm32-wasi --release
34+
wasmedge --dir .:. \
35+
--nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
36+
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
37+
default \
38+
$'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
39+
40+
- name: Gemma 2B
41+
run: |
42+
cd wasmedge-ggml/gemma
43+
curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
44+
cargo build --target wasm32-wasi --release
45+
wasmedge --dir .:. \
46+
--env n_gpu_layers=0 \
47+
--nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
48+
target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
49+
default \
50+
'<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
51+
52+
- name: Llava v1.5 7B
53+
run: |
54+
cd wasmedge-ggml/llava
55+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
56+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
57+
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
58+
cargo build --target wasm32-wasi --release
59+
wasmedge --dir .:. \
60+
--env mmproj=mmproj-model-f16.gguf \
61+
--env image=monalisa.jpg \
62+
--env n_gpu_layers=0 \
63+
--nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
64+
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
65+
default \
66+
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
67+
68+
- name: Llava v1.6 7B
69+
run: |
70+
cd wasmedge-ggml/llava
71+
curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
72+
curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
73+
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
74+
cargo build --target wasm32-wasi --release
75+
wasmedge --dir .:. \
76+
--env mmproj=mmproj-vicuna7b-f16.gguf \
77+
--env image=monalisa.jpg \
78+
--env ctx_size=4096 \
79+
--env n_gpu_layers=0 \
80+
--nn-preload default:GGML:AUTO:vicuna-7b-q5_k.gguf \
81+
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
82+
default \
83+
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
84+
85+
- name: Llama2 7B
86+
run: |
87+
cd wasmedge-ggml/llama
88+
curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
89+
cargo build --target wasm32-wasi --release
90+
wasmedge --dir .:. \
91+
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
92+
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
93+
default \
94+
$'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
95+
96+
- name: Build llama-stream
97+
run: |
98+
cd wasmedge-ggml/llama-stream
99+
cargo build --target wasm32-wasi --release
100+
101+
- name: Build chatml
102+
run: |
103+
cd wasmedge-ggml/chatml
104+
cargo build --target wasm32-wasi --release
105+
106+
- name: Build embedding
107+
run: |
108+
cd wasmedge-ggml/embedding
109+
cargo build --target wasm32-wasi --release
110+
111+
name: ${{ matrix.runner }} - ${{ matrix.job.name }}
112+
runs-on: ${{ matrix.runner }}
124113
steps:
125114
- uses: actions/checkout@v4
126-
127115
- uses: actions-rust-lang/setup-rust-toolchain@v1
128116
- name: Install Rust target for wasm
129117
run: |
@@ -134,72 +122,6 @@ jobs:
134122
VERSION=0.13.5
135123
curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | sudo bash -s -- -v $VERSION --plugins wasi_nn-ggml -p /usr/local
136124
137-
- name: Tiny Llama
138-
run: |
139-
cd wasmedge-ggml/llama
140-
curl -LO https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf
141-
cargo build --target wasm32-wasi --release
142-
wasmedge --dir .:. \
143-
--nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
144-
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
145-
default \
146-
$'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
147-
148-
- name: Gemma 2B
149-
run: |
150-
cd wasmedge-ggml/gemma
151-
curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
152-
cargo build --target wasm32-wasi --release
153-
wasmedge --dir .:. \
154-
--env n_gpu_layers=0 \
155-
--nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
156-
target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
157-
default \
158-
'<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
125+
- name: ${{ matrix.job.name }}
126+
run: ${{ matrix.job.run }}
159127

160-
- name: Llava v1.5 7B
161-
run: |
162-
cd wasmedge-ggml/llava
163-
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
164-
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
165-
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
166-
cargo build --target wasm32-wasi --release
167-
wasmedge --dir .:. \
168-
--env mmproj=mmproj-model-f16.gguf \
169-
--env image=monalisa.jpg \
170-
--nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
171-
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
172-
default \
173-
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
174-
175-
- name: llama2 7b
176-
run: |
177-
cd wasmedge-ggml/llama
178-
curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
179-
180-
cargo build --target wasm32-wasi --release
181-
wasmedge --dir .:. \
182-
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
183-
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
184-
default \
185-
$'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
186-
187-
- name: Build llama-stream
188-
run: |
189-
cd wasmedge-ggml/llama-stream
190-
cargo build --target wasm32-wasi --release
191-
192-
- name: Build chatml
193-
run: |
194-
cd wasmedge-ggml/chatml
195-
cargo build --target wasm32-wasi --release
196-
197-
- name: Build embedding
198-
run: |
199-
cd wasmedge-ggml/embedding
200-
cargo build --target wasm32-wasi --release
201-
202-
- name: Build llava
203-
run: |
204-
cd wasmedge-ggml/llava
205-
cargo build --target wasm32-wasi --release

‎wasmedge-ggml/llava/src/main.rs‎

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,36 @@ fn read_input() -> String {
1616
}
1717
}
1818

19+
fn get_options_from_env() -> HashMap<&'static str, Value> {
20+
let mut options = HashMap::new();
21+
22+
// Required parameters for llava
23+
if let Ok(val) = env::var("mmproj") {
24+
options.insert("mmproj", Value::from(val.as_str()));
25+
} else {
26+
eprintln!("Failed to get mmproj model.");
27+
std::process::exit(1);
28+
}
29+
if let Ok(val) = env::var("image") {
30+
options.insert("image", Value::from(val.as_str()));
31+
} else {
32+
eprintln!("Failed to get the target image.");
33+
std::process::exit(1);
34+
}
35+
36+
// Optional parameters
37+
if let Ok(val) = env::var("ctx_size") {
38+
options.insert("ctx-size", serde_json::from_str(val.as_str()).unwrap());
39+
} else {
40+
options.insert("ctx-size", Value::from(2048));
41+
}
42+
if let Ok(val) = env::var("n_gpu_layers") {
43+
options.insert("n-gpu-layers", serde_json::from_str(val.as_str()).unwrap());
44+
}
45+
46+
options
47+
}
48+
1949
fn set_data_to_context(
2050
context: &mut GraphExecutionContext,
2151
data: Vec<u8>,
@@ -45,22 +75,9 @@ fn main() {
4575

4676
// Set options for the graph. Check our README for more details:
4777
// https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml#parameters
48-
let mut options = HashMap::new();
78+
let mut options = get_options_from_env();
79+
// You could also set the options manually like this:
4980
options.insert("enable-log", Value::from(false));
50-
options.insert("n-gpu-layers", Value::from(0));
51-
options.insert("ctx-size", Value::from(2048));
52-
if let Ok(val) = env::var("mmproj") {
53-
options.insert("mmproj", Value::from(val.as_str()));
54-
} else {
55-
eprintln!("Failed to get mmproj model.");
56-
std::process::exit(1);
57-
}
58-
if let Ok(val) = env::var("image") {
59-
options.insert("image", Value::from(val.as_str()));
60-
} else {
61-
eprintln!("Failed to get the target image.");
62-
std::process::exit(1);
63-
}
6481

6582
// Create graph and initialize context.
6683
let graph =
29.8 KB
Binary file not shown.

0 commit comments

Comments
 (0)