Skip to content

Commit 710283f

Browse files
authored
[Example] ggml: Enable new GPU runner (second-state#106)
* [Example] ggml: update llama example, add options for enable-log and ngl
* [Example] ggml: update llava example, add options for enable-log and ngl
* [Example] ggml: update gemma example, provide enable_log
* [Example] ggml: enable new macos m1 runner for the GPU verification
* [Example] ggml: install wasmedge in the user space on macOS
* [Example] ggml: apply fmt and clippy on llama
* [Example] ggml: apply fmt and clippy on gemma
* [Example] ggml: support enable_log in the llava example
* [Example] ggml: enable new m1 runner
* [Example] ggml: use the time command to get the real execution time

Signed-off-by: hydai <hydai@secondstate.io>
1 parent 5a4ab9a commit 710283f

File tree

7 files changed

+140
-17
lines changed

7 files changed

+140
-17
lines changed

‎.github/workflows/llama.yml‎

Lines changed: 101 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
cd wasmedge-ggml/llama
3232
curl -LO https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf
3333
cargo build --target wasm32-wasi --release
34-
wasmedge --dir .:. \
34+
time wasmedge --dir .:. \
3535
--nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
3636
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
3737
default \
@@ -42,7 +42,7 @@ jobs:
4242
cd wasmedge-ggml/gemma
4343
curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
4444
cargo build --target wasm32-wasi --release
45-
wasmedge --dir .:. \
45+
time wasmedge --dir .:. \
4646
--env n_gpu_layers=0 \
4747
--nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
4848
target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
@@ -56,7 +56,7 @@ jobs:
5656
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
5757
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
5858
cargo build --target wasm32-wasi --release
59-
wasmedge --dir .:. \
59+
time wasmedge --dir .:. \
6060
--env mmproj=mmproj-model-f16.gguf \
6161
--env image=monalisa.jpg \
6262
--env n_gpu_layers=0 \
@@ -72,7 +72,7 @@ jobs:
7272
curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
7373
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
7474
cargo build --target wasm32-wasi --release
75-
wasmedge --dir .:. \
75+
time wasmedge --dir .:. \
7676
--env mmproj=mmproj-vicuna7b-f16.gguf \
7777
--env image=monalisa.jpg \
7878
--env ctx_size=4096 \
@@ -87,7 +87,7 @@ jobs:
8787
cd wasmedge-ggml/llama
8888
curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
8989
cargo build --target wasm32-wasi --release
90-
wasmedge --dir .:. \
90+
time wasmedge --dir .:. \
9191
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
9292
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
9393
default \
@@ -125,3 +125,99 @@ jobs:
125125
- name: ${{ matrix.job.name }}
126126
run: ${{ matrix.job.run }}
127127

128+
m1:
129+
strategy:
130+
matrix:
131+
runner: [macos-m1]
132+
job:
133+
- name: "Tiny Llama"
134+
run: |
135+
source ~/.wasmedge/env
136+
cd wasmedge-ggml/llama
137+
curl -LO https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf
138+
cargo build --target wasm32-wasi --release
139+
time wasmedge --dir .:. \
140+
--nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
141+
--env n_gpu_layers=100 \
142+
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
143+
default \
144+
$'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
145+
146+
- name: Gemma 2B
147+
run: |
148+
source ~/.wasmedge/env
149+
cd wasmedge-ggml/gemma
150+
curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
151+
cargo build --target wasm32-wasi --release
152+
time wasmedge --dir .:. \
153+
--env n_gpu_layers=100 \
154+
--nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
155+
target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
156+
default \
157+
'<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
158+
159+
- name: Llava v1.5 7B
160+
run: |
161+
source ~/.wasmedge/env
162+
cd wasmedge-ggml/llava
163+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
164+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
165+
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
166+
cargo build --target wasm32-wasi --release
167+
time wasmedge --dir .:. \
168+
--env mmproj=mmproj-model-f16.gguf \
169+
--env image=monalisa.jpg \
170+
--env ctx_size=2048 \
171+
--env n_gpu_layers=100 \
172+
--nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
173+
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
174+
default \
175+
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
176+
177+
- name: Llava v1.6 7B
178+
run: |
179+
source ~/.wasmedge/env
180+
cd wasmedge-ggml/llava
181+
curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
182+
curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
183+
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
184+
cargo build --target wasm32-wasi --release
185+
time wasmedge --dir .:. \
186+
--env mmproj=mmproj-vicuna7b-f16.gguf \
187+
--env image=monalisa.jpg \
188+
--env ctx_size=4096 \
189+
--env n_gpu_layers=100 \
190+
--nn-preload default:GGML:AUTO:vicuna-7b-q5_k.gguf \
191+
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
192+
default \
193+
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
194+
195+
- name: Llama2 7B
196+
run: |
197+
source ~/.wasmedge/env
198+
cd wasmedge-ggml/llama
199+
curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
200+
cargo build --target wasm32-wasi --release
201+
time wasmedge --dir .:. \
202+
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
203+
--env n_gpu_layers=100 \
204+
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
205+
default \
206+
$'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
207+
208+
name: ${{ matrix.runner }} - ${{ matrix.job.name }}
209+
runs-on: ${{ matrix.runner }}
210+
steps:
211+
- uses: actions/checkout@v4
212+
- uses: actions-rust-lang/setup-rust-toolchain@v1
213+
- name: Install Rust target for wasm
214+
run: |
215+
rustup target add wasm32-wasi
216+
217+
- name: Install WasmEdge + WASI-NN + GGML
218+
run: |
219+
VERSION=0.13.5
220+
curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- -v $VERSION --plugins wasi_nn-ggml
221+
222+
- name: ${{ matrix.job.name }}
223+
run: ${{ matrix.job.run }}

‎wasmedge-ggml/gemma/src/main.rs‎

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,20 @@ fn read_input() -> String {
1919
fn get_options_from_env() -> Value {
2020
let mut options = json!({});
2121
if let Ok(val) = env::var("enable_log") {
22-
options["enable-log"] = serde_json::from_str(val.as_str()).unwrap()
22+
options["enable-log"] =
23+
serde_json::from_str(val.as_str()).expect("invalid enable-log value (true/false)")
24+
} else {
25+
options["enable-log"] = serde_json::from_str("false").unwrap()
2326
}
2427
if let Ok(val) = env::var("ctx_size") {
25-
options["ctx-size"] = serde_json::from_str(val.as_str()).unwrap()
28+
options["ctx-size"] =
29+
serde_json::from_str(val.as_str()).expect("invalid ctx-size value (unsigned integer)")
2630
} else {
2731
options["ctx-size"] = serde_json::from_str("4096").unwrap()
2832
}
2933
if let Ok(val) = env::var("n_gpu_layers") {
30-
options["n-gpu-layers"] = serde_json::from_str(val.as_str()).unwrap()
34+
options["n-gpu-layers"] =
35+
serde_json::from_str(val.as_str()).expect("invalid ngl (unsigned integer)")
3136
} else {
3237
options["n-gpu-layers"] = serde_json::from_str("100").unwrap()
3338
}
@@ -171,7 +176,7 @@ fn main() {
171176
// Retrieve the output.
172177
let mut output = get_output_from_context(&context);
173178
if let Some(true) = options["stream-stdout"].as_bool() {
174-
println!("");
179+
println!();
175180
} else {
176181
println!("{}", output.trim());
177182
}
144 KB
Binary file not shown.

‎wasmedge-ggml/llama/src/main.rs‎

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
use serde_json::json;
12
use serde_json::Value;
2-
use std::collections::HashMap;
33
use std::env;
44
use std::io;
55
use wasi_nn::{self, GraphExecutionContext};
@@ -16,6 +16,25 @@ fn read_input() -> String {
1616
}
1717
}
1818

19+
fn get_options_from_env() -> Value {
20+
let mut options = json!({});
21+
if let Ok(val) = env::var("enable_log") {
22+
options["enable-log"] = serde_json::from_str(val.as_str())
23+
.expect("invalid value for enable-log option (true/false)")
24+
} else {
25+
options["enable-log"] = serde_json::from_str("false").unwrap()
26+
}
27+
if let Ok(val) = env::var("n_gpu_layers") {
28+
options["n-gpu-layers"] =
29+
serde_json::from_str(val.as_str()).expect("invalid ngl value (unsigned integer)")
30+
} else {
31+
options["n-gpu-layers"] = serde_json::from_str("0").unwrap()
32+
}
33+
options["ctx-size"] = serde_json::from_str("1024").unwrap();
34+
35+
options
36+
}
37+
1938
fn set_data_to_context(
2039
context: &mut GraphExecutionContext,
2140
data: Vec<u8>,
@@ -58,10 +77,7 @@ fn main() {
5877

5978
// Set options for the graph. Check our README for more details:
6079
// https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml#parameters
61-
let mut options = HashMap::new();
62-
options.insert("enable-log", Value::from(false));
63-
options.insert("n-gpu-layers", Value::from(0));
64-
options.insert("ctx-size", Value::from(1024));
80+
let options = get_options_from_env();
6581

6682
// Create graph and initialize context.
6783
let graph =
31.1 KB
Binary file not shown.

‎wasmedge-ggml/llava/src/main.rs‎

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,21 @@ fn get_options_from_env() -> HashMap<&'static str, Value> {
3434
}
3535

3636
// Optional parameters
37+
if let Ok(val) = env::var("enable_log") {
38+
options.insert("enable-log", serde_json::from_str(val.as_str()).unwrap());
39+
} else {
40+
options.insert("enable-log", Value::from(false));
41+
}
3742
if let Ok(val) = env::var("ctx_size") {
3843
options.insert("ctx-size", serde_json::from_str(val.as_str()).unwrap());
3944
} else {
4045
options.insert("ctx-size", Value::from(2048));
4146
}
4247
if let Ok(val) = env::var("n_gpu_layers") {
4348
options.insert("n-gpu-layers", serde_json::from_str(val.as_str()).unwrap());
49+
} else {
50+
options.insert("n-gpu-layers", Value::from(0));
4451
}
45-
4652
options
4753
}
4854

@@ -75,9 +81,9 @@ fn main() {
7581

7682
// Set options for the graph. Check our README for more details:
7783
// https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml#parameters
78-
let mut options = get_options_from_env();
84+
let options = get_options_from_env();
7985
// You could also set the options manually like this:
80-
options.insert("enable-log", Value::from(false));
86+
// options.insert("enable-log", Value::from(false));
8187

8288
// Create graph and initialize context.
8389
let graph =
8 KB
Binary file not shown.

0 commit comments

Comments
 (0)