Skip to content

Commit 8d0fa4c

Browse files
dm4 authored and hydai committed
[CI] llama: add gemma and llava jobs
Signed-off-by: dm4 <dm4@secondstate.io>
1 parent 76e9f01 commit 8d0fa4c

File tree

2 files changed

+73
-4
lines changed

2 files changed

+73
-4
lines changed

‎.github/workflows/llama.yml‎

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,33 @@ jobs:
5151
--nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
5252
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
5353
default \
54-
'<|im_start|>system\nYou are an AI assistant, always answer as short as possible<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
54+
$'<|im_start|>system\nYou are an AI assistant, always answer as short as possible<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
55+
56+
- name: Gemma 2B
57+
run: |
58+
cd wasmedge-ggml/gemma
59+
curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
60+
cargo build --target wasm32-wasi --release
61+
wasmedge --dir .:. \
62+
--nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
63+
target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
64+
default \
65+
'<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
66+
67+
- name: Llava v1.5 7B
68+
run: |
69+
cd wasmedge-ggml/llava
70+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
71+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
72+
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
73+
cargo build --target wasm32-wasi --release
74+
wasmedge --dir .:. \
75+
--env mmproj=mmproj-model-f16.gguf \
76+
--env image=monalisa.jpg \
77+
--nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
78+
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
79+
default \
80+
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
5581
5682
- name: llama2 7b
5783
run: |
@@ -63,7 +89,7 @@ jobs:
6389
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
6490
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
6591
default \
66-
'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
92+
$'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
6793
6894
- name: Build llama-stream
6995
run: |
@@ -117,7 +143,34 @@ jobs:
117143
--nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
118144
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
119145
default \
120-
'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
146+
$'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
147+
148+
- name: Gemma 2B
149+
run: |
150+
cd wasmedge-ggml/gemma
151+
curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
152+
cargo build --target wasm32-wasi --release
153+
wasmedge --dir .:. \
154+
--env n_gpu_layers=0 \
155+
--nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
156+
target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
157+
default \
158+
'<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
159+
160+
- name: Llava v1.5 7B
161+
run: |
162+
cd wasmedge-ggml/llava
163+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
164+
curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
165+
curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
166+
cargo build --target wasm32-wasi --release
167+
wasmedge --dir .:. \
168+
--env mmproj=mmproj-model-f16.gguf \
169+
--env image=monalisa.jpg \
170+
--nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
171+
target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
172+
default \
173+
$'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
121174
122175
- name: llama2 7b
123176
run: |
@@ -129,7 +182,7 @@ jobs:
129182
--nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
130183
target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
131184
default \
132-
'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
185+
$'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
133186
134187
- name: Build llama-stream
135188
run: |

‎wasmedge-ggml/llava/src/main.rs‎

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,22 @@ fn main() {
7272
.init_execution_context()
7373
.expect("Failed to init context");
7474

75+
// If there is a third argument, use it as the prompt and enter non-interactive mode.
76+
// This is mainly for the CI workflow.
77+
if args.len() >= 3 {
78+
let prompt = &args[2];
79+
println!("Prompt:\n{}", prompt);
80+
let tensor_data = prompt.as_bytes().to_vec();
81+
context
82+
.set_input(0, wasi_nn::TensorType::U8, &[1], &tensor_data)
83+
.expect("Failed to set input");
84+
println!("Response:");
85+
context.compute().expect("Failed to compute");
86+
let output = get_output_from_context(&context);
87+
println!("{}", output.trim());
88+
std::process::exit(0);
89+
}
90+
7591
let mut saved_prompt = String::new();
7692
let system_prompt = String::from("You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe." );
7793
let image_placeholder = "<image>";

0 commit comments

Comments
 (0)