
Commit 6a9a245

dm4hydai authored and committed

[Example] ggml: add llava-base64-stream example (second-state#107)

* [Example] ggml: add llava-base64-stream example

  Signed-off-by: dm4 <dm4@secondstate.io>

* [CI] llama: merge m1 job into matrix, build llava-base64-stream

  Signed-off-by: dm4 <dm4@secondstate.io>
1 parent 710283f commit 6a9a245

File tree: 5 files changed, +244 −97 lines


.github/workflows/llama.yml

Lines changed: 19 additions & 97 deletions
@@ -24,33 +24,37 @@ jobs:
   build:
     strategy:
       matrix:
-        runner: [ubuntu-20.04, macos-13, macos-14]
+        runner: [ubuntu-20.04, macos-13, macos-14, macos-m1]
         job:
           - name: "Tiny Llama"
             run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
               cd wasmedge-ggml/llama
               curl -LO https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf
               cargo build --target wasm32-wasi --release
               time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
                 --nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
                 target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
                 default \
                 $'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
 
           - name: Gemma 2B
             run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
               cd wasmedge-ggml/gemma
               curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
               cargo build --target wasm32-wasi --release
               time wasmedge --dir .:. \
-                --env n_gpu_layers=0 \
+                --env n_gpu_layers="$NGL" \
                 --nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
                 target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
                 default \
                 '<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
 
           - name: Llava v1.5 7B
             run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
               cd wasmedge-ggml/llava
               curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
               curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
@@ -59,14 +63,15 @@ jobs:
               time wasmedge --dir .:. \
                 --env mmproj=mmproj-model-f16.gguf \
                 --env image=monalisa.jpg \
-                --env n_gpu_layers=0 \
+                --env n_gpu_layers="$NGL" \
                 --nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
                 target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
                 default \
                 $'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
 
           - name: Llava v1.6 7B
             run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
               cd wasmedge-ggml/llava
               curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
               curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
@@ -76,18 +81,20 @@ jobs:
                 --env mmproj=mmproj-vicuna7b-f16.gguf \
                 --env image=monalisa.jpg \
                 --env ctx_size=4096 \
-                --env n_gpu_layers=0 \
+                --env n_gpu_layers="$NGL" \
                 --nn-preload default:GGML:AUTO:vicuna-7b-q5_k.gguf \
                 target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
                 default \
                 $'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
 
           - name: Llama2 7B
             run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
               cd wasmedge-ggml/llama
               curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
               cargo build --target wasm32-wasi --release
               time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
                 --nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
                 target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
                 default \
@@ -108,102 +115,14 @@ jobs:
               cd wasmedge-ggml/embedding
               cargo build --target wasm32-wasi --release
 
-    name: ${{ matrix.runner }} - ${{ matrix.job.name }}
-    runs-on: ${{ matrix.runner }}
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions-rust-lang/setup-rust-toolchain@v1
-      - name: Install Rust target for wasm
-        run: |
-          rustup target add wasm32-wasi
-
-      - name: Install WasmEdge + WASI-NN + GGML
-        run: |
-          VERSION=0.13.5
-          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | sudo bash -s -- -v $VERSION --plugins wasi_nn-ggml -p /usr/local
-
-      - name: ${{ matrix.job.name }}
-        run: ${{ matrix.job.run }}
-
-  m1:
-    strategy:
-      matrix:
-        runner: [macos-m1]
-        job:
-          - name: "Tiny Llama"
-            run: |
-              source ~/.wasmedge/env
-              cd wasmedge-ggml/llama
-              curl -LO https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf
-              cargo build --target wasm32-wasi --release
-              time wasmedge --dir .:. \
-                --nn-preload default:GGML:AUTO:tinyllama-1.1b-chat-v0.3.Q5_K_M.gguf \
-                --env n_gpu_layers=100 \
-                target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
-                default \
-                $'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
-
-          - name: Gemma 2B
-            run: |
-              source ~/.wasmedge/env
-              cd wasmedge-ggml/gemma
-              curl -LO https://huggingface.co/second-state/Gemma-2b-it-GGUF/resolve/main/gemma-2b-it-Q5_K_M.gguf
-              cargo build --target wasm32-wasi --release
-              time wasmedge --dir .:. \
-                --env n_gpu_layers=100 \
-                --nn-preload default:GGML:AUTO:gemma-2b-it-Q5_K_M.gguf \
-                target/wasm32-wasi/release/wasmedge-ggml-gemma.wasm \
-                default \
-                '<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
-
-          - name: Llava v1.5 7B
-            run: |
-              source ~/.wasmedge/env
-              cd wasmedge-ggml/llava
-              curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf
-              curl -LO https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf
-              curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
-              cargo build --target wasm32-wasi --release
-              time wasmedge --dir .:. \
-                --env mmproj=mmproj-model-f16.gguf \
-                --env image=monalisa.jpg \
-                --env ctx_size=2048 \
-                --env n_gpu_layers=100 \
-                --nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
-                target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
-                default \
-                $'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
-
-          - name: Llava v1.6 7B
+          - name: Build llava-base64-stream
             run: |
-              source ~/.wasmedge/env
-              cd wasmedge-ggml/llava
-              curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
-              curl -LO https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
-              curl -LO https://llava-vl.github.io/static/images/monalisa.jpg
+              cd wasmedge-ggml/llava-base64-stream
               cargo build --target wasm32-wasi --release
-              time wasmedge --dir .:. \
-                --env mmproj=mmproj-vicuna7b-f16.gguf \
-                --env image=monalisa.jpg \
-                --env ctx_size=4096 \
-                --env n_gpu_layers=100 \
-                --nn-preload default:GGML:AUTO:vicuna-7b-q5_k.gguf \
-                target/wasm32-wasi/release/wasmedge-ggml-llava.wasm \
-                default \
-                $'You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe.\nUSER:<image>\nDo you know who drew this painting?\nASSISTANT:'
 
-          - name: Llama2 7B
-            run: |
-              source ~/.wasmedge/env
-              cd wasmedge-ggml/llama
-              curl -LO https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf
-              cargo build --target wasm32-wasi --release
-              time wasmedge --dir .:. \
-                --nn-preload default:GGML:AUTO:llama-2-7b-chat.Q5_K_M.gguf \
-                --env n_gpu_layers=100 \
-                target/wasm32-wasi/release/wasmedge-ggml-llama.wasm \
-                default \
-                $'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
+        include:
+          - runner: macos-m1
+            ngl: 100
 
     name: ${{ matrix.runner }} - ${{ matrix.job.name }}
     runs-on: ${{ matrix.runner }}
@@ -219,5 +138,8 @@ jobs:
           VERSION=0.13.5
          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- -v $VERSION --plugins wasi_nn-ggml
 
+      - name: Set environment variable
+        run: echo "NGL=${{ matrix.ngl || 0 }}" >> $GITHUB_ENV
+
       - name: ${{ matrix.job.name }}
         run: ${{ matrix.job.run }}
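The merged matrix relies on the new "Set environment variable" step: the expression `${{ matrix.ngl || 0 }}` resolves to `100` only for the `macos-m1` include entry and falls back to `0` everywhere else, so every `run` script can pass `--env n_gpu_layers="$NGL"` unconditionally. A minimal shell sketch of that fallback (`MATRIX_NGL` is a hypothetical stand-in for the expression result; on a real runner `$GITHUB_ENV` is provided by Actions):

```shell
# Emulate the "Set environment variable" step locally.
GITHUB_ENV=$(mktemp)   # Actions provides this file path for real jobs
MATRIX_NGL=""          # empty on ubuntu-20.04/macos-13/macos-14; "100" on macos-m1

# KEY=VALUE lines appended to $GITHUB_ENV become env vars for later steps.
echo "NGL=${MATRIX_NGL:-0}" >> "$GITHUB_ENV"

# A later step then sees the value via its environment:
. "$GITHUB_ENV"
echo "n_gpu_layers=$NGL"
```

Keeping the default at 0 preserves the previous CPU-only behavior on the non-M1 runners while letting the M1 runner offload all layers to the GPU.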
wasmedge-ggml/llava-base64-stream/Cargo.toml

Lines changed: 8 additions & 0 deletions (new file)

[package]
name = "wasmedge-ggml-llava-base64-stream"
version = "0.1.0"
edition = "2021"

[dependencies]
serde_json = "1.0"
wasi-nn = { git = "https://github.com/second-state/wasmedge-wasi-nn", branch = "ggml" }
wasmedge-ggml/llava-base64-stream/README.md

Lines changed: 30 additions & 0 deletions (new file)

# Llava Example For WASI-NN with GGML Backend

> [!NOTE]
> Please refer to [wasmedge-ggml/README.md](../README.md) for the general introduction and the setup of the WASI-NN plugin with the GGML backend. This document focuses on the Llava example.
> Refer to [wasmedge-ggml/llava/README.md](../llava/README.md) for downloading Llava models and execution commands.

This example demonstrates Llava model inference with an inline base64-encoded image; the encoded image is hardcoded in the source code.

## Execute

Execute the WASM with `wasmedge`, using the named-model feature to preload a large model:

> [!NOTE]
> You may see warnings stating `key clip.vision.* not found in file.` when using Llava v1.5 models. These are expected and can be ignored.

```console
$ wasmedge --dir .:. \
  --env mmproj=mmproj-model-f16.gguf \
  --nn-preload default:GGML:AUTO:ggml-model-q5_k.gguf \
  wasmedge-ggml-llava-base64-stream.wasm default

USER:
what is in this picture?
ASSISTANT:
The image showcases a bowl filled with an assortment of fresh berries, including several strawberries and blueberries. A person is standing close to the bowl, holding it in their hand or about to grab some fruit from it. The colorful fruit arrangement adds vibrancy to the scene.
USER:
please tell me a kind of fruit that is not in the picture
ASSISTANT:
There are no bananas in the picture.
```

wasmedge-ggml/llava-base64-stream/src/main.rs

Lines changed: 187 additions & 0 deletions
Large diffs are not rendered by default.
Binary file not shown.
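The 187-line `src/main.rs` is not rendered above. Per the commit message and README, it streams a llava-style chat whose image is embedded in the source as base64. The following is only a minimal sketch of the base64-inlining idea, with hypothetical helper names and prompt layout, omitting the actual `wasi-nn` inference calls; the exact image-tag format the GGML plugin expects is an assumption here:

```rust
// Alphabet for standard base64 (RFC 4648).
const B64: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encode arbitrary bytes as standard base64 with '=' padding.
fn base64_encode(data: &[u8]) -> String {
    let mut out = String::with_capacity((data.len() + 2) / 3 * 4);
    for chunk in data.chunks(3) {
        // Pack up to 3 bytes into a 24-bit group, zero-padding the tail.
        let b = [chunk[0], *chunk.get(1).unwrap_or(&0), *chunk.get(2).unwrap_or(&0)];
        let n = ((b[0] as u32) << 16) | ((b[1] as u32) << 8) | (b[2] as u32);
        out.push(B64[(n >> 18 & 63) as usize] as char);
        out.push(B64[(n >> 12 & 63) as usize] as char);
        out.push(if chunk.len() > 1 { B64[(n >> 6 & 63) as usize] as char } else { '=' });
        out.push(if chunk.len() > 2 { B64[(n & 63) as usize] as char } else { '=' });
    }
    out
}

/// Hypothetical helper: splice the encoded image into a llava-style prompt
/// instead of passing an image file path via `--env image=...`.
fn build_prompt(image: &[u8], question: &str) -> String {
    format!(
        "USER:<img src=\"data:image/jpeg;base64,{}\">\n{}\nASSISTANT:",
        base64_encode(image),
        question
    )
}

fn main() {
    // The real example hardcodes a pre-encoded image string in the source.
    let prompt = build_prompt(b"not really a JPEG", "what is in this picture?");
    println!("{}", prompt);
}
```

Encoding at build or run time as above is one way to produce the hardcoded string; the sketch deliberately stops before the `wasi-nn` graph setup and compute loop that the real file performs.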

0 commit comments
