7802 lines
222 KiB
Plaintext
7802 lines
222 KiB
Plaintext
Directory structure:
|
||
└── examples/
|
||
├── README.md
|
||
├── abort-reload/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── get_started.html
|
||
│ └── get_started.js
|
||
├── cache-usage/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── cache_usage.html
|
||
│ └── cache_usage.ts
|
||
├── chrome-extension/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── content.js
|
||
│ ├── example.html
|
||
│ ├── manifest.json
|
||
│ ├── manifest_v2.json
|
||
│ ├── popup.css
|
||
│ ├── popup.html
|
||
│ └── popup.ts
|
||
├── chrome-extension-webgpu-service-worker/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── background.ts
|
||
│ ├── content.js
|
||
│ ├── example.html
|
||
│ ├── manifest.json
|
||
│ ├── popup.css
|
||
│ ├── popup.html
|
||
│ └── popup.ts
|
||
├── embeddings/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── embeddings.html
|
||
│ └── embeddings.ts
|
||
├── function-calling/
|
||
│ ├── README.md
|
||
│ ├── function-calling-manual/
|
||
│ │ ├── README.md
|
||
│ │ ├── package.json
|
||
│ │ └── src/
|
||
│ │ ├── function_calling_manual.html
|
||
│ │ └── function_calling_manual.ts
|
||
│ └── function-calling-openai/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── function_calling_openai.html
|
||
│ └── function_calling_openai.ts
|
||
├── get-started/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── get_started.html
|
||
│ └── get_started.ts
|
||
├── get-started-latency-breakdown/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── get_started_latency_breakdown.html
|
||
│ └── get_started_latency_breakdown.ts
|
||
├── get-started-web-worker/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── get_started.html
|
||
│ ├── main.ts
|
||
│ └── worker.ts
|
||
├── json-mode/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── json_mode.html
|
||
│ └── json_mode.ts
|
||
├── json-schema/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── json_schema.html
|
||
│ └── json_schema.ts
|
||
├── logit-processor/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── logit_processor.html
|
||
│ ├── logit_processor.ts
|
||
│ ├── my_logit_processor.ts
|
||
│ └── worker.ts
|
||
├── multi-models/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── main.ts
|
||
│ ├── multi_models.html
|
||
│ └── worker.ts
|
||
├── multi-round-chat/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── multi_round_chat.html
|
||
│ └── multi_round_chat.ts
|
||
├── next-simple-chat/
|
||
│ ├── README.md
|
||
│ ├── next.config.js
|
||
│ ├── package.json
|
||
│ ├── postcss.config.js
|
||
│ ├── tailwind.config.js
|
||
│ ├── tsconfig.json
|
||
│ └── src/
|
||
│ ├── pages/
|
||
│ │ ├── _app.tsx
|
||
│ │ ├── _document.tsx
|
||
│ │ ├── index.tsx
|
||
│ │ └── api/
|
||
│ │ └── hello.ts
|
||
│ ├── styles/
|
||
│ │ └── globals.css
|
||
│ └── utils/
|
||
│ ├── chat_component.tsx
|
||
│ └── chat_ui.ts
|
||
├── qwen3/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── qwen3_example.html
|
||
│ └── qwen3_example.ts
|
||
├── seed-to-reproduce/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── seed.html
|
||
│ └── seed.ts
|
||
├── service-worker/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── index.html
|
||
│ ├── main.ts
|
||
│ └── sw.ts
|
||
├── simple-chat-js/
|
||
│ ├── index.css
|
||
│ ├── index.html
|
||
│ └── index.js
|
||
├── simple-chat-ts/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── gh-config.js
|
||
│ ├── llm_chat.css
|
||
│ ├── llm_chat.html
|
||
│ ├── simple_chat.ts
|
||
│ └── worker.ts
|
||
├── simple-chat-upload/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── gh-config.js
|
||
│ ├── llm_chat.css
|
||
│ ├── llm_chat.html
|
||
│ ├── simple_chat.ts
|
||
│ └── worker.ts
|
||
├── streaming/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── streaming.html
|
||
│ └── streaming.ts
|
||
├── text-completion/
|
||
│ ├── README.md
|
||
│ ├── package.json
|
||
│ └── src/
|
||
│ ├── text_completion.html
|
||
│ └── text_completion.ts
|
||
└── vision-model/
|
||
├── README.md
|
||
├── package.json
|
||
└── src/
|
||
├── utils.ts
|
||
├── vision_model.html
|
||
├── vision_model.ts
|
||
└── worker.ts
|
||
|
||
================================================
|
||
FILE: examples/README.md
|
||
================================================
|
||
# Awesome WebLLM
|
||
|
||
This page contains a curated list of examples, tutorials, and blogs about WebLLM use cases.
|
||
Please send a pull request if you find things that belong here.
|
||
|
||
## Example Projects
|
||
|
||
Note that all examples below run in-browser and use WebGPU as a backend.
|
||
|
||
#### Project List
|
||
|
||
- [get-started](get-started): minimum get started example with chat completion.
|
||
|
||
[](https://jsfiddle.net/neetnestor/yac9gbwf/)
|
||
[](https://codepen.io/neetnestor/pen/NWVdgey)
|
||
|
||
- [simple-chat-js](simple-chat-js): a minimal and complete chat bot app in vanilla JavaScript.
|
||
|
||
[](https://jsfiddle.net/neetnestor/4nmgvsa2/)
|
||
[](https://codepen.io/neetnestor/pen/vYwgZaG)
|
||
|
||
- [simple-chat-ts](simple-chat-ts): a minimal and complete chat bot app in TypeScript.
|
||
- [get-started-web-worker](get-started-web-worker): same as get-started, but using web worker.
|
||
- [next-simple-chat](next-simple-chat): a minimal and complete chat bot app with [Next.js](https://nextjs.org/).
|
||
- [multi-round-chat](multi-round-chat): while APIs are functional, we internally optimize so that multi round chat usage can reuse KV cache
|
||
- [text-completion](text-completion): demonstrates API `engine.completions.create()`, which is pure text completion with no conversation, as opposed to `engine.chat.completions.create()`
|
||
- [embeddings](embeddings): demonstrates API `engine.embeddings.create()`, integration with `EmbeddingsInterface` and `MemoryVectorStore` of [Langchain.js](https://js.langchain.com), and RAG with Langchain.js using WebLLM for both LLM and Embedding in a single engine
|
||
- [multi-models](multi-models): demonstrates loading multiple models in a single engine concurrently
|
||
|
||
#### Advanced OpenAI API Capabilities
|
||
|
||
These examples demonstrate various capabilities via WebLLM's OpenAI-like API.
|
||
|
||
- [streaming](streaming): return output as chunks in real-time in the form of an AsyncGenerator
|
||
- [json-mode](json-mode): efficiently ensure output is in json format, see [OpenAI Reference](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) for more.
|
||
- [json-schema](json-schema): besides guaranteeing output to be in JSON, ensure the output adheres to a specific JSON schema specified by the user
|
||
- [seed-to-reproduce](seed-to-reproduce): use seeding to ensure reproducible output with fields `seed`.
|
||
- [function-calling](function-calling) (WIP): function calling with fields `tools` and `tool_choice` (with preliminary support).
|
||
- [vision-model](vision-model): process request with image input using Vision Language Model (e.g. Phi3.5-vision)
|
||
|
||
#### Chrome Extension
|
||
|
||
- [chrome-extension](chrome-extension): chrome extension that does not have a persistent background
|
||
- [chrome-extension-webgpu-service-worker](chrome-extension-webgpu-service-worker): chrome extension using service worker, hence having a persistent background
|
||
|
||
#### Others
|
||
|
||
- [logit-processor](logit-processor): while `logit_bias` is supported, we additionally support stateful logit processing where users can specify their own rules. We also expose low-level API `forwardTokensAndSample()`.
|
||
- [cache-usage](cache-usage): demonstrates how WebLLM supports both the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) and [IndexedDB cache](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), and
|
||
users can pick with `appConfig.useIndexedDBCache`. Also demonstrates various cache utils such as checking
|
||
whether a model is cached, deleting a model's weights from cache, deleting a model library wasm from cache, etc.
|
||
- [simple-chat-upload](simple-chat-upload): demonstrates how to upload local models to WebLLM instead of downloading via a URL link
|
||
|
||
## Demo Spaces
|
||
|
||
- [web-llm-embed](https://huggingface.co/spaces/matthoffner/web-llm-embed): document chat prototype using react-llm with transformers.js embeddings
|
||
- [DeVinci](https://x6occ-biaaa-aaaai-acqzq-cai.icp0.io/): AI chat app based on WebLLM and hosted on decentralized cloud platform
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/abort-reload/README.md
|
||
================================================
|
||
# WebLLM Get Started App
|
||
|
||
This folder provides a demo for cancelling model fetching after calling `engine.reload()`.
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note if you would like to hack WebLLM core package.
|
||
You can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/abort-reload/package.json
|
||
================================================
|
||
{
|
||
"name": "get-started",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/get_started.html --port 8887",
|
||
"build": "parcel build src/get_started.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/abort-reload/src/get_started.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./get_started.js"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/abort-reload/src/get_started.js
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
import { error } from "loglevel";
|
||
|
||
let engine;
|
||
|
||
function setLabel(id, text) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
// Create the engine and start loading a model WITHOUT awaiting it, so the
// in-flight fetch can later be cancelled via `engine.unload()` (this is the
// abort-reload demo; see this example's README).
async function main() {
  const reportProgress = (report) => {
    console.log(report.text);
    setLabel("init-label", report.text);
  };
  // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
  const modelId = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
  engine = new webllm.MLCEngine({
    initProgressCallback: reportProgress,
  });
  // Deliberately not awaited: the download continues in the background.
  engine.reload(modelId);
}
|
||
// Kick off engine creation and the (un-awaited) model reload.
main();

// Five seconds in, cancel the in-flight model fetch by unloading the engine —
// this is the abort behavior this example demonstrates (see README). Any
// error raised by unload() is only logged, not rethrown.
setTimeout(() => {
  console.log("calling unload");
  engine.unload().catch((err) => {
    console.log(err);
  });
}, 5000);
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/cache-usage/README.md
|
||
================================================
|
||
# WebLLM Cache Usage
|
||
|
||
WebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedDBCache`.
|
||
This folder provides an example on how Cache and IndexedDB Cache are used in WebLLM. We also
|
||
demonstrate the utility cache functions such as deleting models, checking if models are in cache, etc.
|
||
|
||
For more information about the two caches, see: https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser.
|
||
|
||
To inspect the downloaded artifacts in your browser, open up developer console, go to application,
|
||
and you will find the artifacts under either `IndexedDB` or `Cache storage`.
|
||
|
||
To run the example, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note if you would like to hack WebLLM core package.
|
||
You can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/cache-usage/package.json
|
||
================================================
|
||
{
|
||
"name": "cache-usage",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/cache_usage.html --port 8888",
|
||
"build": "parcel build src/cache_usage.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/cache-usage/src/cache_usage.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./cache_usage.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/cache-usage/src/cache_usage.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
|
||
async function main() {
|
||
const appConfig = webllm.prebuiltAppConfig;
|
||
// CHANGE THIS TO SEE EFFECTS OF BOTH, CODE BELOW DO NOT NEED TO CHANGE
|
||
appConfig.useIndexedDBCache = true;
|
||
|
||
if (appConfig.useIndexedDBCache) {
|
||
console.log("Using IndexedDB Cache");
|
||
} else {
|
||
console.log("Using Cache API");
|
||
}
|
||
|
||
// 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache
|
||
const selectedModel = "phi-2-q4f16_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{ initProgressCallback: initProgressCallback, appConfig: appConfig },
|
||
);
|
||
|
||
const request: webllm.ChatCompletionRequest = {
|
||
stream: false,
|
||
messages: [
|
||
{
|
||
role: "user",
|
||
content: "Write an analogy between mathematics and a lighthouse.",
|
||
},
|
||
],
|
||
n: 1,
|
||
};
|
||
let reply = await engine.chat.completions.create(request);
|
||
console.log(reply);
|
||
|
||
// 2. Check whether model weights are cached
|
||
let modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
|
||
console.log("hasModelInCache: ", modelCached);
|
||
if (!modelCached) {
|
||
throw Error("Expect hasModelInCache() to be true, but got: " + modelCached);
|
||
}
|
||
|
||
// 3. We reload, and we should see this time it is much faster because the weights are cached.
|
||
console.log("Reload model start");
|
||
await engine.reload(selectedModel);
|
||
console.log("Reload model end");
|
||
reply = await engine.chat.completions.create(request);
|
||
console.log(reply);
|
||
|
||
// 4. Delete every thing about this model from cache
|
||
// You can also delete only the model library wasm, only the model weights, or only the config file
|
||
await webllm.deleteModelAllInfoInCache(selectedModel, appConfig);
|
||
modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
|
||
console.log("After deletion, hasModelInCache: ", modelCached);
|
||
if (modelCached) {
|
||
throw Error(
|
||
"Expect hasModelInCache() to be false, but got: " + modelCached,
|
||
);
|
||
}
|
||
|
||
// 5. If we reload, we should expect the model to start downloading again
|
||
console.log("Reload model start");
|
||
await engine.reload(selectedModel);
|
||
console.log("Reload model end");
|
||
reply = await engine.chat.completions.create(request);
|
||
console.log(reply);
|
||
}
|
||
|
||
// Run the cache demo; a failed expectation rejects the returned promise.
main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/README.md
|
||
================================================
|
||
# WebLLM Chrome Extension
|
||
|
||

|
||
|
||
To run the extension, do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm run build
|
||
```
|
||
|
||
This will create a new directory at `chrome-extension/dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `chrome-extension/dist/` directory. You can now pin the extension to your toolbar and use the drop-down menu to chat with your favorite model!
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/package.json
|
||
================================================
|
||
{
|
||
"name": "chrome-extension",
|
||
"version": "1.0.1",
|
||
"description": "",
|
||
"private": true,
|
||
"scripts": {
|
||
"build": "parcel build src/manifest.json --config @parcel/config-webextension"
|
||
},
|
||
"author": "",
|
||
"license": "ISC",
|
||
"devDependencies": {
|
||
"@parcel/config-webextension": "^2.9.3",
|
||
"@types/chrome": "^0.0.242",
|
||
"buffer": "^6.0.3",
|
||
"parcel": "^2.9.3",
|
||
"process": "^0.11.10",
|
||
"url": "^0.11.1"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80",
|
||
"progressbar.js": "^1.1.0"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/src/content.js
|
||
================================================
|
||
// Only the content script is able to access the DOM. On any message from
// the popup over the port, reply with the visible text of the page body.
chrome.runtime.onConnect.addListener((port) => {
  port.onMessage.addListener((msg) => {
    port.postMessage({ contents: document.body.innerText });
  });
});
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/src/example.html
|
||
================================================
|
||
In the year 2154, humanity had colonized several planets in the distant reaches
|
||
of the galaxy. The planet of Xylophia-IV was one of the most remote and
|
||
inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite
|
||
these harsh conditions, a team of scientists had established a research station
|
||
on the planet to study the unique geological formations and exotic flora and
|
||
fauna. One day, while conducting a routine survey of the planet's surface, the
|
||
team discovered a strange object buried deep in the ice. As they examined it
|
||
closer, they realized it was a small, metallic capsule with a glowing blue
|
||
symbol etched onto its surface. The team's leader, a brilliant scientist named
|
||
Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious
|
||
origins. She ordered her team to bring it back to the research station for
|
||
further analysis. After weeks of studying the capsule, the team finally cracked
|
||
the code to the symbol etched onto its surface. It was a message from an alien
|
||
race, warning Earth of an impending attack from an unknown threat. The team was
|
||
shocked and dismayed by the news, but they knew they had to act quickly to warn
|
||
the rest of humanity. They transmitted the message to the nearest space station,
|
||
which relayed it to Earth's government. As the threat of attack loomed near, the
|
||
team remained on high alert, ready to face whatever dangers lay ahead. They had
|
||
uncovered the secrets of the universe, and now they were determined to protect
|
||
their planet and its inhabitants at all costs.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/src/manifest.json
|
||
================================================
|
||
{
|
||
"manifest_version": 3,
|
||
"name": "MLCBot",
|
||
"version": "0.1.1",
|
||
"description": "Chat with your browser",
|
||
"icons": {
|
||
"16": "icons/icon-16.png",
|
||
"32": "icons/icon-32.png",
|
||
"64": "icons/icon-64.png",
|
||
"128": "icons/icon-128.png"
|
||
},
|
||
"content_security_policy": {
|
||
"extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co"
|
||
},
|
||
"action": {
|
||
"default_title": "MLCBot",
|
||
"default_popup": "popup.html"
|
||
},
|
||
"content_scripts": [
|
||
{
|
||
"matches": ["<all_urls>"],
|
||
"js": ["content.js"]
|
||
}
|
||
],
|
||
"permissions": ["storage", "tabs", "webNavigation", "activeTab", "scripting"],
|
||
"host_permissions": ["http://*/", "https://*/"]
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/src/manifest_v2.json
|
||
================================================
|
||
{
|
||
"manifest_version": 2,
|
||
"name": "MLCBot",
|
||
"version": "0.1.0",
|
||
"description": "Chat with your browser",
|
||
"icons": {
|
||
"16": "icons/icon-16.png",
|
||
"32": "icons/icon-32.png",
|
||
"64": "icons/icon-64.png",
|
||
"128": "icons/icon-128.png"
|
||
},
|
||
"content_security_policy": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'unsafe-eval' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co",
|
||
"browser_action": {
|
||
"default_popup": "popup.html"
|
||
},
|
||
"content_scripts": [
|
||
{
|
||
"matches": ["<all_urls>"],
|
||
"js": ["content.js"]
|
||
}
|
||
],
|
||
"permissions": ["storage", "tabs", "webNavigation", "activeTab"]
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/src/popup.css
|
||
================================================
|
||
*,
|
||
*::before,
|
||
*::after {
|
||
margin: 0;
|
||
padding: 0;
|
||
box-sizing: border-box;
|
||
}
|
||
|
||
html {
|
||
font-family:
|
||
-apple-system,
|
||
BlinkMacSystemFont,
|
||
Segoe UI,
|
||
Helvetica,
|
||
Arial,
|
||
sans-serif;
|
||
color: #222;
|
||
}
|
||
|
||
body {
|
||
margin: 0;
|
||
padding: 0.5rem;
|
||
background-color: #778da9;
|
||
width: 335px;
|
||
font-size: small;
|
||
}
|
||
|
||
p {
|
||
margin: 0;
|
||
}
|
||
|
||
/* LOADING BAR */
|
||
#loadingContainer {
|
||
margin-bottom: 15px;
|
||
width: 315px;
|
||
height: 8px;
|
||
}
|
||
|
||
/* INPUT AREA */
|
||
#query-input {
|
||
border: 1px solid #ccc;
|
||
border-radius: 4px;
|
||
}
|
||
|
||
.input-container {
|
||
display: flex;
|
||
flex-direction: row;
|
||
align-items: center;
|
||
}
|
||
|
||
.input-container input {
|
||
width: 100%;
|
||
outline: none;
|
||
padding: 0.5rem;
|
||
margin-right: 0.5rem;
|
||
}
|
||
|
||
/* BUTTON */
|
||
.btn {
|
||
background-color: #1b263b;
|
||
color: white;
|
||
font-size: small;
|
||
cursor: pointer;
|
||
border-radius: 4px;
|
||
border: none;
|
||
padding: 0.5rem;
|
||
}
|
||
|
||
.btn:hover {
|
||
background-color: #d0d0d0;
|
||
}
|
||
|
||
.btn:disabled {
|
||
background-color: #a7a7a7;
|
||
color: rgb(255, 255, 255);
|
||
cursor: default;
|
||
}
|
||
|
||
.btn img {
|
||
width: 1rem;
|
||
height: 1rem;
|
||
}
|
||
|
||
/* LOADING */
|
||
|
||
.stage {
|
||
display: flex;
|
||
justify-content: center;
|
||
align-items: center;
|
||
position: relative;
|
||
margin: 0 -5%;
|
||
overflow: hidden;
|
||
}
|
||
|
||
#loading-indicator {
|
||
display: none;
|
||
color: white;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
.dot-flashing {
|
||
position: relative;
|
||
width: 10px;
|
||
height: 10px;
|
||
border-radius: 5px;
|
||
background-color: #1b263b;
|
||
color: #1b263b;
|
||
animation: dot-flashing 0.4s infinite linear alternate;
|
||
animation-delay: 0.2s;
|
||
}
|
||
|
||
.dot-flashing::before,
|
||
.dot-flashing::after {
|
||
content: "";
|
||
display: inline-block;
|
||
position: absolute;
|
||
top: 0;
|
||
}
|
||
|
||
.dot-flashing::before {
|
||
left: -15px;
|
||
width: 10px;
|
||
height: 10px;
|
||
border-radius: 5px;
|
||
background-color: #1b263b;
|
||
color: #1b263b;
|
||
animation: dot-flashing 0.4s infinite alternate;
|
||
animation-delay: 0s;
|
||
}
|
||
|
||
.dot-flashing::after {
|
||
left: 15px;
|
||
width: 10px;
|
||
height: 10px;
|
||
border-radius: 5px;
|
||
background-color: #1b263b;
|
||
color: #1b263b;
|
||
animation: dot-flashing 0.4s infinite alternate;
|
||
animation-delay: 0.4s;
|
||
}
|
||
|
||
@keyframes dot-flashing {
|
||
0% {
|
||
background-color: #1b263b;
|
||
}
|
||
|
||
50%,
|
||
100% {
|
||
background-color: #415a77;
|
||
}
|
||
}
|
||
|
||
/* ANSWERS */
|
||
#queriesAnswersContainer {
|
||
display: block;
|
||
color: white;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
#answer {
|
||
color: #333333;
|
||
}
|
||
|
||
#answerWrapper {
|
||
display: none;
|
||
background-color: #ffd166;
|
||
border-radius: 8px;
|
||
padding: 0.5rem;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
.queriesAnswers {
|
||
border-radius: 8px;
|
||
background-color: #ffd166;
|
||
padding: 0.5rem;
|
||
color: #333333;
|
||
}
|
||
|
||
#lastQuery {
|
||
color: rgb(188, 188, 188);
|
||
}
|
||
|
||
#lastAnswer {
|
||
color: white;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
#lastRequest {
|
||
padding: 0.5rem;
|
||
margin-top: 0.5rem;
|
||
background-color: #333333;
|
||
border-radius: 4px;
|
||
}
|
||
|
||
/* ANSWER OPTIONS */
|
||
.timeStamp {
|
||
color: #9a8c98;
|
||
}
|
||
|
||
.copyRow {
|
||
display: flex;
|
||
flex-direction: row;
|
||
align-items: end;
|
||
justify-content: space-between;
|
||
color: #a7a7a7;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
.copyText {
|
||
display: none;
|
||
color: #a7a7a7;
|
||
margin-right: 0.5rem;
|
||
}
|
||
|
||
.copyButton {
|
||
color: #415a77;
|
||
background-color: transparent;
|
||
border: none;
|
||
cursor: pointer;
|
||
padding: 0;
|
||
margin-left: 0.5rem;
|
||
}
|
||
|
||
.copyButton:hover {
|
||
color: #5e80a7;
|
||
background-color: transparent;
|
||
}
|
||
|
||
.removeButton {
|
||
color: #415a77;
|
||
background-color: transparent;
|
||
border: none;
|
||
cursor: pointer;
|
||
padding: 0;
|
||
}
|
||
|
||
.removeButton:hover {
|
||
color: #5e80a7;
|
||
background-color: transparent;
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/src/popup.html
|
||
================================================
|
||
<!doctype html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="UTF-8" />
|
||
<title>Chatbot</title>
|
||
<link rel="stylesheet" href="popup.css" />
|
||
<link
|
||
rel="stylesheet"
|
||
href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
|
||
/>
|
||
</head>
|
||
<body>
|
||
<select id="model-selection"></select>
|
||
<div id="loadingBox">
|
||
<p id="init-label">Initializing model...</p>
|
||
<div id="loadingContainer"></div>
|
||
</div>
|
||
<p id="model-name"></p>
|
||
<div class="input-container form-group">
|
||
<input
|
||
type="search"
|
||
id="query-input"
|
||
placeholder="What's on your mind?"
|
||
/>
|
||
<button id="submit-button" class="btn">
|
||
<i class="fa fa-comments"></i>
|
||
</button>
|
||
</div>
|
||
|
||
<div class="stage">
|
||
<div id="loading-indicator" class="dot-flashing"></div>
|
||
</div>
|
||
|
||
<div id="answerWrapper">
|
||
<div id="answer"></div>
|
||
<div class="copyRow">
|
||
<span id="timestamp"></span>
|
||
<button
|
||
id="copyAnswer"
|
||
class="btn copyButton"
|
||
title="Copy the Answer to the Clipboard"
|
||
>
|
||
<i class="fa-solid fa-copy fa-lg"></i>
|
||
</button>
|
||
</div>
|
||
</div>
|
||
|
||
<script type="module" src="./popup.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension/src/popup.ts
|
||
================================================
|
||
"use strict";
|
||
|
||
// This code is partially adapted from the openai-chatgpt-chrome-extension repo:
|
||
// https://github.com/jessedi0n/openai-chatgpt-chrome-extension
|
||
|
||
import "./popup.css";
|
||
|
||
import {
|
||
MLCEngineInterface,
|
||
InitProgressReport,
|
||
CreateMLCEngine,
|
||
ChatCompletionMessageParam,
|
||
prebuiltAppConfig,
|
||
} from "@mlc-ai/web-llm";
|
||
import { ProgressBar, Line } from "progressbar.js";
|
||
|
||
// modified setLabel to not throw error
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label != null) {
|
||
label.innerText = text;
|
||
}
|
||
}
|
||
|
||
function getElementAndCheck(id: string): HTMLElement {
|
||
const element = document.getElementById(id);
|
||
if (element == null) {
|
||
throw Error("Cannot find element " + id);
|
||
}
|
||
return element;
|
||
}
|
||
|
||
// Promise-based delay helper (milliseconds).
const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));

// Cached references to the popup's fixed UI elements.
const queryInput = getElementAndCheck("query-input")!;
const submitButton = getElementAndCheck("submit-button")!;
const modelName = getElementAndCheck("model-name");

// Page text used as chat context, and the human-readable model name
// (set later by enableInputs()).
let context = "";
let modelDisplayName = "";

// Ask the content script for the page contents.
// NOTE(review): fetchPageContents is not defined in this part of the file —
// presumably declared further down; verify before refactoring.
// throws runtime.lastError if you refresh extension AND try to access a webpage that is already open
fetchPageContents();

// Submission stays disabled until the model finishes loading.
(<HTMLButtonElement>submitButton).disabled = true;
|
||
|
||
// Thin progress bar shown while model weights are being fetched.
let progressBar: ProgressBar = new Line("#loadingContainer", {
  strokeWidth: 4,
  easing: "easeInOut",
  duration: 1400,
  color: "#ffd166",
  trailColor: "#eee",
  trailWidth: 1,
  svgStyle: { width: "100%", height: "100%" },
});

// True until the first model finishes loading; gates the input controls.
let isLoadingParams = true;

// Engine init callback: mirror progress text and fraction into the UI,
// and unlock the inputs once loading reaches 100%.
let initProgressCallback = (report: InitProgressReport) => {
  setLabel("init-label", report.text);
  progressBar.animate(report.progress, {
    duration: 50,
  });
  if (report.progress == 1.0) {
    enableInputs();
  }
};
|
||
|
||
// initially selected model
let selectedModel = "Qwen2-0.5B-Instruct-q4f16_1-MLC";

// populate model-selection with every prebuilt model id
const modelSelector = getElementAndCheck(
  "model-selection",
) as HTMLSelectElement;
for (let i = 0; i < prebuiltAppConfig.model_list.length; ++i) {
  const model = prebuiltAppConfig.model_list[i];
  const opt = document.createElement("option");
  opt.value = model.model_id;
  opt.innerHTML = model.model_id;
  opt.selected = false;

  // set initial selection as the initially selected model
  if (model.model_id == selectedModel) {
    opt.selected = true;
  }

  modelSelector.appendChild(opt);
}

// Top-level await: load the initial model before wiring up the chat UI.
modelName.innerText = "Loading initial model...";
const engine: MLCEngineInterface = await CreateMLCEngine(selectedModel, {
  initProgressCallback: initProgressCallback,
});
// NOTE(review): modelDisplayName is assigned inside enableInputs(), which runs
// from the progress callback when progress hits 1.0 — confirm it is always
// populated before this line executes.
modelName.innerText = "Now chatting with " + modelDisplayName;

// Chat history buffer, initially empty.
let chatHistory: ChatCompletionMessageParam[] = [];
|
||
|
||
function enableInputs() {
|
||
if (isLoadingParams) {
|
||
sleep(500);
|
||
isLoadingParams = false;
|
||
}
|
||
|
||
// remove loading bar and loading bar descriptors, if exists
|
||
const initLabel = document.getElementById("init-label");
|
||
initLabel?.remove();
|
||
const loadingBarContainer = document.getElementById("loadingContainer")!;
|
||
loadingBarContainer?.remove();
|
||
queryInput.focus();
|
||
|
||
const modelNameArray = selectedModel.split("-");
|
||
modelDisplayName = modelNameArray[0];
|
||
let j = 1;
|
||
while (j < modelNameArray.length && modelNameArray[j][0] != "q") {
|
||
modelDisplayName = modelDisplayName + "-" + modelNameArray[j];
|
||
j++;
|
||
}
|
||
}
|
||
|
||
let requestInProgress = false;
|
||
|
||
// Disable submit button if input field is empty
|
||
queryInput.addEventListener("keyup", () => {
|
||
if (
|
||
(<HTMLInputElement>queryInput).value === "" ||
|
||
requestInProgress ||
|
||
isLoadingParams
|
||
) {
|
||
(<HTMLButtonElement>submitButton).disabled = true;
|
||
} else {
|
||
(<HTMLButtonElement>submitButton).disabled = false;
|
||
}
|
||
});
|
||
|
||
// If user presses enter, click submit button
|
||
queryInput.addEventListener("keyup", (event) => {
|
||
if (event.code === "Enter") {
|
||
event.preventDefault();
|
||
submitButton.click();
|
||
}
|
||
});
|
||
|
||
// Listen for clicks on submit button
|
||
async function handleClick() {
|
||
requestInProgress = true;
|
||
(<HTMLButtonElement>submitButton).disabled = true;
|
||
|
||
// Get the message from the input field
|
||
const message = (<HTMLInputElement>queryInput).value;
|
||
console.log("message", message);
|
||
// Clear the answer
|
||
document.getElementById("answer")!.innerHTML = "";
|
||
// Hide the answer
|
||
document.getElementById("answerWrapper")!.style.display = "none";
|
||
// Show the loading indicator
|
||
document.getElementById("loading-indicator")!.style.display = "block";
|
||
|
||
// Generate response
|
||
let inp = message;
|
||
if (context.length > 0) {
|
||
inp =
|
||
"Use only the following context when answering the question at the end. Don't use any other knowledge.\n" +
|
||
context +
|
||
"\n\nQuestion: " +
|
||
message +
|
||
"\n\nHelpful Answer: ";
|
||
}
|
||
console.log("Input:", inp);
|
||
chatHistory.push({ role: "user", content: inp });
|
||
|
||
let curMessage = "";
|
||
const completion = await engine.chat.completions.create({
|
||
stream: true,
|
||
messages: chatHistory,
|
||
});
|
||
for await (const chunk of completion) {
|
||
const curDelta = chunk.choices[0].delta.content;
|
||
if (curDelta) {
|
||
curMessage += curDelta;
|
||
}
|
||
updateAnswer(curMessage);
|
||
}
|
||
const response = await engine.getMessage();
|
||
chatHistory.push({ role: "assistant", content: await engine.getMessage() });
|
||
console.log("response", response);
|
||
|
||
requestInProgress = false;
|
||
(<HTMLButtonElement>submitButton).disabled = false;
|
||
}
|
||
submitButton.addEventListener("click", handleClick);
|
||
|
||
// listen for changes in modelSelector
// Unloads the current model and reloads the newly selected one,
// recreating the loading UI (label + progress bar) while it loads.
async function handleSelectChange() {
  // Ignore selection changes fired while a (re)load is already underway.
  if (isLoadingParams) {
    return;
  }

  modelName.innerText = "";

  // Recreate the loading label and progress-bar container that
  // enableInputs() removed after the previous load completed.
  const initLabel = document.createElement("p");
  initLabel.id = "init-label";
  initLabel.innerText = "Initializing model...";
  const loadingContainer = document.createElement("div");
  loadingContainer.id = "loadingContainer";

  const loadingBox = getElementAndCheck("loadingBox");
  loadingBox.appendChild(initLabel);
  loadingBox.appendChild(loadingContainer);

  isLoadingParams = true;
  (<HTMLButtonElement>submitButton).disabled = true;

  // Stop any in-flight generation before tearing the engine down.
  if (requestInProgress) {
    engine.interruptGenerate();
  }
  engine.resetChat();
  chatHistory = [];
  await engine.unload();

  selectedModel = modelSelector.value;

  // Fresh progress bar bound to the newly created container.
  progressBar = new Line("#loadingContainer", {
    strokeWidth: 4,
    easing: "easeInOut",
    duration: 1400,
    color: "#ffd166",
    trailColor: "#eee",
    trailWidth: 1,
    svgStyle: { width: "100%", height: "100%" },
  });

  // Rebind progress reporting to the new bar; re-enables inputs at 100%.
  initProgressCallback = (report: InitProgressReport) => {
    setLabel("init-label", report.text);
    progressBar.animate(report.progress, {
      duration: 50,
    });
    if (report.progress == 1.0) {
      enableInputs();
    }
  };

  engine.setInitProgressCallback(initProgressCallback);

  // Block submissions while the new model loads.
  requestInProgress = true;
  modelName.innerText = "Reloading with new model...";
  await engine.reload(selectedModel);
  requestInProgress = false;
  modelName.innerText = "Now chatting with " + modelDisplayName;
}
modelSelector.addEventListener("change", handleSelectChange);
|
||
|
||
// Listen for messages from the background script
|
||
chrome.runtime.onMessage.addListener(({ answer, error }) => {
|
||
if (answer) {
|
||
updateAnswer(answer);
|
||
}
|
||
});
|
||
|
||
function updateAnswer(answer: string) {
|
||
// Show answer
|
||
document.getElementById("answerWrapper")!.style.display = "block";
|
||
const answerWithBreaks = answer.replace(/\n/g, "<br>");
|
||
document.getElementById("answer")!.innerHTML = answerWithBreaks;
|
||
// Add event listener to copy button
|
||
document.getElementById("copyAnswer")!.addEventListener("click", () => {
|
||
// Get the answer text
|
||
const answerText = answer;
|
||
// Copy the answer text to the clipboard
|
||
navigator.clipboard
|
||
.writeText(answerText)
|
||
.then(() => console.log("Answer text copied to clipboard"))
|
||
.catch((err) => console.error("Could not copy text: ", err));
|
||
});
|
||
const options: Intl.DateTimeFormatOptions = {
|
||
month: "short",
|
||
day: "2-digit",
|
||
hour: "2-digit",
|
||
minute: "2-digit",
|
||
second: "2-digit",
|
||
};
|
||
const time = new Date().toLocaleString("en-US", options);
|
||
// Update timestamp
|
||
document.getElementById("timestamp")!.innerText = time;
|
||
// Hide loading indicator
|
||
document.getElementById("loading-indicator")!.style.display = "none";
|
||
}
|
||
|
||
function fetchPageContents() {
|
||
chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) {
|
||
const port = chrome.tabs.connect(tabs[0].id, { name: "channelName" });
|
||
port.postMessage({});
|
||
port.onMessage.addListener(function (msg) {
|
||
console.log("Page contents:", msg.contents);
|
||
context = msg.contents;
|
||
});
|
||
});
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/README.md
|
||
================================================
|
||
# WebLLM Chrome Extension using WebGPU Running on Service Worker
|
||
|
||

|
||
|
||
> [!WARNING]
|
||
> Service worker support in WebGPU is enabled by default in [Chrome 124](https://chromiumdash.appspot.com/commit/8d78510e4aca5ac3cd8ee4a33e96b404eaa43246).
|
||
> If you are using Chrome 123, go to `chrome://flags/#enable-experimental-web-platform-features`, enable the `#enable-experimental-web-platform-features` flag, and **relaunch the browser**.
|
||
|
||
This example shows how we can create a Chrome extension using WebGPU and a service worker.
|
||
|
||
- The project structure is as follows:
|
||
- `manifest.json`: A required file that lists important information about the structure and behavior of that extension. Here we are using manifest V3.
|
||
- `popup.ts`: Script of the extension pop-up window.
|
||
- `background.ts`: Script of the service worker. An extension service worker is loaded when it is needed, and unloaded when it goes dormant.
|
||
- `content.js`: Content script that interacts with DOM.
|
||
- Run
|
||
|
||
```bash
|
||
npm install
|
||
npm run build
|
||
```
|
||
|
||
This will create a new directory at `./dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `./dist/` directory. You can now pin the extension to your toolbar and use it to chat with your favorite model!
|
||
|
||
**Note**: This example disables chatting using the contents of the active tab by default.
|
||
To enable it, set `useContext` in `popup.ts` to `true`. More info about this feature can be found
|
||
[here](https://github.com/mlc-ai/web-llm/pull/190).
|
||
However, if the web content is too large, it might run into issues. We recommend using `example.html` to
|
||
test this feature.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/package.json
|
||
================================================
|
||
{
|
||
"name": "chrome-extension",
|
||
"version": "1.0.0",
|
||
"description": "",
|
||
"private": true,
|
||
"scripts": {
|
||
"build": "parcel build src/manifest.json --config @parcel/config-webextension"
|
||
},
|
||
"author": "",
|
||
"license": "ISC",
|
||
"devDependencies": {
|
||
"@parcel/config-webextension": "^2.9.3",
|
||
"@types/chrome": "^0.0.242",
|
||
"buffer": "^6.0.3",
|
||
"parcel": "^2.9.3",
|
||
"process": "^0.11.10",
|
||
"url": "^0.11.1"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80",
|
||
"progressbar.js": "^1.1.0"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/src/background.ts
|
||
================================================
|
||
import { ExtensionServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
|
||
|
||
// Hookup an engine to a service worker handler
|
||
let handler;
|
||
|
||
chrome.runtime.onConnect.addListener(function (port) {
|
||
console.assert(port.name === "web_llm_service_worker");
|
||
if (handler === undefined) {
|
||
handler = new ExtensionServiceWorkerMLCEngineHandler(port);
|
||
} else {
|
||
handler.setPort(port);
|
||
}
|
||
port.onMessage.addListener(handler.onmessage.bind(handler));
|
||
});
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/src/content.js
|
||
================================================
|
||
// Only the content script is able to access the DOM
// Reply to any message on the port with the current page's HTML.
chrome.runtime.onConnect.addListener((port) => {
  port.onMessage.addListener(() => {
    port.postMessage({ contents: document.body.innerHTML });
  });
});
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/src/example.html
|
||
================================================
|
||
In the year 2154, humanity had colonized several planets in the distant reaches
|
||
of the galaxy. The planet of Xylophia-IV was one of the most remote and
|
||
inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite
|
||
these harsh conditions, a team of scientists had established a research station
|
||
on the planet to study the unique geological formations and exotic flora and
|
||
fauna. One day, while conducting a routine survey of the planet's surface, the
|
||
team discovered a strange object buried deep in the ice. As they examined it
|
||
closer, they realized it was a small, metallic capsule with a glowing blue
|
||
symbol etched onto its surface. The team's leader, a brilliant scientist named
|
||
Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious
|
||
origins. She ordered her team to bring it back to the research station for
|
||
further analysis. After weeks of studying the capsule, the team finally cracked
|
||
the code to the symbol etched onto its surface. It was a message from an alien
|
||
race, warning Earth of an impending attack from an unknown threat. The team was
|
||
shocked and dismayed by the news, but they knew they had to act quickly to warn
|
||
the rest of humanity. They transmitted the message to the nearest space station,
|
||
which relayed it to Earth's government. As the threat of attack loomed near, the
|
||
team remained on high alert, ready to face whatever dangers lay ahead. They had
|
||
uncovered the secrets of the universe, and now they were determined to protect
|
||
their planet and its inhabitants at all costs.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/src/manifest.json
|
||
================================================
|
||
{
|
||
"manifest_version": 3,
|
||
"name": "MLCBot",
|
||
"version": "0.1.0",
|
||
"description": "Chat with your browser",
|
||
"icons": {
|
||
"16": "icons/icon-16.png",
|
||
"32": "icons/icon-32.png",
|
||
"64": "icons/icon-64.png",
|
||
"128": "icons/icon-128.png"
|
||
},
|
||
"content_security_policy": {
|
||
"extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co"
|
||
},
|
||
"action": {
|
||
"default_title": "MLCBot",
|
||
"default_popup": "popup.html"
|
||
},
|
||
"content_scripts": [
|
||
{
|
||
"matches": ["<all_urls>"],
|
||
"js": ["content.js"]
|
||
}
|
||
],
|
||
"background": {
|
||
"service_worker": "background.ts",
|
||
"type": "module"
|
||
},
|
||
"permissions": ["storage", "tabs", "webNavigation"]
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/src/popup.css
|
||
================================================
|
||
*,
|
||
*::before,
|
||
*::after {
|
||
margin: 0;
|
||
padding: 0;
|
||
box-sizing: border-box;
|
||
}
|
||
|
||
html {
|
||
font-family:
|
||
-apple-system,
|
||
BlinkMacSystemFont,
|
||
Segoe UI,
|
||
Helvetica,
|
||
Arial,
|
||
sans-serif;
|
||
color: #222;
|
||
}
|
||
|
||
body {
|
||
margin: 0;
|
||
padding: 0.5rem;
|
||
background-color: #778da9;
|
||
width: 320px;
|
||
font-size: small;
|
||
}
|
||
|
||
p {
|
||
margin: 0;
|
||
}
|
||
|
||
/* LOADING BAR */
|
||
#loadingContainer {
|
||
margin-bottom: 15px;
|
||
width: 300px;
|
||
height: 8px;
|
||
}
|
||
|
||
/* INPUT AREA */
|
||
#query-input {
|
||
border: 1px solid #ccc;
|
||
border-radius: 4px;
|
||
}
|
||
|
||
.input-container {
|
||
display: flex;
|
||
flex-direction: row;
|
||
align-items: center;
|
||
}
|
||
|
||
.input-container input {
|
||
width: 100%;
|
||
outline: none;
|
||
padding: 0.5rem;
|
||
margin-right: 0.5rem;
|
||
}
|
||
|
||
/* SUBMIT BUTTON */
|
||
.btn {
|
||
background-color: #1b263b;
|
||
color: white;
|
||
font-size: small;
|
||
cursor: pointer;
|
||
border-radius: 4px;
|
||
border: none;
|
||
padding: 0.5rem;
|
||
}
|
||
|
||
.btn:hover {
|
||
background-color: #d0d0d0;
|
||
}
|
||
|
||
.btn:disabled {
|
||
background-color: #a7a7a7;
|
||
color: rgb(255, 255, 255);
|
||
cursor: default;
|
||
}
|
||
|
||
.btn img {
|
||
width: 1rem;
|
||
height: 1rem;
|
||
}
|
||
|
||
/* LOADING */
|
||
|
||
.stage {
|
||
display: flex;
|
||
justify-content: center;
|
||
align-items: center;
|
||
position: relative;
|
||
margin: 0 -5%;
|
||
overflow: hidden;
|
||
}
|
||
|
||
#loading-indicator {
|
||
display: none;
|
||
color: white;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
.dot-flashing {
|
||
position: relative;
|
||
width: 10px;
|
||
height: 10px;
|
||
border-radius: 5px;
|
||
background-color: #1b263b;
|
||
color: #1b263b;
|
||
animation: dot-flashing 0.4s infinite linear alternate;
|
||
animation-delay: 0.2s;
|
||
}
|
||
|
||
.dot-flashing::before,
|
||
.dot-flashing::after {
|
||
content: "";
|
||
display: inline-block;
|
||
position: absolute;
|
||
top: 0;
|
||
}
|
||
|
||
.dot-flashing::before {
|
||
left: -15px;
|
||
width: 10px;
|
||
height: 10px;
|
||
border-radius: 5px;
|
||
background-color: #1b263b;
|
||
color: #1b263b;
|
||
animation: dot-flashing 0.4s infinite alternate;
|
||
animation-delay: 0s;
|
||
}
|
||
|
||
.dot-flashing::after {
|
||
left: 15px;
|
||
width: 10px;
|
||
height: 10px;
|
||
border-radius: 5px;
|
||
background-color: #1b263b;
|
||
color: #1b263b;
|
||
animation: dot-flashing 0.4s infinite alternate;
|
||
animation-delay: 0.4s;
|
||
}
|
||
|
||
@keyframes dot-flashing {
|
||
0% {
|
||
background-color: #1b263b;
|
||
}
|
||
|
||
50%,
|
||
100% {
|
||
background-color: #415a77;
|
||
}
|
||
}
|
||
|
||
/* ANSWERS */
|
||
#queriesAnswersContainer {
|
||
display: block;
|
||
color: white;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
#answer {
|
||
color: #333333;
|
||
}
|
||
|
||
#answerWrapper {
|
||
display: none;
|
||
background-color: #ffd166;
|
||
border-radius: 8px;
|
||
padding: 0.5rem;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
.queriesAnswers {
|
||
border-radius: 8px;
|
||
background-color: #ffd166;
|
||
padding: 0.5rem;
|
||
color: #333333;
|
||
}
|
||
|
||
#lastQuery {
|
||
color: rgb(188, 188, 188);
|
||
}
|
||
|
||
#lastAnswer {
|
||
color: white;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
#lastRequest {
|
||
padding: 0.5rem;
|
||
margin-top: 0.5rem;
|
||
background-color: #333333;
|
||
border-radius: 4px;
|
||
}
|
||
|
||
/* ANSWER OPTIONS */
|
||
.timeStamp {
|
||
color: #9a8c98;
|
||
}
|
||
|
||
.copyRow {
|
||
display: flex;
|
||
flex-direction: row;
|
||
align-items: end;
|
||
justify-content: space-between;
|
||
color: #a7a7a7;
|
||
margin-top: 0.5rem;
|
||
}
|
||
|
||
.copyText {
|
||
display: none;
|
||
color: #a7a7a7;
|
||
margin-right: 0.5rem;
|
||
}
|
||
|
||
.copyButton {
|
||
color: #415a77;
|
||
background-color: transparent;
|
||
border: none;
|
||
cursor: pointer;
|
||
padding: 0;
|
||
margin-left: 0.5rem;
|
||
}
|
||
|
||
.copyButton:hover {
|
||
color: #5e80a7;
|
||
background-color: transparent;
|
||
}
|
||
|
||
.removeButton {
|
||
color: #415a77;
|
||
background-color: transparent;
|
||
border: none;
|
||
cursor: pointer;
|
||
padding: 0;
|
||
}
|
||
|
||
.removeButton:hover {
|
||
color: #5e80a7;
|
||
background-color: transparent;
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/src/popup.html
|
||
================================================
|
||
<!doctype html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="UTF-8" />
|
||
<title>Chatbot</title>
|
||
<link rel="stylesheet" href="popup.css" />
|
||
<link
|
||
rel="stylesheet"
|
||
href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
|
||
/>
|
||
</head>
|
||
<body>
|
||
<div id="loadingContainer"></div>
|
||
|
||
<div class="input-container form-group">
|
||
<input
|
||
type="search"
|
||
id="query-input"
|
||
placeholder="What's on your mind?"
|
||
/>
|
||
<button id="submit-button" class="btn">
|
||
<i class="fa fa-comments"></i>
|
||
</button>
|
||
</div>
|
||
|
||
<div class="stage">
|
||
<div id="loading-indicator" class="dot-flashing"></div>
|
||
</div>
|
||
|
||
<div id="answerWrapper">
|
||
<div id="answer"></div>
|
||
<div class="copyRow">
|
||
<span id="timestamp"></span>
|
||
<button
|
||
id="copyAnswer"
|
||
class="btn copyButton"
|
||
title="Copy the Answer to the Clipboard"
|
||
>
|
||
<i class="fa-solid fa-copy fa-lg"></i>
|
||
</button>
|
||
</div>
|
||
</div>
|
||
|
||
<script type="module" src="./popup.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/chrome-extension-webgpu-service-worker/src/popup.ts
|
||
================================================
|
||
"use strict";
|
||
|
||
// This code is partially adapted from the openai-chatgpt-chrome-extension repo:
|
||
// https://github.com/jessedi0n/openai-chatgpt-chrome-extension
|
||
|
||
import "./popup.css";
|
||
|
||
import {
|
||
ChatCompletionMessageParam,
|
||
CreateExtensionServiceWorkerMLCEngine,
|
||
MLCEngineInterface,
|
||
InitProgressReport,
|
||
} from "@mlc-ai/web-llm";
|
||
import { ProgressBar, Line } from "progressbar.js";
|
||
|
||
/***************** UI elements *****************/
// Whether or not to use the content from the active tab as the context
const useContext = false;
// Promise-based delay helper.
const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));

const queryInput = document.getElementById("query-input")!;
const submitButton = document.getElementById("submit-button")!;

// True while the model weights are loading in the service worker.
let isLoadingParams = false;

// Keep the submit button disabled until loading completes.
(<HTMLButtonElement>submitButton).disabled = true;

// Thin progress bar animated by the init-progress callback below.
const progressBar: ProgressBar = new Line("#loadingContainer", {
  strokeWidth: 4,
  easing: "easeInOut",
  duration: 1400,
  color: "#ffd166",
  trailColor: "#eee",
  trailWidth: 1,
  svgStyle: { width: "100%", height: "100%" },
});

/***************** Web-LLM MLCEngine Configuration *****************/
// Mirrors loading progress into the bar; re-enables the UI at 100%.
const initProgressCallback = (report: InitProgressReport) => {
  progressBar.animate(report.progress, {
    duration: 50,
  });
  if (report.progress == 1.0) {
    enableInputs();
  }
};

// Top-level await: the engine proxies requests to the extension's
// service worker, where the model actually runs.
const engine: MLCEngineInterface = await CreateExtensionServiceWorkerMLCEngine(
  "Qwen2-0.5B-Instruct-q4f16_1-MLC",
  { initProgressCallback: initProgressCallback },
);
// Running transcript sent with every completion request.
const chatHistory: ChatCompletionMessageParam[] = [];

isLoadingParams = true;
|
||
|
||
function enableInputs() {
|
||
if (isLoadingParams) {
|
||
sleep(500);
|
||
(<HTMLButtonElement>submitButton).disabled = false;
|
||
const loadingBarContainer = document.getElementById("loadingContainer")!;
|
||
loadingBarContainer.remove();
|
||
queryInput.focus();
|
||
isLoadingParams = false;
|
||
}
|
||
}
|
||
|
||
/***************** Event Listeners *****************/
|
||
|
||
// Disable submit button if input field is empty
|
||
queryInput.addEventListener("keyup", () => {
|
||
if ((<HTMLInputElement>queryInput).value === "") {
|
||
(<HTMLButtonElement>submitButton).disabled = true;
|
||
} else {
|
||
(<HTMLButtonElement>submitButton).disabled = false;
|
||
}
|
||
});
|
||
|
||
// If user presses enter, click submit button
|
||
queryInput.addEventListener("keyup", (event) => {
|
||
if (event.code === "Enter") {
|
||
event.preventDefault();
|
||
submitButton.click();
|
||
}
|
||
});
|
||
|
||
// Listen for clicks on submit button
|
||
async function handleClick() {
|
||
// Get the message from the input field
|
||
const message = (<HTMLInputElement>queryInput).value;
|
||
console.log("message", message);
|
||
chatHistory.push({ role: "user", content: message });
|
||
|
||
// Clear the answer
|
||
document.getElementById("answer")!.innerHTML = "";
|
||
// Hide the answer
|
||
document.getElementById("answerWrapper")!.style.display = "none";
|
||
// Show the loading indicator
|
||
document.getElementById("loading-indicator")!.style.display = "block";
|
||
|
||
// Send the chat completion message to the engine
|
||
let curMessage = "";
|
||
const completion = await engine.chat.completions.create({
|
||
stream: true,
|
||
messages: chatHistory,
|
||
});
|
||
|
||
// Update the answer as the model generates more text
|
||
for await (const chunk of completion) {
|
||
const curDelta = chunk.choices[0].delta.content;
|
||
if (curDelta) {
|
||
curMessage += curDelta;
|
||
}
|
||
updateAnswer(curMessage);
|
||
}
|
||
chatHistory.push({ role: "assistant", content: await engine.getMessage() });
|
||
}
|
||
|
||
submitButton.addEventListener("click", handleClick);
|
||
|
||
function updateAnswer(answer: string) {
|
||
// Show answer
|
||
document.getElementById("answerWrapper")!.style.display = "block";
|
||
const answerWithBreaks = answer.replace(/\n/g, "<br>");
|
||
document.getElementById("answer")!.innerHTML = answerWithBreaks;
|
||
// Add event listener to copy button
|
||
document.getElementById("copyAnswer")!.addEventListener("click", () => {
|
||
// Get the answer text
|
||
const answerText = answer;
|
||
// Copy the answer text to the clipboard
|
||
navigator.clipboard
|
||
.writeText(answerText)
|
||
.then(() => console.log("Answer text copied to clipboard"))
|
||
.catch((err) => console.error("Could not copy text: ", err));
|
||
});
|
||
const options: Intl.DateTimeFormatOptions = {
|
||
month: "short",
|
||
day: "2-digit",
|
||
hour: "2-digit",
|
||
minute: "2-digit",
|
||
second: "2-digit",
|
||
};
|
||
const time = new Date().toLocaleString("en-US", options);
|
||
// Update timestamp
|
||
document.getElementById("timestamp")!.innerText = time;
|
||
// Hide loading indicator
|
||
document.getElementById("loading-indicator")!.style.display = "none";
|
||
}
|
||
|
||
function fetchPageContents() {
|
||
chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) {
|
||
if (tabs[0]?.id) {
|
||
const port = chrome.tabs.connect(tabs[0].id, { name: "channelName" });
|
||
port.postMessage({});
|
||
port.onMessage.addListener(function (msg) {
|
||
console.log("Page contents:", msg.contents);
|
||
chrome.runtime.sendMessage({ context: msg.contents });
|
||
});
|
||
}
|
||
});
|
||
}
|
||
|
||
// Grab the page contents when the popup is opened
|
||
window.onload = function () {
|
||
if (useContext) {
|
||
fetchPageContents();
|
||
}
|
||
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/embeddings/README.md
|
||
================================================
|
||
# WebLLM Get Started App
|
||
|
||
This folder provides a minimum demo to show WebLLM API in a webapp setting.
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note: if you would like to hack on the WebLLM core package, you can change
the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/embeddings/package.json
|
||
================================================
|
||
{
|
||
"name": "embeddings-example",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/embeddings.html --port 8885",
|
||
"build": "parcel build src/embeddings.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80",
|
||
"langchain": "0.2.15"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/embeddings/src/embeddings.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./embeddings.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/embeddings/src/embeddings.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
import { MemoryVectorStore } from "langchain/vectorstores/memory";
|
||
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
|
||
import type { Document } from "@langchain/core/documents";
|
||
import { formatDocumentsAsString } from "langchain/util/document";
|
||
import { PromptTemplate } from "@langchain/core/prompts";
|
||
import {
|
||
RunnableSequence,
|
||
RunnablePassthrough,
|
||
} from "@langchain/core/runnables";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
|
||
// For integration with Langchain
|
||
class WebLLMEmbeddings implements EmbeddingsInterface {
|
||
engine: webllm.MLCEngineInterface;
|
||
modelId: string;
|
||
constructor(engine: webllm.MLCEngineInterface, modelId: string) {
|
||
this.engine = engine;
|
||
this.modelId = modelId;
|
||
}
|
||
|
||
async _embed(texts: string[]): Promise<number[][]> {
|
||
const reply = await this.engine.embeddings.create({
|
||
input: texts,
|
||
model: this.modelId,
|
||
});
|
||
const result: number[][] = [];
|
||
for (let i = 0; i < texts.length; i++) {
|
||
result.push(reply.data[i].embedding);
|
||
}
|
||
return result;
|
||
}
|
||
|
||
async embedQuery(document: string): Promise<number[]> {
|
||
return this._embed([document]).then((embeddings) => embeddings[0]);
|
||
}
|
||
|
||
async embedDocuments(documents: string[]): Promise<number[][]> {
|
||
return this._embed(documents);
|
||
}
|
||
}
|
||
|
||
// Prepare inputs
|
||
const documents_og = ["The Data Cloud!", "Mexico City of Course!"];
|
||
const queries_og = ["what is snowflake?", "Where can I get the best tacos?"];
|
||
const documents: string[] = [];
|
||
const queries: string[] = [];
|
||
const query_prefix =
|
||
"Represent this sentence for searching relevant passages: ";
|
||
// Process according to Snowflake model
|
||
documents_og.forEach(function (item, index) {
|
||
documents[index] = `[CLS] ${item} [SEP]`;
|
||
});
|
||
queries_og.forEach(function (item, index) {
|
||
queries[index] = `[CLS] ${query_prefix}${item} [SEP]`;
|
||
});
|
||
console.log("Formatted documents: ", documents);
|
||
console.log("Formatted queries: ", queries);
|
||
|
||
// Using webllm's API
// Embeds the prepared documents and queries directly through WebLLM's
// embeddings endpoint, then scores query/document similarity with
// Langchain's in-memory vector store.
async function webllmAPI() {
  // b4 means the max batch size is compiled as 4. That is, the model can process 4 inputs in a
  // batch. If given more than 4, the model will forward multiple times. The larger the max batch
  // size, the more memory it consumes.
  // const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b32";
  const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b4";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    {
      initProgressCallback: initProgressCallback,
      logLevel: "INFO", // specify the log level
    },
  );

  // Embed documents and queries in separate batched calls.
  const docReply = await engine.embeddings.create({ input: documents });
  console.log(docReply);
  console.log(docReply.usage);

  const queryReply = await engine.embeddings.create({ input: queries });
  console.log(queryReply);
  console.log(queryReply.usage);

  // Calculate similarity (we use langchain here, but any method works)
  const vectorStore = await MemoryVectorStore.fromExistingIndex(
    new WebLLMEmbeddings(engine, selectedModel),
  );
  // See score
  // Print the similarity of every query against every document.
  for (let i = 0; i < queries_og.length; i++) {
    console.log(`Similarity with: ${queries_og[i]}`);
    for (let j = 0; j < documents_og.length; j++) {
      const similarity = vectorStore.similarity(
        queryReply.data[i].embedding,
        docReply.data[j].embedding,
      );
      console.log(`${documents_og[j]}: ${similarity}`);
    }
  }
}
|
||
|
||
// Alternatively, integrating with Langchain's API
|
||
async function langchainAPI() {
|
||
// b4 means the max batch size is compiled as 4. That is, the model can process 4 inputs in a
|
||
// batch. If given more than 4, the model will forward multiple times. The larger the max batch
|
||
// size, the more memory it consumes.
|
||
// const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b32";
|
||
const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b4";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{
|
||
initProgressCallback: initProgressCallback,
|
||
logLevel: "INFO", // specify the log level
|
||
},
|
||
);
|
||
|
||
const vectorStore = await MemoryVectorStore.fromExistingIndex(
|
||
new WebLLMEmbeddings(engine, selectedModel),
|
||
);
|
||
const document0: Document = {
|
||
pageContent: documents[0],
|
||
metadata: {},
|
||
};
|
||
const document1: Document = {
|
||
pageContent: documents[1],
|
||
metadata: {},
|
||
};
|
||
await vectorStore.addDocuments([document0, document1]);
|
||
|
||
const similaritySearchResults0 = await vectorStore.similaritySearch(
|
||
queries[0],
|
||
1,
|
||
);
|
||
for (const doc of similaritySearchResults0) {
|
||
console.log(`* ${doc.pageContent}`);
|
||
}
|
||
|
||
const similaritySearchResults1 = await vectorStore.similaritySearch(
|
||
queries[1],
|
||
1,
|
||
);
|
||
for (const doc of similaritySearchResults1) {
|
||
console.log(`* ${doc.pageContent}`);
|
||
}
|
||
}
|
||
|
||
// RAG with Langchain.js using WebLLM for both LLM and Embedding in a single engine
|
||
// Followed https://js.langchain.com/v0.1/docs/expression_language/cookbook/retrieval/
|
||
// There are many possible ways to achieve RAG (e.g. degree of integration with Langchain,
|
||
// using WebWorker, etc.). We provide a minimal example here.
|
||
async function simpleRAG() {
|
||
// 0. Load both embedding model and LLM to a single WebLLM Engine
|
||
const embeddingModelId = "snowflake-arctic-embed-m-q0f32-MLC-b4";
|
||
const llmModelId = "gemma-2-2b-it-q4f32_1-MLC-1k";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
[embeddingModelId, llmModelId],
|
||
{
|
||
initProgressCallback: initProgressCallback,
|
||
logLevel: "INFO", // specify the log level
|
||
},
|
||
);
|
||
|
||
const vectorStore = await MemoryVectorStore.fromTexts(
|
||
["mitochondria is the powerhouse of the cell"],
|
||
[{ id: 1 }],
|
||
new WebLLMEmbeddings(engine, embeddingModelId),
|
||
);
|
||
const retriever = vectorStore.asRetriever();
|
||
|
||
const prompt =
|
||
PromptTemplate.fromTemplate(`Answer the question based only on the following context:
|
||
{context}
|
||
|
||
Question: {question}`);
|
||
|
||
const chain = RunnableSequence.from([
|
||
{
|
||
context: retriever.pipe(formatDocumentsAsString),
|
||
question: new RunnablePassthrough(),
|
||
},
|
||
prompt,
|
||
]);
|
||
|
||
const formattedPrompt = (
|
||
await chain.invoke("What is the powerhouse of the cell?")
|
||
).toString();
|
||
const reply = await engine.chat.completions.create({
|
||
messages: [{ role: "user", content: formattedPrompt }],
|
||
model: llmModelId,
|
||
});
|
||
|
||
console.log(reply.choices[0].message.content);
|
||
|
||
/*
|
||
"The powerhouse of the cell is the mitochondria."
|
||
*/
|
||
}
|
||
|
||
// Select one to run
|
||
// webllmAPI();
|
||
// langchainAPI();
|
||
simpleRAG();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/README.md
|
||
================================================
|
||
### OpenAI API Demos - Function calling
|
||
|
||
This folder contains two main ways of using function calling with WebLLM.
|
||
|
||
`function-calling-manual` demonstrates how you can use function calling with Llama3.1 and Hermes2
without using the `tools`, `tool_choice`, and `tool_call` fields. This is the most flexible way: you can follow
the instructions given by the model releaser and iterate on top of them yourself. However, you need to do the parsing on your own, which differs for each model. For instance, Hermes2 models use `<tool_call>` and `</tool_call>` to wrap around a tool call, which may be very different from other models' formats.
|
||
|
||
`function-calling-openai` conforms to the OpenAI function-calling usage, leveraging the `tools`, `tool_choice`, and `tool_call`
fields. This is easier to use, but sacrifices some flexibility, since we use a pre-defined system prompt
for this.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-manual/README.md
|
||
================================================
|
||
### Demos - Function calling
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-manual/package.json
|
||
================================================
|
||
{
|
||
"name": "openai-api",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/function_calling_manual.html --port 8888",
|
||
"build": "parcel build src/function_calling_manual.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-manual/src/function_calling_manual.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
<label id="generate-label"> </label>
|
||
|
||
<script type="module" src="./function_calling_manual.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-manual/src/function_calling_manual.ts
|
||
================================================
|
||
/* eslint-disable no-useless-escape */
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
// Common helper methods
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
|
||
// Same example as https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B#prompt-format-for-function-calling
|
||
async function hermes2_example() {
|
||
// 0. Setups
|
||
// Most manual function calling models specify the tools inside the system prompt
|
||
const system_prompt = `You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> {"type": "function", "function": {"name": "get_stock_fundamentals", "description": "get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\\n\\n Args:\\n symbol (str): The stock symbol.\\n\\n Returns:\\n dict: A dictionary containing fundamental data.\\n Keys:\\n - \'symbol\': The stock symbol.\\n - \'company_name\': The long name of the company.\\n - \'sector\': The sector to which the company belongs.\\n - \'industry\': The industry to which the company belongs.\\n - \'market_cap\': The market capitalization of the company.\\n - \'pe_ratio\': The forward price-to-earnings ratio.\\n - \'pb_ratio\': The price-to-book ratio.\\n - \'dividend_yield\': The dividend yield.\\n - \'eps\': The trailing earnings per share.\\n - \'beta\': The beta value of the stock.\\n - \'52_week_high\': The 52-week high price of the stock.\\n - \'52_week_low\': The 52-week low price of the stock.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string"}}, "required": ["symbol"]}}} </tools> Use the following pydantic model json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n<tool_call>\n{"arguments": <args-dict>, "name": <function-name>}\n</tool_call>`;
|
||
// Same formatting for Hermes-2-Pro-Llama-3, Hermes-2-Theta-Llama-3
|
||
// const selectedModel = "Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC";
|
||
const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{ initProgressCallback: initProgressCallback, logLevel: "INFO" },
|
||
);
|
||
const seed = 0;
|
||
|
||
// 1. First request, expect to generate tool call
|
||
const messages: webllm.ChatCompletionMessageParam[] = [
|
||
{ role: "system", content: system_prompt },
|
||
{
|
||
role: "user",
|
||
content: "Fetch the stock fundamentals data for Tesla (TSLA)",
|
||
},
|
||
];
|
||
const request1: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
|
||
messages: messages,
|
||
seed: seed,
|
||
};
|
||
const reply1 = await engine.chat.completions.create(request1);
|
||
const response1 = reply1.choices[0].message.content;
|
||
console.log(reply1.usage);
|
||
console.log("Response 1: " + response1);
|
||
messages.push({ role: "assistant", content: response1 });
|
||
// <tool_call>\n{"arguments": {"symbol": "TSLA"}, "name": "get_stock_fundamentals"}\n</tool_call>
|
||
|
||
// 2. Call function on your own to get tool response
|
||
const tool_response = `<tool_response>\n{"name": "get_stock_fundamentals", "content": {'symbol': 'TSLA', 'company_name': 'Tesla, Inc.', 'sector': 'Consumer Cyclical', 'industry': 'Auto Manufacturers', 'market_cap': 611384164352, 'pe_ratio': 49.604652, 'pb_ratio': 9.762013, 'dividend_yield': None, 'eps': 4.3, 'beta': 2.427, '52_week_high': 299.29, '52_week_low': 152.37}}\n</tool_response>`;
|
||
messages.push({ role: "tool", content: tool_response, tool_call_id: "0" });
|
||
|
||
// 3. Get natural language response
|
||
const request2: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
|
||
messages: messages,
|
||
seed: seed,
|
||
};
|
||
const reply2 = await engine.chat.completions.create(request2);
|
||
const response2 = reply2.choices[0].message.content;
|
||
messages.push({ role: "assistant", content: response2 });
|
||
console.log(reply2.usage);
|
||
console.log("Response 2: " + response2);
|
||
|
||
// 4. Another function call
|
||
messages.push({
|
||
role: "user",
|
||
content: "Now do another one with NVIDIA, symbol being NVDA.",
|
||
});
|
||
const request3: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
|
||
messages: messages,
|
||
seed: seed,
|
||
};
|
||
const reply3 = await engine.chat.completions.create(request3);
|
||
const response3 = reply3.choices[0].message.content;
|
||
messages.push({ role: "assistant", content: response3 });
|
||
console.log(reply3.usage);
|
||
console.log("Response 3: " + response3);
|
||
// <tool_call>\n{"arguments": {"symbol": "NVDA"}, "name": "get_stock_fundamentals"}\n</tool_call>
|
||
}
|
||
|
||
// Similar example to https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#user-defined-custom-tool-calling
|
||
async function llama3_1_example() {
|
||
// Follows example, but tweaks the formatting with <function>
|
||
const system_prompt = `Cutting Knowledge Date: December 2023
|
||
Today Date: 23 Jul 2024
|
||
# Tool Instructions
|
||
- When looking for real time information use relevant functions if available
|
||
You have access to the following functions:
|
||
|
||
{
|
||
"type": "function",
|
||
"function": {
|
||
"name": "get_current_temperature",
|
||
"description": "Get the current temperature at a location.",
|
||
"parameters": {
|
||
"type": "object",
|
||
"properties": {
|
||
"location": {
|
||
"type": "string",
|
||
"description": "The location to get the temperature for, in the format \"City, Country\""
|
||
}
|
||
},
|
||
"required": [
|
||
"location"
|
||
]
|
||
},
|
||
"return": {
|
||
"type": "number",
|
||
"description": "The current temperature at the specified location in the specified units, as a float."
|
||
}
|
||
}
|
||
}
|
||
{
|
||
"type": "function",
|
||
"function": {
|
||
"name": "send_message",
|
||
"description": "Send a message to a recipient.",
|
||
"parameters": {
|
||
"type": "object",
|
||
"properties": {
|
||
"recipient": {
|
||
"type": "string",
|
||
"description": "Name of the recipient of the message"
|
||
}
|
||
"content": {
|
||
"type": "string",
|
||
"description": "Content of the message"
|
||
}
|
||
},
|
||
"required": [
|
||
"recipient",
|
||
"content"
|
||
]
|
||
},
|
||
"return": {
|
||
"type": "None"
|
||
}
|
||
}
|
||
}
|
||
If a you choose to call a function ONLY reply in the following format:
|
||
<function>{"name": function name, "parameters": dictionary of argument name and its value}</function>
|
||
Here is an example,
|
||
<function>{"name": "example_function_name", "parameters": {"example_name": "example_value"}}</function>
|
||
Reminder:
|
||
- Function calls MUST follow the specified format and use BOTH <function> and </function>
|
||
- Required parameters MUST be specified
|
||
- Only call one function at a time
|
||
- When calling a function, do NOT add any other words, ONLY the function calling
|
||
- Put the entire function call reply on one line
|
||
- Always add your sources when using search results to answer the user query
|
||
You are a helpful Assistant.`;
|
||
|
||
const selectedModel = "Llama-3.1-8B-Instruct-q4f16_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{ initProgressCallback: initProgressCallback, logLevel: "INFO" },
|
||
);
|
||
const seed = 0;
|
||
|
||
// 1. First request, expect to generate tool call to get temperature of Paris
|
||
const messages: webllm.ChatCompletionMessageParam[] = [
|
||
{ role: "system", content: system_prompt },
|
||
{
|
||
role: "user",
|
||
content: "Hey, what's the temperature in Paris right now?",
|
||
},
|
||
];
|
||
const request1: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
|
||
messages: messages,
|
||
seed: seed,
|
||
};
|
||
const reply1 = await engine.chat.completions.create(request1);
|
||
const response1 = reply1.choices[0].message.content;
|
||
console.log(reply1.usage);
|
||
console.log("Response 1: " + response1);
|
||
messages.push({ role: "assistant", content: response1 });
|
||
// <function>{"name": "get_current_temperature", "parameters": {"location": "Paris, France"}}</function>
|
||
|
||
// 2. Call function on your own to get tool response
|
||
const tool_response = `{"output": 22.5}`;
|
||
messages.push({ role: "tool", content: tool_response, tool_call_id: "0" });
|
||
|
||
// 3. Get natural language response
|
||
const request2: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
|
||
messages: messages,
|
||
seed: seed,
|
||
};
|
||
const reply2 = await engine.chat.completions.create(request2);
|
||
const response2 = reply2.choices[0].message.content;
|
||
messages.push({ role: "assistant", content: response2 });
|
||
console.log(reply2.usage);
|
||
console.log("Response 2: " + response2);
|
||
// The current temperature in Paris is 22.5°C.
|
||
|
||
// 4. Make another request, expect model to call `send_message`
|
||
messages.push({
|
||
role: "user",
|
||
content: "Send a message to Tom to tell him this information.",
|
||
});
|
||
const request3: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
|
||
messages: messages,
|
||
seed: seed,
|
||
};
|
||
const reply3 = await engine.chat.completions.create(request3);
|
||
const response3 = reply3.choices[0].message.content;
|
||
messages.push({ role: "assistant", content: response3 });
|
||
console.log(reply3.usage);
|
||
console.log("Response 3: " + response3);
|
||
// <function>{"name": "send_message", "parameters": {"recipient": "Tom", "content": "The current temperature in Paris is 22.5°C."}}</function>
|
||
|
||
// 5. Call API, which has no return value, so simply prompt model again
|
||
const tool_response2 = `{"output": None}`;
|
||
messages.push({ role: "tool", content: tool_response2, tool_call_id: "1" });
|
||
const request4: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
|
||
messages: messages,
|
||
seed: seed,
|
||
};
|
||
const reply4 = await engine.chat.completions.create(request4);
|
||
const response4 = reply4.choices[0].message.content;
|
||
console.log(reply4.usage);
|
||
console.log("Response 4: " + response4);
|
||
// The message has been sent to Tom.
|
||
}
|
||
|
||
// Pick one to run
|
||
// hermes2_example();
|
||
llama3_1_example();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-openai/README.md
|
||
================================================
|
||
### Demos - Function calling
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-openai/package.json
|
||
================================================
|
||
{
|
||
"name": "openai-api",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/function_calling_openai.html --port 8888",
|
||
"build": "parcel build src/function_calling_openai.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-openai/src/function_calling_openai.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
<label id="generate-label"> </label>
|
||
|
||
<script type="module" src="./function_calling_openai.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/function-calling/function-calling-openai/src/function_calling_openai.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
async function main() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{ initProgressCallback: initProgressCallback },
|
||
);
|
||
|
||
const tools: Array<webllm.ChatCompletionTool> = [
|
||
{
|
||
type: "function",
|
||
function: {
|
||
name: "get_current_weather",
|
||
description: "Get the current weather in a given location",
|
||
parameters: {
|
||
type: "object",
|
||
properties: {
|
||
location: {
|
||
type: "string",
|
||
description: "The city and state, e.g. San Francisco, CA",
|
||
},
|
||
unit: { type: "string", enum: ["celsius", "fahrenheit"] },
|
||
},
|
||
required: ["location"],
|
||
},
|
||
},
|
||
},
|
||
];
|
||
|
||
const request: webllm.ChatCompletionRequest = {
|
||
stream: true, // works with stream as well, where the last chunk returns tool_calls
|
||
stream_options: { include_usage: true },
|
||
messages: [
|
||
{
|
||
role: "user",
|
||
content:
|
||
"What is the current weather in celsius in Pittsburgh and Tokyo?",
|
||
},
|
||
],
|
||
tool_choice: "auto",
|
||
tools: tools,
|
||
};
|
||
|
||
if (!request.stream) {
|
||
const reply0 = await engine.chat.completions.create(request);
|
||
console.log(reply0.choices[0]);
|
||
console.log(reply0.usage);
|
||
} else {
|
||
// If streaming, the last chunk returns tool calls
|
||
const asyncChunkGenerator = await engine.chat.completions.create(request);
|
||
let message = "";
|
||
let lastChunk: webllm.ChatCompletionChunk | undefined;
|
||
let usageChunk: webllm.ChatCompletionChunk | undefined;
|
||
for await (const chunk of asyncChunkGenerator) {
|
||
console.log(chunk);
|
||
message += chunk.choices[0]?.delta?.content || "";
|
||
setLabel("generate-label", message);
|
||
if (!chunk.usage) {
|
||
lastChunk = chunk;
|
||
}
|
||
usageChunk = chunk;
|
||
}
|
||
console.log(lastChunk!.choices[0].delta);
|
||
console.log(usageChunk!.usage);
|
||
}
|
||
}
|
||
|
||
main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started/README.md
|
||
================================================
|
||
# WebLLM Get Started App
|
||
|
||
This folder provides a minimum demo to show WebLLM API in a webapp setting.
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note: if you would like to hack the WebLLM core package, you can change
the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only
recommended if you would like to hack the WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started/package.json
|
||
================================================
|
||
{
|
||
"name": "get-started",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/get_started.html --port 8888",
|
||
"build": "parcel build src/get_started.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started/src/get_started.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./get_started.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started/src/get_started.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
async function main() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
// Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
|
||
const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{
|
||
initProgressCallback: initProgressCallback,
|
||
logLevel: "INFO", // specify the log level
|
||
},
|
||
// customize kv cache, use either context_window_size or sliding_window_size (with attention sink)
|
||
{
|
||
context_window_size: 2048,
|
||
// sliding_window_size: 1024,
|
||
// attention_sink_size: 4,
|
||
},
|
||
);
|
||
|
||
// Option 2: Specify your own model other than the prebuilt ones
|
||
// const appConfig: webllm.AppConfig = {
|
||
// model_list: [
|
||
// {
|
||
// model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
|
||
// model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC",
|
||
// model_lib:
|
||
// webllm.modelLibURLPrefix +
|
||
// webllm.modelVersion +
|
||
// "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
|
||
// overrides: {
|
||
// context_window_size: 2048,
|
||
// },
|
||
// },
|
||
// ],
|
||
// };
|
||
// const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
// selectedModel,
|
||
// { appConfig: appConfig, initProgressCallback: initProgressCallback },
|
||
// );
|
||
|
||
// Option 3: Instantiate MLCEngine() and call reload() separately
|
||
// const engine: webllm.MLCEngineInterface = new webllm.MLCEngine({
|
||
// appConfig: appConfig, // if do not specify, we use webllm.prebuiltAppConfig
|
||
// initProgressCallback: initProgressCallback,
|
||
// });
|
||
// await engine.reload(selectedModel);
|
||
|
||
const reply0 = await engine.chat.completions.create({
|
||
messages: [{ role: "user", content: "List three US states." }],
|
||
// below configurations are all optional
|
||
n: 3,
|
||
temperature: 1.5,
|
||
max_tokens: 256,
|
||
// 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3.1-8B-Instruct
|
||
// So we would have a higher chance of seeing the latter two, but never the first in the answer
|
||
logit_bias: {
|
||
"46510": -100,
|
||
"7188": -100,
|
||
"8421": 5,
|
||
"51325": 5,
|
||
},
|
||
logprobs: true,
|
||
top_logprobs: 2,
|
||
});
|
||
console.log(reply0);
|
||
console.log(reply0.usage);
|
||
|
||
// To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)`
|
||
}
|
||
|
||
main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-latency-breakdown/README.md
|
||
================================================
|
||
# WebLLM Get Started App
|
||
|
||
This folder provides a minimum demo to show WebLLM API in a webapp setting with
|
||
collection of latency statistics for individual token sampling steps.
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note: if you would like to hack the WebLLM core package, you can change
the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only
recommended if you would like to hack the WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-latency-breakdown/package.json
|
||
================================================
|
||
{
|
||
"name": "get-started-latency-breakdown",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/get_started_latency_breakdown.html --port 8888",
|
||
"build": "parcel build src/get_started_latency_breakdown.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-latency-breakdown/src/get_started_latency_breakdown.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./get_started_latency_breakdown.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-latency-breakdown/src/get_started_latency_breakdown.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
type LatencyBreakdown = {
|
||
logitProcessorTime: number[];
|
||
logitBiasTime: number[];
|
||
penaltyTime: number[];
|
||
sampleTime: number[];
|
||
totalTime: number[];
|
||
grammarBitmaskTime: number[];
|
||
};
|
||
function computeStats(
|
||
latency_breakdown: LatencyBreakdown,
|
||
): Record<string, any> {
|
||
function _computeStats(arr: number[]) {
|
||
if (!arr.length) return undefined;
|
||
const sorted = [...arr].sort((a, b) => a - b);
|
||
const sum = arr.reduce((a, b) => a + b, 0);
|
||
const avg = sum / arr.length;
|
||
const min = sorted[0];
|
||
const max = sorted[sorted.length - 1];
|
||
const p99 = sorted[Math.floor(0.99 * (sorted.length - 1))];
|
||
return { avg, min, max, p99 };
|
||
}
|
||
|
||
const latencyStats: Record<string, any> = {};
|
||
for (const key of Object.keys(latency_breakdown)) {
|
||
const arr = (latency_breakdown as any)[key];
|
||
if (Array.isArray(arr) && arr.length > 0) {
|
||
latencyStats[key] = _computeStats(arr);
|
||
}
|
||
}
|
||
return latencyStats;
|
||
}
|
||
|
||
async function main() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
// Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
|
||
const selectedModel = "Qwen3-0.6B-q0f32-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{
|
||
initProgressCallback: initProgressCallback,
|
||
logLevel: "INFO", // specify the log level
|
||
},
|
||
// customize kv cache, use either context_window_size or sliding_window_size (with attention sink)
|
||
{
|
||
context_window_size: 2048,
|
||
// sliding_window_size: 1024,
|
||
// attention_sink_size: 4,
|
||
},
|
||
);
|
||
|
||
const latencyBreakdown: LatencyBreakdown = {
|
||
logitProcessorTime: [],
|
||
logitBiasTime: [],
|
||
penaltyTime: [],
|
||
sampleTime: [],
|
||
totalTime: [],
|
||
grammarBitmaskTime: [],
|
||
};
|
||
|
||
const decodeTokensPerS: number[] = [];
|
||
const completionTokens: number[] = [];
|
||
const e2eLatencyS: number[] = [];
|
||
const timePerOutputTokenS: number[] = [];
|
||
|
||
const numTrials = 20;
|
||
for (let i = 0; i < numTrials; i++) {
|
||
console.log(`Trial ${i + 1} / ${numTrials}`);
|
||
const reply0 = await engine.chat.completions.create({
|
||
messages: [{ role: "user", content: "List twenty US states." }],
|
||
// below configurations are all optional
|
||
n: 1,
|
||
temperature: 0,
|
||
max_tokens: 2048,
|
||
// 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3.1-8B-Instruct
|
||
// So we would have a higher chance of seeing the latter two, but never the first in the answer
|
||
// logit_bias: {
|
||
// "46510": -100,
|
||
// "7188": -100,
|
||
// "8421": 5,
|
||
// "41325": 5,
|
||
// },
|
||
top_p: 0.8,
|
||
logprobs: true,
|
||
top_logprobs: 2,
|
||
frequency_penalty: 1.2,
|
||
presence_penalty: 1.0,
|
||
repetition_penalty: 1.1,
|
||
});
|
||
|
||
const logitProcessorTime =
|
||
reply0.usage?.extra.latencyBreakdown?.logitProcessorTime;
|
||
const logitBiasTime = reply0.usage?.extra.latencyBreakdown?.logitBiasTime;
|
||
const penaltyTime = reply0.usage?.extra.latencyBreakdown?.penaltyTime;
|
||
const sampleTime = reply0.usage?.extra.latencyBreakdown?.sampleTime;
|
||
const totalTime = reply0.usage?.extra.latencyBreakdown?.totalTime;
|
||
const grammarBitmaskTime =
|
||
reply0.usage?.extra.latencyBreakdown?.grammarBitmaskTime;
|
||
|
||
latencyBreakdown.logitProcessorTime.push(...(logitProcessorTime || []));
|
||
latencyBreakdown.logitBiasTime.push(...(logitBiasTime || []));
|
||
latencyBreakdown.penaltyTime.push(...(penaltyTime || []));
|
||
latencyBreakdown.sampleTime.push(...(sampleTime || []));
|
||
latencyBreakdown.totalTime.push(...(totalTime || []));
|
||
latencyBreakdown.grammarBitmaskTime.push(...(grammarBitmaskTime || []));
|
||
|
||
decodeTokensPerS.push(reply0.usage?.extra.decode_tokens_per_s || 0);
|
||
e2eLatencyS.push(reply0.usage?.extra.e2e_latency_s || 0);
|
||
timePerOutputTokenS.push(reply0.usage?.extra.time_per_output_token_s || 0);
|
||
completionTokens.push(reply0.usage?.completion_tokens || 0);
|
||
}
|
||
|
||
const latencyStats: { [key: string]: number } =
|
||
computeStats(latencyBreakdown);
|
||
console.log("Latency stats: ", latencyStats);
|
||
console.log("Decode tokens per second: ", decodeTokensPerS);
|
||
console.log("Completion tokens: ", completionTokens);
|
||
console.log("E2E latency (s): ", e2eLatencyS);
|
||
console.log("Time per output token (s): ", timePerOutputTokenS);
|
||
|
||
// To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)`
|
||
}
|
||
|
||
main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-web-worker/README.md
|
||
================================================
|
||
# WebLLM Get Started with WebWorker
|
||
|
||
This folder provides a minimum demo to show WebLLM API using
|
||
[WebWorker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).
|
||
The main benefit of a web worker is that all ML workloads run on a separate thread and,
as a result, are less likely to block the UI.
|
||
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note: if you would like to hack the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you intend to modify the WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-web-worker/package.json
|
||
================================================
|
||
{
|
||
"name": "get-started-web-worker",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/get_started.html --port 8885",
|
||
"build": "parcel build src/get_started.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^6.0.3",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-web-worker/src/get_started.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./main.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-web-worker/src/main.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
// There are two demonstrations, pick one to run
|
||
|
||
/**
 * Chat completion (OpenAI style) without streaming, where we get the entire response at once.
 * The model runs inside a dedicated web worker so inference does not block the UI thread.
 */
async function mainNonStreaming() {
  // Mirror engine-initialization progress (e.g. weight download) into the page.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

  // The Worker runs worker.ts, which hosts the actual engine; this returns a proxy.
  const engine: webllm.MLCEngineInterface =
    await webllm.CreateWebWorkerMLCEngine(
      new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
      selectedModel,
      { initProgressCallback: initProgressCallback },
    );

  // Multi-turn conversation seeded with a system prompt and prior assistant turn.
  const request: webllm.ChatCompletionRequest = {
    messages: [
      {
        role: "system",
        content:
          "You are a helpful, respectful and honest assistant. " +
          "Be as happy as you can when speaking please. ",
      },
      { role: "user", content: "Provide me three US states." },
      { role: "assistant", content: "California, New York, Pennsylvania." },
      { role: "user", content: "Two more please!" },
    ],
    n: 3, // request three independent completions for the same prompt
    temperature: 1.5,
    max_tokens: 256,
  };

  const reply0 = await engine.chat.completions.create(request);
  console.log(reply0);

  console.log(reply0.usage);
}
|
||
|
||
/**
 * Chat completion (OpenAI style) with streaming, where delta is sent while generating response.
 * Each chunk's delta text is appended to the "generate-label" element as it arrives.
 */
async function mainStreaming() {
  // Mirror engine-initialization progress into the page.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

  // The Worker runs worker.ts, which hosts the actual engine; this returns a proxy.
  const engine: webllm.MLCEngineInterface =
    await webllm.CreateWebWorkerMLCEngine(
      new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
      selectedModel,
      { initProgressCallback: initProgressCallback },
    );

  const request: webllm.ChatCompletionRequest = {
    stream: true,
    stream_options: { include_usage: true }, // ask for usage in the final chunk
    messages: [
      {
        role: "system",
        content:
          "You are a helpful, respectful and honest assistant. " +
          "Be as happy as you can when speaking please. ",
      },
      { role: "user", content: "Provide me three US states." },
      { role: "assistant", content: "California, New York, Pennsylvania." },
      { role: "user", content: "Two more please!" },
    ],
    temperature: 1.5,
    max_tokens: 256,
  };

  // With stream: true, create() returns an async iterable of chunks.
  const asyncChunkGenerator = await engine.chat.completions.create(request);
  let message = "";
  for await (const chunk of asyncChunkGenerator) {
    console.log(chunk);
    message += chunk.choices[0]?.delta?.content || "";
    setLabel("generate-label", message);
    if (chunk.usage) {
      console.log(chunk.usage); // only last chunk has usage
    }
    // engine.interruptGenerate(); // works with interrupt as well
  }
  console.log("Final message:\n", await engine.getMessage()); // the concatenated message
}
|
||
|
||
// Run one of the functions below
// mainNonStreaming();
|
||
mainStreaming();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/get-started-web-worker/src/worker.ts
|
||
================================================
|
||
import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";

// Hookup an engine to a worker handler.
// The handler lives inside this worker and services requests posted by the
// WebWorkerMLCEngine proxy created on the main thread (see main.ts).
const handler = new WebWorkerMLCEngineHandler();
// Forward every message from the main thread to the handler.
self.onmessage = (msg: MessageEvent) => {
  handler.onmessage(msg);
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-mode/README.md
|
||
================================================
|
||
### OpenAI API Demos
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-mode/package.json
|
||
================================================
|
||
{
|
||
"name": "openai-api",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/json_mode.html --port 8888",
|
||
"build": "parcel build src/json_mode.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-mode/src/json_mode.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output.
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<script type="module" src="./json_mode.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-mode/src/json_mode.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
/**
 * JSON-mode demo: forces the model to emit valid JSON by setting
 * `response_format: { type: "json_object" }` on the request.
 */
async function main() {
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback },
  );
  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false, // works with streaming, logprobs, top_logprobs as well
    messages: [
      {
        role: "user",
        content: "Write a short JSON file introducing yourself.",
      },
    ],
    n: 2, // generate two completions for the same prompt
    max_tokens: 128,
    // json_object mode constrains decoding so the output is valid JSON.
    response_format: { type: "json_object" } as webllm.ResponseFormat,
  };

  const reply0 = await engine.chatCompletion(request);
  console.log(reply0);
  console.log("First reply's last choice:\n" + (await engine.getMessage()));
  console.log(reply0.usage);
}

main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-schema/README.md
|
||
================================================
|
||
### OpenAI API Demos
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-schema/package.json
|
||
================================================
|
||
{
|
||
"name": "openai-api",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/json_schema.html --port 8885",
|
||
"build": "parcel build src/json_schema.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-schema/src/json_schema.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output.
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<script type="module" src="./json_schema.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/json-schema/src/json_schema.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
import { Type, Static } from "@sinclair/typebox";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
/**
 * Demonstrates JSON-schema-constrained generation, showing two ways to build
 * the schema (a raw string vs. the typebox library) and passing it through
 * `response_format`.
 */
async function simpleStructuredTextExample() {
  // There are several options of providing such a schema
  // 1. You can directly define a schema in string
  // NOTE(review): schema1 is defined only to illustrate the string form and is
  // never used below; schema2 is what gets sent.
  const schema1 = `{
    "properties": {
        "size": {"title": "Size", "type": "integer"},
        "is_accepted": {"title": "Is Accepted", "type": "boolean"},
        "num": {"title": "Num", "type": "number"}
    },
    "required": ["size", "is_accepted", "num"],
    "title": "Schema", "type": "object"
  }`;

  // 2. You can use 3rdparty libraries like typebox to create a schema
  const T = Type.Object({
    size: Type.Integer(),
    is_accepted: Type.Boolean(),
    num: Type.Number(),
  });
  type T = Static<typeof T>;
  const schema2 = JSON.stringify(T);
  console.log(schema2);
  // {"type":"object","properties":{"size":{"type":"integer"},"is_accepted":{"type":"boolean"},
  // "num":{"type":"number"}},"required":["size","is_accepted","num"]}

  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  // const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback, logLevel: "INFO" },
  );

  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false, // works with streaming, logprobs, top_logprobs as well
    messages: [
      {
        role: "user",
        content:
          "Generate a json containing three fields: an integer field named size, a " +
          "boolean field named is_accepted, and a float field named num.",
      },
    ],
    max_tokens: 128,
    // Constrain decoding so the output conforms to schema2.
    response_format: {
      type: "json_object",
      schema: schema2,
    } as webllm.ResponseFormat,
  };

  const reply0 = await engine.chatCompletion(request);
  console.log(reply0);
  console.log("Output:\n" + (await engine.getMessage()));
  console.log(reply0.usage);
}
|
||
|
||
// The json schema and prompt is taken from
// https://github.com/sgl-project/sglang/tree/main?tab=readme-ov-file#json-decoding
/**
 * Constrained generation of a character sheet: a typebox schema with nested
 * objects and string enums is serialized and passed via `response_format`.
 */
async function harryPotterExample() {
  const T = Type.Object({
    name: Type.String(),
    house: Type.Enum({
      Gryffindor: "Gryffindor",
      Hufflepuff: "Hufflepuff",
      Ravenclaw: "Ravenclaw",
      Slytherin: "Slytherin",
    }),
    blood_status: Type.Enum({
      "Pure-blood": "Pure-blood",
      "Half-blood": "Half-blood",
      "Muggle-born": "Muggle-born",
    }),
    occupation: Type.Enum({
      Student: "Student",
      Professor: "Professor",
      "Ministry of Magic": "Ministry of Magic",
      Other: "Other",
    }),
    wand: Type.Object({
      wood: Type.String(),
      core: Type.String(),
      length: Type.Number(),
    }),
    alive: Type.Boolean(),
    patronus: Type.String(),
  });

  type T = Static<typeof T>;
  const schema = JSON.stringify(T);
  console.log(schema);

  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC";

  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback, logLevel: "INFO" },
  );

  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false,
    messages: [
      {
        role: "user",
        content:
          "Hermione Granger is a character in Harry Potter. Please fill in the following information about this character in JSON format." +
          "Name is a string of character name. House is one of Gryffindor, Hufflepuff, Ravenclaw, Slytherin. Blood status is one of Pure-blood, Half-blood, Muggle-born. Occupation is one of Student, Professor, Ministry of Magic, Other. Wand is an object with wood, core, and length. Alive is a boolean. Patronus is a string.",
      },
    ],
    max_tokens: 128,
    response_format: {
      type: "json_object",
      schema: schema,
    } as webllm.ResponseFormat,
  };

  const reply = await engine.chatCompletion(request);
  console.log(reply);
  console.log("Output:\n" + (await engine.getMessage()));
  console.log(reply.usage);
  // Engine-specific extra usage details reported alongside the standard fields.
  console.log(reply.usage!.extra);
}
|
||
|
||
/**
 * Manual function calling via schema-constrained output: the available tools
 * are described inside the system prompt (Hermes-style <tools> tags) and the
 * model is forced to emit a `tool_calls` JSON object matching `schema`.
 */
async function functionCallingExample() {
  // Shape of the expected reply: a list of { name, arguments } tool calls.
  const T = Type.Object({
    tool_calls: Type.Array(
      Type.Object({
        arguments: Type.Any(),
        name: Type.String(),
      }),
    ),
  });
  type T = Static<typeof T>;
  const schema = JSON.stringify(T);
  console.log(schema);

  // Single tool definition, serialized into the system prompt below.
  const tools: Array<webllm.ChatCompletionTool> = [
    {
      type: "function",
      function: {
        name: "get_current_weather",
        description: "Get the current weather in a given location",
        parameters: {
          type: "object",
          properties: {
            location: {
              type: "string",
              description: "The city and state, e.g. San Francisco, CA",
            },
            unit: { type: "string", enum: ["celsius", "fahrenheit"] },
          },
          required: ["location"],
        },
      },
    },
  ];

  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    {
      initProgressCallback: initProgressCallback,
    },
  );

  const request: webllm.ChatCompletionRequest = {
    stream: false,
    messages: [
      {
        role: "system",
        content: `You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> ${JSON.stringify(
          tools,
        )} </tools>. Do not stop calling functions until the task has been accomplished or you've reached max iteration of 10.
  Calling multiple functions at once can overload the system and increase cost so call one function at a time please.
  If you plan to continue with analysis, always call another function.
  Return a valid json object (using double quotes) in the following schema: ${JSON.stringify(
          schema,
        )}.`,
      },
      {
        role: "user",
        content:
          "What is the current weather in celsius in Pittsburgh and Tokyo?",
      },
    ],
    // Constrain decoding to the tool_calls schema defined above.
    response_format: {
      type: "json_object",
      schema: schema,
    } as webllm.ResponseFormat,
  };

  const reply = await engine.chat.completions.create(request);
  console.log(reply.choices[0].message.content);

  console.log(reply.usage);
}
|
||
|
||
/**
 * Constrains output with a raw (E)BNF grammar (here: a JSON grammar) passed
 * via `response_format: { type: "grammar" }` instead of a JSON schema.
 */
async function ebnfGrammarExample() {
  // You can directly define an EBNFGrammar string with ResponseFormat.grammar
  // (String.raw keeps the backslash escapes in the grammar intact).
  const jsonGrammarStr = String.raw`
root ::= basic_array | basic_object
basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
basic_string ::= (([\"] basic_string_1 [\"]))
basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
basic_boolean ::= "true" | "false"
basic_null ::= "null"
basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
ws ::= [ \n\t]*
`;

  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback, logLevel: "INFO" },
  );

  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false, // works with streaming, logprobs, top_logprobs as well
    messages: [
      {
        role: "user",
        content: "Introduce yourself in JSON",
      },
    ],
    max_tokens: 128,
    response_format: {
      type: "grammar",
      grammar: jsonGrammarStr,
    } as webllm.ResponseFormat,
  };

  const reply0 = await engine.chatCompletion(request);
  console.log(reply0);
  console.log("Output:\n" + (await engine.getMessage()));
  console.log(reply0.usage);
}
|
||
|
||
/** Entry point: exactly one demo is enabled at a time; uncomment to switch. */
async function main() {
  // await simpleStructuredTextExample();
  await harryPotterExample();
  // await functionCallingExample();
  // await ebnfGrammarExample();
}

main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/logit-processor/README.md
|
||
================================================
|
||
# WebLLM Logit Processor and Low-Level API Example
|
||
|
||
This folder explains the usage of `LogitProcessor`, demonstrating how it can be used to
|
||
manipulate the raw logits before sampling the token (e.g. setting certain tokens to `inf` or `-inf`).
|
||
We demonstrate how to use it with and without a web worker, which can be toggled with `USE_WEB_WORKER`
|
||
in `logit_processor.ts` (see `worker.ts` on how `LogitProcessor` plays a role there).
|
||
|
||
We also demonstrate the usage of a low-level API `forwardTokenAndSample()`, which, unlike `chat.completions.create()`
|
||
that assumes the usage is for autoregressive chatting, here we have more fine-grained control.
|
||
|
||
See `my_logit_processor.ts` on how to customize your own logit processor. Here we make the logit
|
||
of token 0 `100.0` manually, large enough that we should expect to always sample token 0, which
|
||
is indeed the case if we observe the console log. We also demonstrate that a LogitProcessor can be
stateful, and the state can also be cleared with `LogitProcessor.resetState()`.
|
||
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note if you would like to hack WebLLM core package, you can change web-llm dependencies as `"file:../.."`, and follow the build from source instruction in the project to build webllm locally. This option is only recommended if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/logit-processor/package.json
|
||
================================================
|
||
{
|
||
"name": "logit-processor",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/logit_processor.html --port 8885",
|
||
"build": "parcel build src/logit_processor.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/logit-processor/src/logit_processor.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Logit Processor Test Page</h2>
|
||
Open console to see the effect of your logit processor.
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<script type="module" src="./logit_processor.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/logit-processor/src/logit_processor.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
import { MyLogitProcessor } from "./my_logit_processor";
|
||
|
||
const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker
|
||
const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
/**
 * Demonstrates a custom LogitProcessor together with the low-level
 * `forwardTokensAndSample()` API: prefill with one token, autoregress up to
 * AUTOREGRESS_LIMIT tokens, reset, and repeat.
 */
async function main() {
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  // Instantiate myLogitProcessor, registering in the logitProcessorRegistry
  // keyed by model id so the engine can look it up for that model.
  const myLogitProcessor = new MyLogitProcessor();
  const logitProcessorRegistry = new Map<string, webllm.LogitProcessor>();
  logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor);

  let engine: webllm.MLCEngineInterface;

  // Depending on whether we use a web worker, the code is slightly different
  if (USE_WEB_WORKER) {
    // see worker.ts on how LogitProcessor plays a role there
    // NOTE(review): the registry built above is not passed in this branch;
    // the worker constructs and registers its own (see worker.ts).
    engine = await webllm.CreateWebWorkerMLCEngine(
      new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
      "phi-2-q4f32_1-MLC",
      { initProgressCallback: initProgressCallback },
    );
  } else {
    engine = await webllm.CreateMLCEngine("phi-2-q4f32_1-MLC", {
      initProgressCallback: initProgressCallback,
      logitProcessorRegistry: logitProcessorRegistry,
    });
  }

  // Below we demonstrate the usage of a low-level API `forwardTokensAndSample()`
  // Prefill with an arbitrary single-token prompt, then sample one token.
  const prompt: Array<number> = [42];
  let nextToken = await engine.forwardTokensAndSample(
    prompt,
    /*isPrefill=*/ true,
  );
  console.log(nextToken);

  // Autoregressive decode loop: feed each sampled token back in.
  let counter = prompt.length;
  while (counter < AUTOREGRESS_LIMIT) {
    counter += 1;
    nextToken = await engine.forwardTokensAndSample(
      [nextToken],
      /*isPrefill=*/ false,
    );
    console.log(nextToken);
  }

  // By calling `engine.resetChat()`, we trigger MyLogitProcessor.resetState()
  engine.resetChat();
  // Second pass after the reset, identical to the first.
  counter = prompt.length;
  nextToken = await engine.forwardTokensAndSample(prompt, /*isPrefill=*/ true);
  console.log(nextToken);
  while (counter < AUTOREGRESS_LIMIT) {
    counter += 1;
    nextToken = await engine.forwardTokensAndSample(
      [nextToken],
      /*isPrefill=*/ false,
    );
    console.log(nextToken);
  }

  // `forwardTokensAndSample()` is made compatible with registering runtime stats.
  console.log(await engine.runtimeStatsText());
}

main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/logit-processor/src/my_logit_processor.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
// Define LogitProcessor
|
||
export class MyLogitProcessor implements webllm.LogitProcessor {
|
||
private tokenSequence: Array<number> = [];
|
||
|
||
processLogits(logits: Float32Array): Float32Array {
|
||
logits[0] = 100.0; // should be enough so that we always sample token 0 below
|
||
return logits;
|
||
}
|
||
|
||
processSampledToken(token: number): void {
|
||
this.tokenSequence.push(token);
|
||
console.log("processSampledToken: " + this.tokenSequence.length);
|
||
}
|
||
|
||
resetState(): void {
|
||
this.tokenSequence = [];
|
||
console.log("resetState");
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/logit-processor/src/worker.ts
|
||
================================================
|
||
// Serve the chat workload through web worker
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
import { MyLogitProcessor } from "./my_logit_processor";
|
||
|
||
console.log("Use web worker for logit processor");
|
||
|
||
const myLogitProcessor = new MyLogitProcessor();
|
||
const logitProcessorRegistry = new Map<string, webllm.LogitProcessor>();
|
||
logitProcessorRegistry.set("phi-2-q4f32_1-MLC", myLogitProcessor);
|
||
|
||
const handler = new webllm.WebWorkerMLCEngineHandler();
|
||
handler.setLogitProcessorRegistry(logitProcessorRegistry);
|
||
self.onmessage = (msg: MessageEvent) => {
|
||
handler.onmessage(msg);
|
||
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-models/README.md
|
||
================================================
|
||
# WebLLM Get Started App
|
||
|
||
This folder provides a minimum demo to show WebLLM API in a webapp setting.
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note if you would like to hack WebLLM core package,
you can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-models/package.json
|
||
================================================
|
||
{
|
||
"name": "multi-models",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/multi_models.html --port 8888",
|
||
"build": "parcel build src/multi_models.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-models/src/main.ts
|
||
================================================
|
||
/**
|
||
* This example demonstrates loading multiple models in the same engine concurrently.
|
||
* sequentialGeneration() shows inference each model one at a time.
|
||
* parallelGeneration() shows inference both models at the same time.
|
||
* This example uses WebWorkerMLCEngine, but the same idea applies to MLCEngine and
|
||
* ServiceWorkerMLCEngine as well.
|
||
*/
|
||
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
|
||
// Prepare request for each model, same for both methods
|
||
const selectedModel1 = "Phi-3.5-mini-instruct-q4f32_1-MLC-1k";
|
||
const selectedModel2 = "gemma-2-2b-it-q4f32_1-MLC-1k";
|
||
const prompt1 = "Tell me about California in 3 short sentences.";
|
||
const prompt2 = "Tell me about New York City in 3 short sentences.";
|
||
setLabel("prompt-label-1", `(with model ${selectedModel1})\n` + prompt1);
|
||
setLabel("prompt-label-2", `(with model ${selectedModel2})\n` + prompt2);
|
||
|
||
const request1: webllm.ChatCompletionRequestStreaming = {
|
||
stream: true,
|
||
stream_options: { include_usage: true },
|
||
messages: [{ role: "user", content: prompt1 }],
|
||
model: selectedModel1, // without specifying it, error will throw due to ambiguity
|
||
max_tokens: 128,
|
||
};
|
||
|
||
const request2: webllm.ChatCompletionRequestStreaming = {
|
||
stream: true,
|
||
stream_options: { include_usage: true },
|
||
messages: [{ role: "user", content: prompt2 }],
|
||
model: selectedModel2, // without specifying it, error will throw due to ambiguity
|
||
max_tokens: 128,
|
||
};
|
||
|
||
/**
|
||
* Chat completion (OpenAI style) with streaming, with two models in the pipeline.
|
||
*/
|
||
async function sequentialGeneration() {
|
||
const engine = await webllm.CreateWebWorkerMLCEngine(
|
||
new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
|
||
[selectedModel1, selectedModel2],
|
||
{ initProgressCallback: initProgressCallback },
|
||
);
|
||
|
||
const asyncChunkGenerator1 = await engine.chat.completions.create(request1);
|
||
let message1 = "";
|
||
for await (const chunk of asyncChunkGenerator1) {
|
||
// console.log(chunk);
|
||
message1 += chunk.choices[0]?.delta?.content || "";
|
||
setLabel("generate-label-1", message1);
|
||
if (chunk.usage) {
|
||
console.log(chunk.usage); // only last chunk has usage
|
||
}
|
||
// engine.interruptGenerate(); // works with interrupt as well
|
||
}
|
||
const asyncChunkGenerator2 = await engine.chat.completions.create(request2);
|
||
let message2 = "";
|
||
for await (const chunk of asyncChunkGenerator2) {
|
||
// console.log(chunk);
|
||
message2 += chunk.choices[0]?.delta?.content || "";
|
||
setLabel("generate-label-2", message2);
|
||
if (chunk.usage) {
|
||
console.log(chunk.usage); // only last chunk has usage
|
||
}
|
||
// engine.interruptGenerate(); // works with interrupt as well
|
||
}
|
||
|
||
// without specifying from which model to get message, error will throw due to ambiguity
|
||
console.log("Final message 1:\n", await engine.getMessage(selectedModel1));
|
||
console.log("Final message 2:\n", await engine.getMessage(selectedModel2));
|
||
}
|
||
|
||
/**
|
||
* Chat completion (OpenAI style) with streaming, with two models in the pipeline.
|
||
*/
|
||
async function parallelGeneration() {
|
||
const engine = await webllm.CreateWebWorkerMLCEngine(
|
||
new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
|
||
[selectedModel1, selectedModel2],
|
||
{ initProgressCallback: initProgressCallback },
|
||
);
|
||
|
||
// We can serve the two requests concurrently
|
||
async function getModel1Response() {
|
||
let message1 = "";
|
||
const asyncChunkGenerator1 = await engine.chat.completions.create(request1);
|
||
for await (const chunk of asyncChunkGenerator1) {
|
||
// console.log(chunk);
|
||
message1 += chunk.choices[0]?.delta?.content || "";
|
||
setLabel("generate-label-1", message1);
|
||
if (chunk.usage) {
|
||
console.log(chunk.usage); // only last chunk has usage
|
||
}
|
||
// engine.interruptGenerate(); // works with interrupt as well
|
||
}
|
||
}
|
||
|
||
async function getModel2Response() {
|
||
let message2 = "";
|
||
const asyncChunkGenerator2 = await engine.chat.completions.create(request2);
|
||
for await (const chunk of asyncChunkGenerator2) {
|
||
// console.log(chunk);
|
||
message2 += chunk.choices[0]?.delta?.content || "";
|
||
setLabel("generate-label-2", message2);
|
||
if (chunk.usage) {
|
||
console.log(chunk.usage); // only last chunk has usage
|
||
}
|
||
// engine.interruptGenerate(); // works with interrupt as well
|
||
}
|
||
}
|
||
|
||
await Promise.all([getModel1Response(), getModel2Response()]);
|
||
// Note: concurrent requests to the same model are executed sequentially in FCFS,
|
||
// unlike to different models like above
|
||
// Fore more, see https://github.com/mlc-ai/web-llm/pull/549
|
||
// await Promise.all([getModel1Response(), getModel1Response()]);
|
||
|
||
// without specifying from which model to get message, error will throw due to ambiguity
|
||
console.log("Final message 1:\n", await engine.getMessage(selectedModel1));
|
||
console.log("Final message 2:\n", await engine.getMessage(selectedModel2));
|
||
}
|
||
|
||
// Pick one to run: sequential (one model at a time) or parallel (both at once).
sequentialGeneration();
// parallelGeneration();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-models/src/multi_models.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt 1</h3>
|
||
<label id="prompt-label-1"> </label>
|
||
|
||
<h3>Response from model 1</h3>
|
||
<label id="generate-label-1"> </label>
|
||
<br />
|
||
|
||
<h3>Prompt 2</h3>
|
||
<label id="prompt-label-2"> </label>
|
||
|
||
<h3>Response from model 2</h3>
|
||
<label id="generate-label-2"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./main.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-models/src/worker.ts
|
||
================================================
|
||
import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
|
||
|
||
// Hookup an engine to a worker handler
|
||
const handler = new WebWorkerMLCEngineHandler();
|
||
self.onmessage = (msg: MessageEvent) => {
|
||
handler.onmessage(msg);
|
||
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-round-chat/README.md
|
||
================================================
|
||
### OpenAI API Demos
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-round-chat/package.json
|
||
================================================
|
||
{
|
||
"name": "multi-round-chat",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/multi_round_chat.html --port 8888",
|
||
"build": "parcel build src/multi_round_chat.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-round-chat/src/multi_round_chat.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<script type="module" src="./multi_round_chat.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/multi-round-chat/src/multi_round_chat.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
/**
|
||
* We demonstrate multiround chatting. Though users are required to maintain chat history, internally
|
||
* we compare provided `messages` with the internal chat history. If it matches, we will reuse KVs
|
||
* and hence save computation -- essentially an implicit internal optimization.
|
||
*/
|
||
async function main() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{ initProgressCallback: initProgressCallback },
|
||
);
|
||
|
||
// Round 0
|
||
const messages: webllm.ChatCompletionMessageParam[] = [
|
||
{
|
||
role: "system",
|
||
content:
|
||
"You are a helpful, respectful and honest assistant. " +
|
||
"Be as happy as you can when speaking please. ",
|
||
},
|
||
{ role: "user", content: "Provide me three US states." },
|
||
];
|
||
|
||
const request0: webllm.ChatCompletionRequest = {
|
||
stream: false, // can be streaming, same behavior
|
||
messages: messages,
|
||
};
|
||
|
||
const reply0 = await engine.chat.completions.create(request0);
|
||
const replyMessage0 = await engine.getMessage();
|
||
console.log(reply0);
|
||
console.log(replyMessage0);
|
||
console.log(reply0.usage);
|
||
|
||
// Round 1
|
||
// Append generated response to messages
|
||
messages.push({ role: "assistant", content: replyMessage0 });
|
||
// Append new user input
|
||
messages.push({ role: "user", content: "Two more please!" });
|
||
// Below line would cause an internal reset (clear KV cache, etc.) since the history no longer
|
||
// matches the new request
|
||
// messages[0].content = "Another system prompt";
|
||
|
||
const request1: webllm.ChatCompletionRequest = {
|
||
stream: false, // can be streaming, same behavior
|
||
messages: messages,
|
||
};
|
||
|
||
const reply1 = await engine.chat.completions.create(request1);
|
||
const replyMessage1 = await engine.getMessage();
|
||
console.log(reply1);
|
||
console.log(replyMessage1);
|
||
console.log(reply1.usage);
|
||
|
||
// If we used multiround chat, request1 should only prefill a small number of tokens
|
||
const prefillTokens0 = reply0.usage?.prompt_tokens;
|
||
const prefillTokens1 = reply1.usage?.prompt_tokens;
|
||
console.log("Requset 0 prompt tokens: ", prefillTokens0);
|
||
console.log("Requset 1 prompt tokens: ", prefillTokens1);
|
||
if (
|
||
prefillTokens0 === undefined ||
|
||
prefillTokens1 === undefined ||
|
||
prefillTokens1 > prefillTokens0
|
||
) {
|
||
throw Error("Multi-round chat is not triggered as expected.");
|
||
}
|
||
}
|
||
|
||
// Kick off the demo; any error surfaces as an unhandled promise rejection.
main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/README.md
|
||
================================================
|
||
This is a [Next.js](https://nextjs.org/) project using web-llm.
|
||
|
||
## Getting Started
|
||
|
||
First, install web-llm from source.
|
||
|
||
Then, run the development server:
|
||
|
||
```bash
|
||
npm run dev
|
||
# or
|
||
yarn dev
|
||
# or
|
||
pnpm dev
|
||
```
|
||
|
||
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/next.config.js
|
||
================================================
|
||
/** @type {import('next').NextConfig} */
const nextConfig = {
  reactStrictMode: true,

  webpack: (config, { isServer }) => {
    // Server bundles may use Node built-ins; leave them untouched.
    if (isServer) {
      return config;
    }
    // Client bundles cannot resolve Node-only modules, so stub them out.
    config.resolve.fallback = {
      // Spread first: otherwise the fallbacks next.js configures would be dropped.
      ...config.resolve.fallback,
      fs: false,
      module: false,
      perf_hooks: false,
    };
    return config;
  },
};

module.exports = nextConfig;
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/package.json
|
||
================================================
|
||
{
|
||
"name": "next-simple-chat",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"dev": "next dev",
|
||
"build": "next build",
|
||
"start": "next start",
|
||
"lint": "next lint"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80",
|
||
"@types/node": "20.3.3",
|
||
"@types/react": "18.2.14",
|
||
"@types/react-dom": "18.2.6",
|
||
"autoprefixer": "10.4.14",
|
||
"eslint": "8.44.0",
|
||
"eslint-config-next": "13.4.7",
|
||
"next": "^13.5.6",
|
||
"postcss": "8.4.24",
|
||
"react": "18.2.0",
|
||
"react-dom": "18.2.0",
|
||
"tailwindcss": "3.3.2",
|
||
"typescript": "5.1.6"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/postcss.config.js
|
||
================================================
|
||
// PostCSS pipeline: Tailwind generates the utility CSS, then autoprefixer
// adds vendor prefixes for browser compatibility.
module.exports = {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/tailwind.config.js
|
||
================================================
|
||
/** @type {import('tailwindcss').Config} */
module.exports = {
  // Globs Tailwind scans for class names; unmatched utilities are purged.
  content: [
    "./src/pages/**/*.{js,ts,jsx,tsx,mdx}",
    "./src/components/**/*.{js,ts,jsx,tsx,mdx}",
    "./src/app/**/*.{js,ts,jsx,tsx,mdx}",
  ],
  theme: {
    extend: {
      // Extra background-image utilities: radial and conic gradients built
      // from the standard Tailwind gradient-stop variables.
      backgroundImage: {
        "gradient-radial": "radial-gradient(var(--tw-gradient-stops))",
        "gradient-conic":
          "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))",
      },
    },
  },
  plugins: [],
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/tsconfig.json
|
||
================================================
|
||
{
|
||
"compilerOptions": {
|
||
"target": "es5",
|
||
"lib": ["dom", "dom.iterable", "esnext"],
|
||
"allowJs": true,
|
||
"skipLibCheck": true,
|
||
"strict": true,
|
||
"forceConsistentCasingInFileNames": true,
|
||
"noEmit": true,
|
||
"esModuleInterop": true,
|
||
"module": "esnext",
|
||
"moduleResolution": "node",
|
||
"resolveJsonModule": true,
|
||
"isolatedModules": true,
|
||
"jsx": "preserve",
|
||
"incremental": true,
|
||
"paths": {
|
||
"~/*": ["./src/*"]
|
||
}
|
||
},
|
||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
|
||
"exclude": ["node_modules"]
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/src/pages/_app.tsx
|
||
================================================
|
||
import "~/styles/globals.css";
|
||
import type { AppProps } from "next/app";
|
||
|
||
// Next.js custom App: renders the active page component with its props.
const App = ({ Component, pageProps }: AppProps) => <Component {...pageProps} />;

export default App;
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/src/pages/_document.tsx
|
||
================================================
|
||
import { Html, Head, Main, NextScript } from "next/document";
|
||
|
||
// Next.js custom Document: sets the document language and the standard
// body structure (page content via <Main />, framework scripts via <NextScript />).
export default function Document() {
  return (
    <Html lang="en">
      <Head />
      <body>
        <Main />
        <NextScript />
      </body>
    </Html>
  );
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/src/pages/index.tsx
|
||
================================================
|
||
import Head from "next/head";
|
||
import ChatComponent from "~/utils/chat_component";
|
||
import { Inter } from "next/font/google";
|
||
|
||
// Load the Inter font (latin subset) once at module scope.
const inter = Inter({ subsets: ["latin"] });

// Landing page: page metadata in <Head>, with the chat UI centered on screen.
export default function Home() {
  return (
    <>
      <Head>
        <title>Example App</title>
        <meta
          name="description"
          content="Example app for web llm next compatibility"
        />
        <link rel="icon" href="/favicon.ico" />
      </Head>
      <main
        className={`flex min-h-screen flex-col items-center justify-between p-24 ${inter.className}`}
      >
        <ChatComponent />
      </main>
    </>
  );
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/src/pages/api/hello.ts
|
||
================================================
|
||
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
|
||
import type { NextApiRequest, NextApiResponse } from "next";
|
||
|
||
type Data = {
|
||
name: string;
|
||
};
|
||
|
||
export default function handler(
|
||
req: NextApiRequest,
|
||
res: NextApiResponse<Data>,
|
||
) {
|
||
res.status(200).json({ name: "John Doe" });
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/src/styles/globals.css
|
||
================================================
|
||
@tailwind base;
|
||
@tailwind components;
|
||
@tailwind utilities;
|
||
|
||
:root {
|
||
--foreground-rgb: 0, 0, 0;
|
||
--background-start-rgb: 214, 219, 220;
|
||
--background-end-rgb: 255, 255, 255;
|
||
}
|
||
|
||
@media (prefers-color-scheme: dark) {
|
||
:root {
|
||
--foreground-rgb: 255, 255, 255;
|
||
--background-start-rgb: 0, 0, 0;
|
||
--background-end-rgb: 0, 0, 0;
|
||
}
|
||
}
|
||
|
||
body {
|
||
color: rgb(var(--foreground-rgb));
|
||
background: linear-gradient(
|
||
to bottom,
|
||
transparent,
|
||
rgb(var(--background-end-rgb))
|
||
)
|
||
rgb(var(--background-start-rgb));
|
||
}
|
||
|
||
a {
|
||
color: inherit;
|
||
text-decoration: none;
|
||
}
|
||
|
||
* {
|
||
box-sizing: border-box;
|
||
}
|
||
|
||
chatui-chat {
|
||
height: 100;
|
||
}
|
||
|
||
.chatui {
|
||
display: flex;
|
||
flex-flow: column wrap;
|
||
justify-content: space-between;
|
||
width: 100%;
|
||
max-width: 867px;
|
||
margin: 25px 10px;
|
||
height: 600px;
|
||
border: 2px solid #ddd;
|
||
border-radius: 5px;
|
||
box-shadow: 0 15px 15px -5px rgba(0, 0, 0, 0.2);
|
||
}
|
||
|
||
.chatui-header {
|
||
display: flex;
|
||
justify-content: space-between;
|
||
padding: 10px;
|
||
border-bottom: 2px solid #ddd;
|
||
background: #eee;
|
||
color: #666;
|
||
}
|
||
|
||
.chatui-chat {
|
||
flex: 1;
|
||
overflow-y: auto;
|
||
padding: 10px;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar {
|
||
width: 6px;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-track {
|
||
background: #ddd;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-thumb {
|
||
background: #bdbdbd;
|
||
}
|
||
|
||
.msg {
|
||
display: flex;
|
||
align-items: flex-end;
|
||
margin-bottom: 10px;
|
||
}
|
||
|
||
.msg:last-of-type {
|
||
margin: 0;
|
||
}
|
||
|
||
.msg-bubble {
|
||
max-width: 450px;
|
||
padding: 15px;
|
||
border-radius: 15px;
|
||
background: #ececec;
|
||
}
|
||
|
||
.left-msg .msg-bubble {
|
||
border-bottom-left-radius: 0;
|
||
}
|
||
|
||
.error-msg .msg-bubble {
|
||
border-bottom-left-radius: 0;
|
||
color: #f15959;
|
||
}
|
||
|
||
.init-msg .msg-bubble {
|
||
border-bottom-left-radius: 0;
|
||
}
|
||
|
||
.right-msg {
|
||
flex-direction: row-reverse;
|
||
}
|
||
|
||
.right-msg .msg-bubble {
|
||
background: #579ffb;
|
||
color: #fff;
|
||
border-bottom-right-radius: 0;
|
||
}
|
||
|
||
.chatui-inputarea {
|
||
display: flex;
|
||
padding: 10px;
|
||
border-top: 2px solid #ddd;
|
||
background: #eee;
|
||
}
|
||
|
||
.chatui-inputarea * {
|
||
padding: 10px;
|
||
border: none;
|
||
border-radius: 3px;
|
||
font-size: 1em;
|
||
}
|
||
|
||
.chatui-input {
|
||
flex: 1;
|
||
background: #ddd;
|
||
}
|
||
|
||
.chatui-btn {
|
||
margin-left: 10px;
|
||
background: #579ffb;
|
||
color: #fff;
|
||
font-weight: bold;
|
||
cursor: pointer;
|
||
padding: 10px;
|
||
}
|
||
|
||
.chatui-btn:hover {
|
||
background: #577bfb;
|
||
}
|
||
|
||
.chatui-chat {
|
||
background-color: #fcfcfe;
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/src/utils/chat_component.tsx
|
||
================================================
|
||
import { useState } from "react";
|
||
import { MLCEngine } from "@mlc-ai/web-llm";
|
||
import ChatUI from "~/utils/chat_ui";
|
||
|
||
const ChatComponent = () => {
|
||
const [messages, setMessages] = useState<{ kind: string; text: string }[]>(
|
||
[],
|
||
);
|
||
const [prompt, setPrompt] = useState("");
|
||
const [runtimeStats, setRuntimeStats] = useState("");
|
||
const [chat_ui] = useState(new ChatUI(new MLCEngine()));
|
||
const updateMessage = (kind: string, text: string, append: boolean) => {
|
||
if (kind == "init") {
|
||
text = "[System Initalize] " + text;
|
||
}
|
||
const msgCopy = [...messages];
|
||
if (msgCopy.length == 0 || append) {
|
||
setMessages([...msgCopy, { kind, text }]);
|
||
} else {
|
||
msgCopy[msgCopy.length - 1] = { kind, text };
|
||
setMessages([...msgCopy]);
|
||
}
|
||
};
|
||
return (
|
||
<div className="flex flex-col items-center">
|
||
<button
|
||
className="chatui-btn"
|
||
onClick={() => {
|
||
chat_ui.asyncInitChat(updateMessage).catch((error) => {
|
||
console.log(error);
|
||
});
|
||
}}
|
||
>
|
||
Download Model
|
||
</button>
|
||
|
||
<div className="chatui">
|
||
<div className="chatui-chat" id="chatui-chat">
|
||
{messages.map((value, index) => (
|
||
<div key={index} className={`msg ${value.kind}-msg`}>
|
||
<div className="msg-bubble">
|
||
<div className="msg-text">${value.text}</div>
|
||
</div>
|
||
</div>
|
||
))}
|
||
</div>
|
||
|
||
<div className="chatui-inputarea">
|
||
<input
|
||
id="chatui-input"
|
||
type="text"
|
||
className="chatui-input"
|
||
placeholder="Enter your message..."
|
||
onKeyDown={(event) => {
|
||
if (event.key === "Enter") {
|
||
chat_ui
|
||
.onGenerate(prompt, updateMessage, setRuntimeStats)
|
||
.catch((error) => console.log(error));
|
||
}
|
||
}}
|
||
value={prompt}
|
||
onChange={(event) => setPrompt(event.target.value)}
|
||
/>
|
||
<button
|
||
className="chatui-btn"
|
||
onClick={() => {
|
||
chat_ui
|
||
.onGenerate(prompt, updateMessage, setRuntimeStats)
|
||
.catch((error) => console.log(error));
|
||
}}
|
||
>
|
||
Send
|
||
</button>
|
||
</div>
|
||
</div>
|
||
|
||
<div className="chatui-extra-control">
|
||
<button
|
||
className="chatui-btn"
|
||
onClick={() => {
|
||
chat_ui
|
||
.onReset(() => {
|
||
setMessages([]);
|
||
})
|
||
.catch((error) => console.log(error));
|
||
}}
|
||
>
|
||
Reset Chat
|
||
</button>
|
||
<label id="chatui-info-label">{runtimeStats}</label>
|
||
</div>
|
||
</div>
|
||
);
|
||
};
|
||
|
||
export default ChatComponent;
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/next-simple-chat/src/utils/chat_ui.ts
|
||
================================================
|
||
import {
|
||
MLCEngineInterface,
|
||
ChatCompletionMessageParam,
|
||
CompletionUsage,
|
||
} from "@mlc-ai/web-llm";
|
||
|
||
/**
 * Thin controller between the React UI and an MLCEngine. All chat operations
 * are appended to a single promise chain (`chatRequestChain`) so they execute
 * strictly one after another, while `requestInProgress` guards against
 * re-entrant generate calls from the UI.
 */
export default class ChatUI {
  // Engine that actually loads the model and runs completions.
  private engine: MLCEngineInterface;
  // True once engine.reload() has succeeded; prevents repeated model loads.
  private chatLoaded = false;
  // True while init or generation is running; used to drop re-entrant calls
  // in onGenerate and to decide whether onReset must interrupt first.
  private requestInProgress = false;
  // We use a request chain to ensure that
  // all requests send to chat are sequentialized
  private chatRequestChain: Promise<void> = Promise.resolve();
  // OpenAI-style message history replayed to the engine on every request.
  private chatHistory: ChatCompletionMessageParam[] = [];

  constructor(engine: MLCEngineInterface) {
    this.engine = engine;
  }
  /**
   * Push a task to the execution queue.
   *
   * @param task The task to be executed;
   */
  private pushTask(task: () => Promise<void>) {
    const lastEvent = this.chatRequestChain;
    this.chatRequestChain = lastEvent.then(task);
  }
  // Event handlers: each one queues its work so tasks run sequentially.
  // A running generation can be interrupted via engine.interruptGenerate()
  // (see onReset), which causes the in-flight task to stop early.
  /**
   * Queue a generation for `prompt`. Returns the chain promise so callers can
   * await completion; silently ignored when a request is already in progress.
   */
  async onGenerate(
    prompt: string,
    messageUpdate: (kind: string, text: string, append: boolean) => void,
    setRuntimeStats: (runtimeStats: string) => void,
  ) {
    if (this.requestInProgress) {
      return;
    }
    this.pushTask(async () => {
      await this.asyncGenerate(prompt, messageUpdate, setRuntimeStats);
    });
    return this.chatRequestChain;
  }

  /**
   * Clear history and queue an engine reset; interrupts any in-flight
   * generation first so the reset runs promptly.
   */
  async onReset(clearMessages: () => void) {
    if (this.requestInProgress) {
      // interrupt previous generation if any
      this.engine.interruptGenerate();
    }
    this.chatHistory = [];
    // try reset after previous requests finishes
    this.pushTask(async () => {
      await this.engine.resetChat();
      clearMessages();
    });
    return this.chatRequestChain;
  }

  /**
   * Load the model (idempotent). Progress is surfaced through `messageUpdate`
   * with kind "init"; on failure the engine is unloaded and an "error"
   * message is emitted instead of throwing.
   */
  async asyncInitChat(
    messageUpdate: (kind: string, text: string, append: boolean) => void,
  ) {
    if (this.chatLoaded) return;
    this.requestInProgress = true;
    messageUpdate("init", "", true);
    const initProgressCallback = (report: { text: string }) => {
      messageUpdate("init", report.text, false);
    };
    this.engine.setInitProgressCallback(initProgressCallback);

    try {
      const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
      // const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k";
      await this.engine.reload(selectedModel);
    } catch (err: unknown) {
      messageUpdate("error", "Init error, " + (err?.toString() ?? ""), true);
      console.log(err);
      await this.unloadChat();
      this.requestInProgress = false;
      return;
    }
    this.requestInProgress = false;
    this.chatLoaded = true;
  }

  // Unload the model and mark it not loaded so the next call re-initializes.
  private async unloadChat() {
    await this.engine.unload();
    this.chatLoaded = false;
  }

  /**
   * Run generate
   *
   * Streams a completion for `prompt`, echoing the user message ("right"),
   * progressively updating the assistant message ("left"), and reporting
   * token/throughput stats from the final chunk's usage. Errors are shown as
   * "error" messages and unload the model rather than propagate.
   */
  private async asyncGenerate(
    prompt: string,
    messageUpdate: (kind: string, text: string, append: boolean) => void,
    setRuntimeStats: (runtimeStats: string) => void,
  ) {
    await this.asyncInitChat(messageUpdate);
    this.requestInProgress = true;
    // const prompt = this.uiChatInput.value;
    if (prompt == "") {
      this.requestInProgress = false;
      return;
    }

    messageUpdate("right", prompt, true);
    // this.uiChatInput.value = "";
    // this.uiChatInput.setAttribute("placeholder", "Generating...");

    messageUpdate("left", "", true);

    try {
      this.chatHistory.push({ role: "user", content: prompt });
      let curMessage = "";
      let usage: CompletionUsage | undefined = undefined;
      const completion = await this.engine.chat.completions.create({
        stream: true,
        messages: this.chatHistory,
        stream_options: { include_usage: true },
      });
      for await (const chunk of completion) {
        const curDelta = chunk.choices[0]?.delta.content;
        if (curDelta) {
          curMessage += curDelta;
        }
        messageUpdate("left", curMessage, false);
        if (chunk.usage) {
          // only the last chunk carries usage when include_usage is set
          usage = chunk.usage;
        }
      }
      const output = await this.engine.getMessage();
      this.chatHistory.push({ role: "assistant", content: output });
      messageUpdate("left", output, false);
      if (usage) {
        const runtimeStats =
          `prompt_tokens: ${usage.prompt_tokens}, ` +
          `completion_tokens: ${usage.completion_tokens}, ` +
          `prefill: ${usage.extra.prefill_tokens_per_s.toFixed(4)} tokens/sec, ` +
          `decoding: ${usage.extra.decode_tokens_per_s.toFixed(4)} tokens/sec`;
        setRuntimeStats(runtimeStats);
      }
    } catch (err: unknown) {
      messageUpdate(
        "error",
        "Generate error, " + (err?.toString() ?? ""),
        true,
      );
      console.log(err);
      await this.unloadChat();
    }
    this.requestInProgress = false;
  }
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/qwen3/README.md
|
||
================================================
|
||
### OpenAI API Demos w/ Qwen3
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/qwen3/package.json
|
||
================================================
|
||
{
|
||
"name": "qwen3_example",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/qwen3_example.html --port 8883",
|
||
"build": "parcel build src/qwen3_example.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/qwen3/src/qwen3_example.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<script type="module" src="./qwen3_example.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/qwen3/src/qwen3_example.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
// Helper method to stream responses from the engine
|
||
async function streamResponse(
|
||
engine: webllm.MLCEngineInterface,
|
||
request: webllm.ChatCompletionRequestStreaming,
|
||
): Promise<void> {
|
||
console.log("Requesting chat completion with request:", request);
|
||
const asyncChunkGenerator = await engine.chat.completions.create(request);
|
||
let message = "";
|
||
for await (const chunk of asyncChunkGenerator) {
|
||
message += chunk.choices[0]?.delta?.content || "";
|
||
setLabel("generate-label", message);
|
||
if (chunk.usage) {
|
||
console.log(chunk.usage); // only last chunk has usage
|
||
}
|
||
// engine.interruptGenerate(); // works with interrupt as well
|
||
}
|
||
console.log("Final message:\n", await engine.getMessage()); // the concatenated message
|
||
}
|
||
|
||
/**
|
||
* We demonstrate how Qwen3's best practices can be followed in WebLLM. For more, see
|
||
* https://huggingface.co/Qwen/Qwen3-8B#best-practices.
|
||
*/
|
||
async function main() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
const selectedModel = "Qwen3-4B-q4f16_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{ initProgressCallback: initProgressCallback },
|
||
);
|
||
|
||
/**
|
||
* 1. Default behavior: enable thinking
|
||
*/
|
||
let request: webllm.ChatCompletionRequest = {
|
||
stream: true,
|
||
stream_options: { include_usage: true },
|
||
messages: [
|
||
{
|
||
role: "user",
|
||
content: "How many r's are there in the word strawberry?",
|
||
},
|
||
],
|
||
// Specifying `enable_thinking` is optional, as it defaults to think.
|
||
// extra_body: {
|
||
// enable_thinking: true,
|
||
// }
|
||
};
|
||
await streamResponse(engine, request);
|
||
|
||
/**
|
||
* 2. Disable thinking with `enable_thinking: false`.
|
||
*/
|
||
request = {
|
||
stream: true,
|
||
stream_options: { include_usage: true },
|
||
messages: [
|
||
{
|
||
role: "user",
|
||
content: "How many r's are there in the word strawberry?",
|
||
},
|
||
],
|
||
extra_body: {
|
||
enable_thinking: false,
|
||
},
|
||
};
|
||
await streamResponse(engine, request);
|
||
|
||
/**
|
||
* 3. Disable thinking with soft switch /no_think
|
||
* or enable thinking with soft switch /think.
|
||
* Using soft switch: "When enable_thinking=True, regardless of whether the user
|
||
* uses /think or /no_think, the model will always output a block wrapped in
|
||
* <think>...</think>. However, the content inside this block may be empty if
|
||
* thinking is disabled. When enable_thinking=False, the soft switches are not
|
||
* valid. Regardless of any /think or /no_think tags input by the user, the
|
||
* model will not generate think content and will not include a <think>...</think> block.
|
||
*/
|
||
request = {
|
||
stream: true,
|
||
stream_options: { include_usage: true },
|
||
messages: [
|
||
{
|
||
role: "user",
|
||
content: "How many r's are there in the word strawberry? /no_think",
|
||
// content: "How many r's are there in the word strawberry? /think",
|
||
},
|
||
],
|
||
};
|
||
await streamResponse(engine, request);
|
||
|
||
/**
|
||
* 4. For multi-turn messages, it is recommended to
|
||
* parse out the thinking content in the history
|
||
* messages as described in the Best Practices section.
|
||
*/
|
||
const history: webllm.ChatCompletionMessageParam[] = [
|
||
{
|
||
role: "user",
|
||
content: "How many r's are there in the word strawberry? /think",
|
||
},
|
||
{
|
||
role: "assistant",
|
||
content:
|
||
"<think>Dummy thinking content here...</think>\n\nThe answer is 3.",
|
||
},
|
||
];
|
||
// Preprocess history to remove thinking content
|
||
const preprocessedHistory = history.map((msg) => {
|
||
if (msg.role === "assistant") {
|
||
// Remove <think>...</think> block from assistant messages that is at the start
|
||
// and may contain two \n\n line breaks.
|
||
const thinkRegex = /<think>.*?<\/think>\n?\n?/s; // Match <think>...</think> with optional \n\n
|
||
const contentWithoutThink = msg.content!.replace(thinkRegex, "").trim();
|
||
return { ...msg, content: contentWithoutThink };
|
||
}
|
||
return msg; // User messages remain unchanged
|
||
});
|
||
console.log("Preprocessed history:", preprocessedHistory);
|
||
|
||
// Now use the preprocessed history in the request
|
||
const newMessage: webllm.ChatCompletionMessageParam = {
|
||
role: "user",
|
||
content: "What about blueberries?",
|
||
};
|
||
|
||
request = {
|
||
stream: true,
|
||
stream_options: { include_usage: true },
|
||
messages: [...preprocessedHistory, newMessage],
|
||
};
|
||
await streamResponse(engine, request);
|
||
}
|
||
|
||
main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/seed-to-reproduce/README.md
|
||
================================================
|
||
### OpenAI API Demos
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/seed-to-reproduce/package.json
|
||
================================================
|
||
{
|
||
"name": "seed-to-reproduce",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/seed.html --port 8888",
|
||
"build": "parcel build src/seed.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/seed-to-reproduce/src/seed.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output. We make two generations with the same seed, so
|
||
we should expect them to be the same.
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<script type="module" src="./seed.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/seed-to-reproduce/src/seed.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
/**
|
||
* We domnstrate the effect of seeding. The prompt is about writing a poem and we use a high
|
||
* `temperature`, making the sampling distribution supposedly more random. However, we demonstrate
|
||
* that with seeding, we should see the exact same result being generated across two trials.
|
||
* With `n > 1`, all choices should also be exactly the same.
|
||
*/
|
||
async function main() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
|
||
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
|
||
selectedModel,
|
||
{ initProgressCallback: initProgressCallback },
|
||
);
|
||
|
||
const request: webllm.ChatCompletionRequest = {
|
||
stream: false, // works with streaming as well
|
||
messages: [
|
||
{ role: "user", content: "Write a creative Haiku about Pittsburgh" },
|
||
],
|
||
n: 3,
|
||
temperature: 1.2, // high temperature gives much more random results
|
||
max_tokens: 128, // To save time; enough to demonstrate the effect
|
||
seed: 42,
|
||
};
|
||
|
||
const reply0 = await engine.chat.completions.create(request);
|
||
console.log(reply0);
|
||
console.log("First reply's last choice:\n" + (await engine.getMessage()));
|
||
console.log(reply0.usage);
|
||
|
||
const reply1 = await engine.chat.completions.create(request);
|
||
console.log(reply1);
|
||
console.log("Second reply's last choice:\n" + (await engine.getMessage()));
|
||
|
||
// Rigorously check the generation results of each choice for the two requests
|
||
for (const choice0 of reply0.choices) {
|
||
const id = choice0.index;
|
||
const choice1 = reply1.choices[id];
|
||
if (choice0.message.content !== choice1.message.content) {
|
||
throw Error(
|
||
"Chocie " +
|
||
id +
|
||
" of the two generations are different despite seeding",
|
||
);
|
||
}
|
||
}
|
||
|
||
console.log(reply1.usage);
|
||
}
|
||
|
||
// Run one of the functions
|
||
main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/service-worker/README.md
|
||
================================================
|
||
# WebLLM Service Worker Example
|
||
|
||
This example shows how we can create a page with Web-LLM running in service worker.
|
||
|
||
```bash
|
||
npm install
|
||
npm run build
|
||
```
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/service-worker/package.json
|
||
================================================
|
||
{
|
||
"name": "web-llm-service-worker",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "rm -rf .parcel-cache && parcel src/index.html --port 3000",
|
||
"build": "rm -rf .parcel-cache && parcel build src/index.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^6.0.3",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/service-worker/src/index.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./main.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/service-worker/src/main.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
const registerServiceWorker = async () => {
|
||
if ("serviceWorker" in navigator) {
|
||
try {
|
||
const registration = await navigator.serviceWorker.register(
|
||
new URL("sw.ts", import.meta.url),
|
||
{ type: "module" },
|
||
);
|
||
if (registration.installing) {
|
||
console.log("Service worker installing");
|
||
} else if (registration.waiting) {
|
||
console.log("Service worker installed");
|
||
} else if (registration.active) {
|
||
console.log("Service worker active");
|
||
}
|
||
} catch (error) {
|
||
console.error(`Registration failed with ${error}`);
|
||
}
|
||
}
|
||
};
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
// There are two demonstrations, pick one to run
|
||
|
||
/**
|
||
* Chat completion (OpenAI style) without streaming, where we get the entire response at once.
|
||
*/
|
||
async function mainNonStreaming() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
|
||
|
||
const engine: webllm.MLCEngineInterface =
|
||
await webllm.CreateServiceWorkerMLCEngine(selectedModel, {
|
||
initProgressCallback: initProgressCallback,
|
||
});
|
||
|
||
const request: webllm.ChatCompletionRequest = {
|
||
messages: [
|
||
{
|
||
role: "system",
|
||
content:
|
||
"You are a helpful, respectful and honest assistant. " +
|
||
"Be as happy as you can when speaking please. ",
|
||
},
|
||
{ role: "user", content: "Provide me three US states." },
|
||
{ role: "assistant", content: "California, New York, Pennsylvania." },
|
||
{ role: "user", content: "Two more please!" },
|
||
],
|
||
n: 3,
|
||
temperature: 1.5,
|
||
max_tokens: 256,
|
||
};
|
||
|
||
const reply0 = await engine.chat.completions.create(request);
|
||
console.log(reply0);
|
||
setLabel("generate-label", reply0.choices[0].message.content || "");
|
||
|
||
console.log(reply0.usage);
|
||
}
|
||
|
||
/**
|
||
* Chat completion (OpenAI style) with streaming, where delta is sent while generating response.
|
||
*/
|
||
async function mainStreaming() {
|
||
const initProgressCallback = (report: webllm.InitProgressReport) => {
|
||
setLabel("init-label", report.text);
|
||
};
|
||
const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
|
||
|
||
const engine: webllm.ServiceWorkerMLCEngine =
|
||
await webllm.CreateServiceWorkerMLCEngine(selectedModel, {
|
||
initProgressCallback: initProgressCallback,
|
||
});
|
||
|
||
const request: webllm.ChatCompletionRequest = {
|
||
stream: true,
|
||
stream_options: { include_usage: true },
|
||
messages: [
|
||
{
|
||
role: "system",
|
||
content:
|
||
"You are a helpful, respectful and honest assistant. " +
|
||
"Be as happy as you can when speaking please. ",
|
||
},
|
||
{ role: "user", content: "Provide me three US states." },
|
||
{ role: "assistant", content: "California, New York, Pennsylvania." },
|
||
{ role: "user", content: "Two more please!" },
|
||
],
|
||
temperature: 1.5,
|
||
max_tokens: 256,
|
||
};
|
||
|
||
const asyncChunkGenerator = await engine.chat.completions.create(request);
|
||
let message = "";
|
||
for await (const chunk of asyncChunkGenerator) {
|
||
console.log(chunk);
|
||
message += chunk.choices[0]?.delta?.content || "";
|
||
setLabel("generate-label", message);
|
||
if (chunk.usage) {
|
||
console.log(chunk.usage); // only last chunk has usage
|
||
}
|
||
// engine.interruptGenerate(); // works with interrupt as well
|
||
}
|
||
console.log("Final message:\n", await engine.getMessage()); // the concatenated message
|
||
}
|
||
|
||
registerServiceWorker();
|
||
// Run one of the function below
|
||
// mainNonStreaming();
|
||
mainStreaming();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/service-worker/src/sw.ts
|
||
================================================
|
||
import { ServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
|
||
|
||
let handler: ServiceWorkerMLCEngineHandler;
|
||
|
||
self.addEventListener("activate", function (event) {
|
||
handler = new ServiceWorkerMLCEngineHandler();
|
||
console.log("Web-LLM Service Worker Activated");
|
||
});
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-js/index.css
|
||
================================================
|
||
body,
|
||
html {
|
||
font-family: Arial, sans-serif;
|
||
padding: 10px 20px;
|
||
}
|
||
|
||
.download-container {
|
||
display: flex;
|
||
justify-content: space-between;
|
||
margin-bottom: 20px;
|
||
}
|
||
|
||
#download-status {
|
||
border: solid 1px black;
|
||
box-shadow:
|
||
0 10px 15px -3px rgba(0, 0, 0, 0.1),
|
||
0 4px 6px -2px rgba(0, 0, 0, 0.05);
|
||
padding: 10px;
|
||
}
|
||
|
||
.chat-container {
|
||
height: 400px;
|
||
width: 100%;
|
||
border: 2px solid black;
|
||
display: flex;
|
||
flex-direction: column;
|
||
}
|
||
|
||
.chat-box {
|
||
overflow-y: scroll;
|
||
background-color: #c3c3c3;
|
||
border: 1px solid #ccc;
|
||
padding: 5px;
|
||
flex: 1 1;
|
||
}
|
||
|
||
.chat-stats {
|
||
background-color: #d3eceb;
|
||
flex: 0 0;
|
||
padding: 10px;
|
||
font-size: 0.75rem;
|
||
}
|
||
|
||
.message-container {
|
||
width: 100%;
|
||
display: flex;
|
||
}
|
||
|
||
.message {
|
||
padding: 10px;
|
||
margin: 10px 0;
|
||
border-radius: 10px;
|
||
width: fit-content;
|
||
}
|
||
|
||
.message-container.user {
|
||
justify-content: end;
|
||
}
|
||
|
||
.message-container.assistant {
|
||
justify-content: start;
|
||
}
|
||
|
||
.message-container.user .message {
|
||
background: #007bff;
|
||
color: #fff;
|
||
}
|
||
|
||
.message-container.assistant .message {
|
||
background: #f1f0f0;
|
||
color: #333;
|
||
}
|
||
|
||
.chat-input-container {
|
||
min-height: 40px;
|
||
flex: 0 0;
|
||
display: flex;
|
||
}
|
||
|
||
#user-input {
|
||
width: 70%;
|
||
padding: 10px;
|
||
border: 1px solid #ccc;
|
||
}
|
||
|
||
button {
|
||
width: 25%;
|
||
padding: 10px;
|
||
border: none;
|
||
background-color: #007bff;
|
||
color: white;
|
||
cursor: pointer;
|
||
}
|
||
|
||
button:disabled {
|
||
background-color: lightgray;
|
||
cursor: not-allowed;
|
||
}
|
||
|
||
button:hover:not(:disabled) {
|
||
background-color: #0056b3;
|
||
}
|
||
|
||
.hidden {
|
||
display: none;
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-js/index.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<head>
|
||
<title>Simple Chatbot</title>
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<meta charset="UTF-8" />
|
||
<link rel="stylesheet" href="./index.css" />
|
||
</head>
|
||
|
||
<body>
|
||
<p>Step 1: Initialize WebLLM and Download Model</p>
|
||
<div class="download-container">
|
||
<select id="model-selection"></select>
|
||
<button id="download">Download</button>
|
||
</div>
|
||
<p id="download-status" class="hidden"></p>
|
||
|
||
<p>Step 2: Chat</p>
|
||
<div class="chat-container">
|
||
<div id="chat-box" class="chat-box"></div>
|
||
<div id="chat-stats" class="chat-stats hidden"></div>
|
||
<div class="chat-input-container">
|
||
<input type="text" id="user-input" placeholder="Type a message..." />
|
||
<button id="send" disabled>Send</button>
|
||
</div>
|
||
</div>
|
||
|
||
<script src="./index.js" type="module"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-js/index.js
|
||
================================================
|
||
import * as webllm from "https://esm.run/@mlc-ai/web-llm";
|
||
|
||
/*************** WebLLM logic ***************/
|
||
const messages = [
|
||
{
|
||
content: "You are a helpful AI agent helping users.",
|
||
role: "system",
|
||
},
|
||
];
|
||
|
||
const availableModels = webllm.prebuiltAppConfig.model_list.map(
|
||
(m) => m.model_id,
|
||
);
|
||
let selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-1k";
|
||
|
||
// Callback function for initializing progress
|
||
function updateEngineInitProgressCallback(report) {
|
||
console.log("initialize", report.progress);
|
||
document.getElementById("download-status").textContent = report.text;
|
||
}
|
||
|
||
// Create engine instance
|
||
const engine = new webllm.MLCEngine();
|
||
engine.setInitProgressCallback(updateEngineInitProgressCallback);
|
||
|
||
// Reveal the status area, read the user's model choice, and (re)load the
// engine with that model and the default generation settings.
async function initializeWebLLMEngine() {
  document.getElementById("download-status").classList.remove("hidden");
  selectedModel = document.getElementById("model-selection").value;
  await engine.reload(selectedModel, {
    temperature: 1.0,
    top_p: 1,
  });
}
|
||
|
||
/**
 * Stream a chat completion for `messages`.
 *
 * Callbacks:
 *   onUpdate(partial)        - called after every chunk with the text so far.
 *   onFinish(final, usage)   - called once with the full message and the
 *                              usage stats from the last chunk (may be
 *                              undefined if no usage chunk arrived).
 *   onError(err)             - called if the request or stream fails.
 */
async function streamingGenerating(messages, onUpdate, onFinish, onError) {
  try {
    let curMessage = "";
    let usage;
    const completion = await engine.chat.completions.create({
      stream: true,
      messages,
      stream_options: { include_usage: true },
    });
    for await (const chunk of completion) {
      // Guard both `choices[0]` and `delta`: the original chained
      // `?.delta.content`, which throws if a choice arrives without `delta`.
      const curDelta = chunk.choices[0]?.delta?.content;
      if (curDelta) {
        curMessage += curDelta;
      }
      if (chunk.usage) {
        usage = chunk.usage; // only the last chunk carries usage stats
      }
      onUpdate(curMessage);
    }
    const finalMessage = await engine.getMessage();
    onFinish(finalMessage, usage);
  } catch (err) {
    onError(err);
  }
}
|
||
|
||
/*************** UI logic ***************/
|
||
/**
 * Handle a click on "Send": push the user's message into the conversation,
 * render a placeholder assistant bubble, and stream the reply into it.
 * No-op when the input box is empty.
 */
function onMessageSend() {
  const input = document.getElementById("user-input").value.trim();
  const message = {
    content: input,
    role: "user",
  };
  if (input.length === 0) {
    return;
  }
  // Disable "Send" until generation finishes to avoid overlapping requests.
  document.getElementById("send").disabled = true;

  messages.push(message);
  appendMessage(message);

  document.getElementById("user-input").value = "";
  document
    .getElementById("user-input")
    .setAttribute("placeholder", "Generating...");

  // Placeholder bubble that streaming updates will overwrite.
  const aiMessage = {
    content: "typing...",
    role: "assistant",
  };
  appendMessage(aiMessage);

  const onFinishGenerating = (finalMessage, usage) => {
    updateLastMessage(finalMessage);
    document.getElementById("send").disabled = false;
    // `usage` may be undefined if the stream ended without a usage chunk;
    // skip the stats line instead of throwing on `usage.prompt_tokens`.
    if (!usage) {
      return;
    }
    const usageText =
      `prompt_tokens: ${usage.prompt_tokens}, ` +
      `completion_tokens: ${usage.completion_tokens}, ` +
      `prefill: ${usage.extra.prefill_tokens_per_s.toFixed(4)} tokens/sec, ` +
      `decoding: ${usage.extra.decode_tokens_per_s.toFixed(4)} tokens/sec`;
    document.getElementById("chat-stats").classList.remove("hidden");
    document.getElementById("chat-stats").textContent = usageText;
  };

  streamingGenerating(
    messages,
    updateLastMessage,
    onFinishGenerating,
    console.error,
  );
}
|
||
|
||
/**
 * Render one chat message bubble and append it to the chat box,
 * right-aligned for the user and left-aligned for the assistant.
 */
function appendMessage(message) {
  const chatBox = document.getElementById("chat-box");

  const bubble = document.createElement("div");
  bubble.classList.add("message");
  bubble.textContent = message.content;

  const row = document.createElement("div");
  row.classList.add("message-container");
  row.classList.add(message.role === "user" ? "user" : "assistant");

  row.appendChild(bubble);
  chatBox.appendChild(row);
  chatBox.scrollTop = chatBox.scrollHeight; // keep the latest message in view
}
|
||
|
||
function updateLastMessage(content) {
|
||
const messageDoms = document
|
||
.getElementById("chat-box")
|
||
.querySelectorAll(".message");
|
||
const lastMessageDom = messageDoms[messageDoms.length - 1];
|
||
lastMessageDom.textContent = content;
|
||
}
|
||
|
||
/*************** UI binding ***************/
// Populate the model dropdown from the prebuilt model list and preselect
// the default model.
for (const modelId of availableModels) {
  const option = document.createElement("option");
  option.value = modelId;
  option.textContent = modelId;
  document.getElementById("model-selection").appendChild(option);
}
document.getElementById("model-selection").value = selectedModel;

// Download button: load the chosen model, then unlock the send button.
document.getElementById("download").addEventListener("click", () => {
  initializeWebLLMEngine().then(() => {
    document.getElementById("send").disabled = false;
  });
});

// Send button: forward the typed message to the engine.
document.getElementById("send").addEventListener("click", () => {
  onMessageSend();
});
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-ts/README.md
|
||
================================================
|
||
# SimpleChat
|
||
|
||
This folder provides a complete implementation of a simple
|
||
chat app based on WebLLM. To try it out, you can do the following steps
|
||
under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note if you would like to hack WebLLM core package,
|
||
you can change web-llm dependencies as `"file:../.."`, and follow the build from source
|
||
instruction in the project to build webllm locally. This option is only recommended
|
||
if you would like to hack WebLLM core package.
|
||
|
||
Due to the differences in command-line tools between Unix/Linux and Windows systems, special adaptation is necessary for Windows. Unix/Linux systems natively support commands like `cp` for file operations, which are not directly available in Windows. To ensure cross-platform compatibility, we use a Node.js script for file copying in Windows.
|
||
|
||
### Steps for Windows Users
|
||
|
||
1. **Create a Node.js Script File**:
|
||
- In the `examples\simple-chat` directory, create a file named `copy-config.js`.
|
||
- Add the following code to handle file copying:
|
||
```javascript
|
||
const fs = require("fs");
|
||
// Copy file
|
||
fs.copyFileSync("src/gh-config.js", "src/app-config.js");
|
||
```
|
||
|
||
2. **Modify `package.json`**:
|
||
- In the `scripts` section of your `package.json`, replace Unix-style `cp` commands with our new Node.js script. For example:
|
||
```json
|
||
"scripts": {
|
||
"start": "node copy-config.js && parcel src/llm_chat.html --port 8888",
|
||
"mlc-local": "node copy-config.js && parcel src/llm_chat.html --port 8888",
|
||
"build": "node copy-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash"
|
||
},
|
||
```
|
||
|
||
3. **Run the Application**:
|
||
- Save your changes and run `npm start` in CMD or PowerShell to start the application.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-ts/package.json
|
||
================================================
|
||
{
|
||
"name": "simple-chat",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "cp src/gh-config.js src/app-config.js && parcel src/llm_chat.html --port 8883",
|
||
"build": "cp src/gh-config.js src/app-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-ts/src/gh-config.js
|
||
================================================
|
||
import { prebuiltAppConfig } from "@mlc-ai/web-llm";
|
||
|
||
export default {
|
||
model_list: prebuiltAppConfig.model_list,
|
||
use_web_worker: true,
|
||
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-ts/src/llm_chat.css
|
||
================================================
|
||
.chatui {
|
||
display: flex;
|
||
position: relative;
|
||
flex-flow: column wrap;
|
||
justify-content: space-between;
|
||
width: 100%;
|
||
max-width: 867px;
|
||
margin: 25px 10px;
|
||
height: 600px;
|
||
border: 2px solid #ddd;
|
||
border-radius: 5px;
|
||
background-color: #1f2027;
|
||
}
|
||
|
||
.chatui-select-wrapper {
|
||
display: flex;
|
||
justify-content: center;
|
||
background-color: #1f2027;
|
||
padding: 10px 0;
|
||
}
|
||
|
||
#chatui-select {
|
||
width: 350px;
|
||
background-color: #1f2027;
|
||
color: white;
|
||
border: none;
|
||
}
|
||
|
||
#chatui-select:focus {
|
||
outline: none;
|
||
}
|
||
|
||
#chatui-select::-webkit-scrollbar {
|
||
display: none;
|
||
}
|
||
|
||
#chatui-select option {
|
||
background-color: #1f2027;
|
||
color: white;
|
||
}
|
||
|
||
#chatui-select option:hover {
|
||
background-color: #474747;
|
||
color: white;
|
||
}
|
||
|
||
/* Header bar. A stray "s " made the selector "s .chatui-header", which only
   matched inside <s> elements, so this rule never applied. */
.chatui-header {
  display: flex;
  justify-content: space-between;
  padding: 10px;
  border-bottom: 2px solid #ddd;
  background: #eee;
  color: #666;
}
|
||
|
||
/* Used to remove tiny white lines in android devices; not sure if there is a better way */
|
||
*,
|
||
*::before,
|
||
*::after {
|
||
box-sizing: content-box;
|
||
}
|
||
|
||
.chatui-chat {
|
||
flex: 1;
|
||
overflow-y: auto;
|
||
padding: 10px;
|
||
background-color: #1f2027;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar {
|
||
width: 6px;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-track {
|
||
background: #1f2027;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-thumb {
|
||
background: #888;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-thumb:hover {
|
||
background: #555;
|
||
}
|
||
|
||
.msg {
|
||
display: flex;
|
||
align-items: flex-end;
|
||
margin-bottom: 10px;
|
||
}
|
||
|
||
.msg:last-of-type {
|
||
margin: 0;
|
||
}
|
||
|
||
.msg-bubble {
|
||
background-color: #f0f0f0;
|
||
border-radius: 8px;
|
||
padding: 16px;
|
||
margin: 5px auto;
|
||
width: calc(100% - 20px);
|
||
box-sizing: border-box;
|
||
color: black;
|
||
border: none;
|
||
font-size: medium;
|
||
margin-left: auto;
|
||
margin-right: auto;
|
||
}
|
||
|
||
.left-msg .msg-bubble {
|
||
background-color: #343541;
|
||
color: #ececec;
|
||
}
|
||
|
||
.error-msg .msg-bubble {
|
||
background-color: #343541;
|
||
color: #f15959;
|
||
}
|
||
|
||
.init-msg .msg-bubble {
|
||
background-color: #343541;
|
||
color: #ececec;
|
||
}
|
||
|
||
.right-msg .msg-bubble {
|
||
background-color: #444654;
|
||
color: #ececec;
|
||
}
|
||
|
||
.chatui-inputarea {
|
||
display: flex;
|
||
padding: 10px;
|
||
border-top: 2px solid transparent;
|
||
background-color: #1f2027;
|
||
}
|
||
|
||
.chatui-inputarea * {
|
||
padding: 10px;
|
||
border: none;
|
||
border-radius: 3px;
|
||
font-size: 1em;
|
||
color: white;
|
||
background: rgba(0, 0, 0, 0.3);
|
||
}
|
||
|
||
.chatui-input {
|
||
flex: 1;
|
||
background-color: #40414f;
|
||
color: white;
|
||
}
|
||
|
||
/* Reset button: centered 20x20 icon on a dark background.
   Duplicate background-repeat/background-position declarations removed. */
.chatui-reset-btn {
  margin-left: 10px;
  background-color: #40414f;
  color: #fff;
  font-weight: bold;
  cursor: pointer;
  background-image: url("img/reset.png");
  background-repeat: no-repeat;
  background-position: center;
  width: 40px;
  background-size: 20px 20px;
}
|
||
|
||
.chatui-reset-btn:hover {
|
||
background-color: #03a33e;
|
||
}
|
||
|
||
/* Send button: centered 20x20 icon on a dark background.
   Duplicate background-repeat/background-position declarations removed. */
.chatui-send-btn {
  margin-left: 10px;
  background-color: #40414f;
  color: #fff;
  font-weight: bold;
  cursor: pointer;
  background-image: url("img/plane.png");
  background-repeat: no-repeat;
  background-position: center;
  width: 40px;
  background-size: 20px 20px;
}
|
||
|
||
.chatui-send-btn:hover {
|
||
background-color: #03a33e;
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-ts/src/llm_chat.html
|
||
================================================
|
||
<link href="./llm_chat.css" rel="stylesheet" type="text/css" />
|
||
|
||
<div class="chatui">
|
||
<div class="chatui-select-wrapper">
|
||
<select id="chatui-select"></select>
|
||
</div>
|
||
<div class="chatui-chat" id="chatui-chat" height="100"></div>
|
||
|
||
<div class="chatui-inputarea">
|
||
<input
|
||
id="chatui-input"
|
||
type="text"
|
||
class="chatui-input"
|
||
placeholder="Enter your message..."
|
||
/>
|
||
<button id="chatui-send-btn" class="chatui-send-btn"></button>
|
||
<button id="chatui-reset-btn" class="chatui-reset-btn"></button>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="chatui-extra-control">
|
||
<label id="chatui-info-label"></label>
|
||
</div>
|
||
<!--- Place script after ui to make sure ui loads first -->
|
||
<script type="module" src="./simple_chat.ts"></script>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-ts/src/simple_chat.ts
|
||
================================================
|
||
import appConfig from "./app-config";
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function getElementAndCheck(id: string): HTMLElement {
|
||
const element = document.getElementById(id);
|
||
if (element == null) {
|
||
throw Error("Cannot find element " + id);
|
||
}
|
||
return element;
|
||
}
|
||
|
||
class ChatUI {
|
||
private uiChat: HTMLElement;
|
||
private uiChatInput: HTMLInputElement;
|
||
private uiChatInfoLabel: HTMLLabelElement;
|
||
private engine: webllm.MLCEngineInterface | webllm.WebWorkerMLCEngine;
|
||
private config: webllm.AppConfig = appConfig;
|
||
private selectedModel: string;
|
||
private chatLoaded = false;
|
||
private requestInProgress = false;
|
||
private chatHistory: webllm.ChatCompletionMessageParam[] = [];
|
||
// We use a request chain to ensure that
|
||
// all requests send to chat are sequentialized
|
||
private chatRequestChain: Promise<void> = Promise.resolve();
|
||
|
||
/**
|
||
* An asynchronous factory constructor since we need to await getMaxStorageBufferBindingSize();
|
||
* this is not allowed in a constructor (which cannot be asynchronous).
|
||
*/
|
||
public static CreateAsync = async (engine: webllm.MLCEngineInterface) => {
|
||
const chatUI = new ChatUI();
|
||
chatUI.engine = engine;
|
||
// get the elements
|
||
chatUI.uiChat = getElementAndCheck("chatui-chat");
|
||
chatUI.uiChatInput = getElementAndCheck("chatui-input") as HTMLInputElement;
|
||
chatUI.uiChatInfoLabel = getElementAndCheck(
|
||
"chatui-info-label",
|
||
) as HTMLLabelElement;
|
||
// register event handlers
|
||
getElementAndCheck("chatui-reset-btn").onclick = () => {
|
||
chatUI.onReset();
|
||
};
|
||
getElementAndCheck("chatui-send-btn").onclick = () => {
|
||
chatUI.onGenerate();
|
||
};
|
||
// TODO: find other alternative triggers
|
||
getElementAndCheck("chatui-input").onkeypress = (event) => {
|
||
if (event.keyCode === 13) {
|
||
chatUI.onGenerate();
|
||
}
|
||
};
|
||
|
||
// When we detect low maxStorageBufferBindingSize, we assume that the device (e.g. an Android
|
||
// phone) can only handle small models and make all other models unselectable. Otherwise, the
|
||
// browser may crash. See https://github.com/mlc-ai/web-llm/issues/209.
|
||
// Also use GPU vendor to decide whether it is a mobile device (hence with limited resources).
|
||
const androidMaxStorageBufferBindingSize = 1 << 27; // 128MB
|
||
const mobileVendors = new Set<string>(["qualcomm", "arm"]);
|
||
let restrictModels = false;
|
||
let maxStorageBufferBindingSize: number;
|
||
let gpuVendor: string;
|
||
try {
|
||
[maxStorageBufferBindingSize, gpuVendor] = await Promise.all([
|
||
engine.getMaxStorageBufferBindingSize(),
|
||
engine.getGPUVendor(),
|
||
]);
|
||
} catch (err) {
|
||
chatUI.appendMessage("error", "Init error, " + err.toString());
|
||
console.log(err.stack);
|
||
return;
|
||
}
|
||
if (
|
||
(gpuVendor.length != 0 && mobileVendors.has(gpuVendor)) ||
|
||
maxStorageBufferBindingSize <= androidMaxStorageBufferBindingSize
|
||
) {
|
||
chatUI.appendMessage(
|
||
"init",
|
||
"Your device seems to have " +
|
||
"limited resources, so we restrict the selectable models.",
|
||
);
|
||
restrictModels = true;
|
||
}
|
||
|
||
// Populate modelSelector
|
||
const modelSelector = getElementAndCheck(
|
||
"chatui-select",
|
||
) as HTMLSelectElement;
|
||
for (let i = 0; i < chatUI.config.model_list.length; ++i) {
|
||
const item = chatUI.config.model_list[i];
|
||
const opt = document.createElement("option");
|
||
opt.value = item.model_id;
|
||
opt.innerHTML = item.model_id;
|
||
opt.selected = i == 0;
|
||
if (
|
||
(restrictModels &&
|
||
(item.low_resource_required === undefined ||
|
||
!item.low_resource_required)) ||
|
||
(item.buffer_size_required_bytes &&
|
||
maxStorageBufferBindingSize < item.buffer_size_required_bytes)
|
||
) {
|
||
// Either on a low-resource device and not a low-resource model
|
||
// Or device's maxStorageBufferBindingSize does not satisfy the model's need (if specified)
|
||
const params = new URLSearchParams(location.search);
|
||
opt.disabled = !params.has("bypassRestrictions");
|
||
opt.selected = false;
|
||
}
|
||
if (
|
||
!modelSelector.lastChild?.textContent?.startsWith(
|
||
opt.value.split("-")[0],
|
||
)
|
||
) {
|
||
modelSelector.appendChild(document.createElement("hr"));
|
||
}
|
||
modelSelector.appendChild(opt);
|
||
}
|
||
modelSelector.appendChild(document.createElement("hr"));
|
||
|
||
chatUI.selectedModel = modelSelector.value;
|
||
modelSelector.onchange = () => {
|
||
chatUI.onSelectChange(modelSelector);
|
||
};
|
||
|
||
return chatUI;
|
||
};
|
||
|
||
/**
|
||
* Push a task to the execution queue.
|
||
*
|
||
* @param task The task to be executed;
|
||
*/
|
||
private pushTask(task: () => Promise<void>) {
|
||
const lastEvent = this.chatRequestChain;
|
||
this.chatRequestChain = lastEvent.then(task);
|
||
}
|
||
// Event handlers
|
||
// all event handler pushes the tasks to a queue
|
||
// that get executed sequentially
|
||
// the tasks previous tasks, which causes them to early stop
|
||
// can be interrupted by engine.interruptGenerate
|
||
private async onGenerate() {
|
||
if (this.requestInProgress) {
|
||
return;
|
||
}
|
||
this.pushTask(async () => {
|
||
await this.asyncGenerate();
|
||
});
|
||
}
|
||
|
||
private async onSelectChange(modelSelector: HTMLSelectElement) {
|
||
if (this.requestInProgress) {
|
||
// interrupt previous generation if any
|
||
this.engine.interruptGenerate();
|
||
}
|
||
// try reset after previous requests finishes
|
||
this.pushTask(async () => {
|
||
await this.engine.resetChat();
|
||
this.resetChatHistory();
|
||
await this.unloadChat();
|
||
this.selectedModel = modelSelector.value;
|
||
await this.asyncInitChat();
|
||
});
|
||
}
|
||
|
||
private async onReset() {
|
||
if (this.requestInProgress) {
|
||
// interrupt previous generation if any
|
||
this.engine.interruptGenerate();
|
||
}
|
||
// try reset after previous requests finishes
|
||
this.pushTask(async () => {
|
||
await this.engine.resetChat();
|
||
this.resetChatHistory();
|
||
});
|
||
}
|
||
|
||
// Internal helper functions
|
||
private appendMessage(kind, text) {
|
||
if (kind == "init") {
|
||
text = "[System Initalize] " + text;
|
||
}
|
||
if (this.uiChat === undefined) {
|
||
throw Error("cannot find ui chat");
|
||
}
|
||
const msg = `
|
||
<div class="msg ${kind}-msg">
|
||
<div class="msg-bubble">
|
||
<div class="msg-text">${text}</div>
|
||
</div>
|
||
</div>
|
||
`;
|
||
this.uiChat.insertAdjacentHTML("beforeend", msg);
|
||
this.uiChat.scrollTo(0, this.uiChat.scrollHeight);
|
||
}
|
||
|
||
// Special care for user input such that we treat it as pure text instead of html
|
||
private appendUserMessage(text: string) {
|
||
if (this.uiChat === undefined) {
|
||
throw Error("cannot find ui chat");
|
||
}
|
||
const msg = `
|
||
<div class="msg right-msg">
|
||
<div class="msg-bubble">
|
||
<div class="msg-text"></div>
|
||
</div>
|
||
</div>
|
||
`;
|
||
this.uiChat.insertAdjacentHTML("beforeend", msg);
|
||
// Recurse three times to get `msg-text`
|
||
const msgElement = this.uiChat.lastElementChild?.lastElementChild
|
||
?.lastElementChild as HTMLElement;
|
||
msgElement.insertAdjacentText("beforeend", text);
|
||
this.uiChat.scrollTo(0, this.uiChat.scrollHeight);
|
||
}
|
||
|
||
private updateLastMessage(kind, text) {
|
||
if (kind == "init") {
|
||
text = "[System Initialize] " + text;
|
||
}
|
||
if (this.uiChat === undefined) {
|
||
throw Error("cannot find ui chat");
|
||
}
|
||
const matches = this.uiChat.getElementsByClassName(`msg ${kind}-msg`);
|
||
if (matches.length == 0) throw Error(`${kind} message do not exist`);
|
||
const msg = matches[matches.length - 1];
|
||
const msgText = msg.getElementsByClassName("msg-text");
|
||
if (msgText.length != 1) throw Error("Expect msg-text");
|
||
if (msgText[0].innerHTML == text) return;
|
||
const list = text.split("\n").map((t) => {
|
||
const item = document.createElement("div");
|
||
item.textContent = t;
|
||
return item;
|
||
});
|
||
msgText[0].innerHTML = "";
|
||
list.forEach((item) => msgText[0].append(item));
|
||
this.uiChat.scrollTo(0, this.uiChat.scrollHeight);
|
||
}
|
||
|
||
private resetChatHistory() {
|
||
this.chatHistory = [];
|
||
const clearTags = ["left", "right", "init", "error"];
|
||
for (const tag of clearTags) {
|
||
// need to unpack to list so the iterator don't get affected by mutation
|
||
const matches = [...this.uiChat.getElementsByClassName(`msg ${tag}-msg`)];
|
||
for (const item of matches) {
|
||
this.uiChat.removeChild(item);
|
||
}
|
||
}
|
||
if (this.uiChatInfoLabel !== undefined) {
|
||
this.uiChatInfoLabel.innerHTML = "";
|
||
}
|
||
}
|
||
|
||
private async asyncInitChat() {
|
||
if (this.chatLoaded) return;
|
||
this.requestInProgress = true;
|
||
this.appendMessage("init", "");
|
||
const initProgressCallback = (report) => {
|
||
this.updateLastMessage("init", report.text);
|
||
};
|
||
this.engine.setInitProgressCallback(initProgressCallback);
|
||
|
||
try {
|
||
await this.engine.reload(this.selectedModel);
|
||
} catch (err) {
|
||
this.appendMessage("error", "Init error, " + err.toString());
|
||
console.log(err.stack);
|
||
this.unloadChat();
|
||
this.requestInProgress = false;
|
||
return;
|
||
}
|
||
this.requestInProgress = false;
|
||
this.chatLoaded = true;
|
||
}
|
||
|
||
private async unloadChat() {
|
||
await this.engine.unload();
|
||
this.chatLoaded = false;
|
||
}
|
||
|
||
/**
|
||
* Run generate
|
||
*/
|
||
private async asyncGenerate() {
|
||
await this.asyncInitChat();
|
||
this.requestInProgress = true;
|
||
const prompt = this.uiChatInput.value;
|
||
if (prompt == "") {
|
||
this.requestInProgress = false;
|
||
return;
|
||
}
|
||
|
||
this.appendUserMessage(prompt);
|
||
this.uiChatInput.value = "";
|
||
this.uiChatInput.setAttribute("placeholder", "Generating...");
|
||
|
||
this.appendMessage("left", "");
|
||
this.chatHistory.push({ role: "user", content: prompt });
|
||
|
||
try {
|
||
let curMessage = "";
|
||
let usage: webllm.CompletionUsage | undefined = undefined;
|
||
const completion = await this.engine.chat.completions.create({
|
||
stream: true,
|
||
messages: this.chatHistory,
|
||
stream_options: { include_usage: true },
|
||
// if model starts with "Qwen3", disable thinking.
|
||
extra_body: this.selectedModel.startsWith("Qwen3")
|
||
? {
|
||
enable_thinking: false,
|
||
}
|
||
: undefined,
|
||
});
|
||
// TODO(Charlie): Processing of <20> requires changes
|
||
for await (const chunk of completion) {
|
||
const curDelta = chunk.choices[0]?.delta.content;
|
||
if (curDelta) {
|
||
curMessage += curDelta;
|
||
}
|
||
this.updateLastMessage("left", curMessage);
|
||
if (chunk.usage) {
|
||
usage = chunk.usage;
|
||
}
|
||
}
|
||
if (usage) {
|
||
this.uiChatInfoLabel.innerHTML =
|
||
`prompt_tokens: ${usage.prompt_tokens}, ` +
|
||
`completion_tokens: ${usage.completion_tokens}, ` +
|
||
`prefill: ${usage.extra.prefill_tokens_per_s.toFixed(4)} tokens/sec, ` +
|
||
`decoding: ${usage.extra.decode_tokens_per_s.toFixed(4)} tokens/sec`;
|
||
}
|
||
const finalMessage = await this.engine.getMessage();
|
||
this.updateLastMessage("left", finalMessage); // TODO: Remove this after <20> issue is fixed
|
||
this.chatHistory.push({ role: "assistant", content: finalMessage });
|
||
} catch (err) {
|
||
this.appendMessage("error", "Generate error, " + err.toString());
|
||
console.log(err.stack);
|
||
await this.unloadChat();
|
||
}
|
||
this.uiChatInput.setAttribute("placeholder", "Enter your message...");
|
||
this.requestInProgress = false;
|
||
}
|
||
}
|
||
|
||
const useWebWorker = appConfig.use_web_worker;
|
||
let engine: webllm.MLCEngineInterface;
|
||
|
||
// Here we do not use `CreateMLCEngine()` but instantiate an engine that is not loaded with model
|
||
if (useWebWorker) {
|
||
engine = new webllm.WebWorkerMLCEngine(
|
||
new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
|
||
{ appConfig, logLevel: "INFO" },
|
||
);
|
||
} else {
|
||
engine = new webllm.MLCEngine({ appConfig });
|
||
}
|
||
ChatUI.CreateAsync(engine);
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-ts/src/worker.ts
|
||
================================================
|
||
// Serve the engine workload through web worker
|
||
import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
|
||
|
||
const handler = new WebWorkerMLCEngineHandler();
|
||
self.onmessage = (msg: MessageEvent) => {
|
||
handler.onmessage(msg);
|
||
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-upload/README.md
|
||
================================================
|
||
# SimpleChat
|
||
|
||
This folder provides a complete implementation of a simple
|
||
chat app based on WebLLM. To try it out, you can do the following steps
|
||
under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note: if you would like to hack the WebLLM core package, you can change the
|
||
web-llm dependency to `"file:../.."` and follow the project's build-from-source
|
||
instructions to build WebLLM locally. This option is only recommended if you
|
||
intend to modify WebLLM itself.
|
||
|
||
Due to the differences in command-line tools between Unix/Linux and Windows systems, special adaptation is necessary for Windows. Unix/Linux systems natively support commands like `cp` for file operations, which are not directly available in Windows. To ensure cross-platform compatibility, we use a Node.js script for file copying in Windows.
|
||
|
||
### Steps for Windows Users
|
||
|
||
1. **Create a Node.js Script File**:
|
||
- In the `examples\simple-chat` directory, create a file named `copy-config.js`.
|
||
- Add the following code to handle file copying:
|
||
```javascript
|
||
const fs = require("fs");
|
||
// Copy file
|
||
fs.copyFileSync("src/gh-config.js", "src/app-config.js");
|
||
```
|
||
|
||
2. **Modify `package.json`**:
|
||
- In the `scripts` section of your `package.json`, replace Unix-style `cp` commands with our new Node.js script. For example:
|
||
```json
|
||
"scripts": {
|
||
"start": "node copy-config.js && parcel src/llm_chat.html --port 8888",
|
||
"mlc-local": "node copy-config.js && parcel src/llm_chat.html --port 8888",
|
||
"build": "node copy-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash"
|
||
},
|
||
```
|
||
|
||
3. **Run the Application**:
|
||
- Save your changes and run `npm start` in CMD or PowerShell to start the application.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-upload/package.json
|
||
================================================
|
||
{
|
||
"name": "simple-chat",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "cp src/gh-config.js src/app-config.js && parcel src/llm_chat.html --port 8883",
|
||
"build": "cp src/gh-config.js src/app-config.js && parcel build src/llm_chat.html --dist-dir lib --no-content-hash"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-upload/src/gh-config.js
|
||
================================================
|
||
import { prebuiltAppConfig } from "@mlc-ai/web-llm";
|
||
|
||
export default {
|
||
model_list: prebuiltAppConfig.model_list,
|
||
use_web_worker: true,
|
||
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-upload/src/llm_chat.css
|
||
================================================
|
||
.chatui {
|
||
display: flex;
|
||
position: relative;
|
||
flex-flow: column wrap;
|
||
justify-content: space-between;
|
||
width: 100%;
|
||
max-width: 867px;
|
||
margin: 25px 10px;
|
||
height: 600px;
|
||
border: 2px solid #ddd;
|
||
border-radius: 5px;
|
||
background-color: #1f2027;
|
||
}
|
||
|
||
.chatui-select-wrapper {
|
||
display: flex;
|
||
justify-content: center;
|
||
background-color: #1f2027;
|
||
padding: 10px 0;
|
||
}
|
||
|
||
#chatui-select {
|
||
width: 350px;
|
||
background-color: #1f2027;
|
||
color: white;
|
||
border: none;
|
||
}
|
||
|
||
#chatui-select:focus {
|
||
outline: none;
|
||
}
|
||
|
||
#chatui-select::-webkit-scrollbar {
|
||
display: none;
|
||
}
|
||
|
||
#chatui-select option {
|
||
background-color: #1f2027;
|
||
color: white;
|
||
}
|
||
|
||
#chatui-select option:hover {
|
||
background-color: #474747;
|
||
color: white;
|
||
}
|
||
|
||
.chatui-header {
|
||
display: flex;
|
||
justify-content: space-between;
|
||
padding: 10px;
|
||
border-bottom: 2px solid #ddd;
|
||
background: #eee;
|
||
color: #666;
|
||
}
|
||
|
||
/* Used to remove tiny white lines in android devices; not sure if there is a better way */
|
||
*,
|
||
*::before,
|
||
*::after {
|
||
box-sizing: content-box;
|
||
}
|
||
|
||
.chatui-chat {
|
||
flex: 1;
|
||
overflow-y: auto;
|
||
padding: 10px;
|
||
background-color: #1f2027;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar {
|
||
width: 6px;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-track {
|
||
background: #1f2027;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-thumb {
|
||
background: #888;
|
||
}
|
||
|
||
.chatui-chat::-webkit-scrollbar-thumb:hover {
|
||
background: #555;
|
||
}
|
||
|
||
.msg {
|
||
display: flex;
|
||
align-items: flex-end;
|
||
margin-bottom: 10px;
|
||
}
|
||
|
||
.msg:last-of-type {
|
||
margin: 0;
|
||
}
|
||
|
||
.msg-bubble {
|
||
background-color: #f0f0f0;
|
||
border-radius: 8px;
|
||
padding: 16px;
|
||
margin: 5px auto;
|
||
width: calc(100% - 20px);
|
||
box-sizing: border-box;
|
||
color: black;
|
||
border: none;
|
||
font-size: medium;
|
||
margin-left: auto;
|
||
margin-right: auto;
|
||
}
|
||
|
||
.left-msg .msg-bubble {
|
||
background-color: #343541;
|
||
color: #ececec;
|
||
}
|
||
|
||
.error-msg .msg-bubble {
|
||
background-color: #343541;
|
||
color: #f15959;
|
||
}
|
||
|
||
.init-msg .msg-bubble {
|
||
background-color: #343541;
|
||
color: #ececec;
|
||
}
|
||
|
||
.right-msg .msg-bubble {
|
||
background-color: #444654;
|
||
color: #ececec;
|
||
}
|
||
|
||
.chatui-inputarea {
|
||
display: flex;
|
||
padding: 10px;
|
||
border-top: 2px solid transparent;
|
||
background-color: #1f2027;
|
||
}
|
||
|
||
.chatui-inputarea * {
|
||
padding: 10px;
|
||
border: none;
|
||
border-radius: 3px;
|
||
font-size: 1em;
|
||
color: white;
|
||
background: rgba(0, 0, 0, 0.3);
|
||
}
|
||
|
||
.chatui-input {
|
||
flex: 1;
|
||
background-color: #40414f;
|
||
color: white;
|
||
}
|
||
|
||
.chatui-reset-btn {
|
||
margin-left: 10px;
|
||
background-color: #40414f;
|
||
color: #fff;
|
||
font-weight: bold;
|
||
cursor: pointer;
|
||
background-image: url("img/reset.png");
|
||
background-repeat: no-repeat;
|
||
background-position: center;
|
||
width: 40px;
|
||
background-repeat: no-repeat;
|
||
background-position: center;
|
||
background-size: 20px 20px;
|
||
}
|
||
|
||
.chatui-reset-btn:hover {
|
||
background-color: #03a33e;
|
||
}
|
||
|
||
.chatui-send-btn {
|
||
margin-left: 10px;
|
||
background-color: #40414f;
|
||
color: #fff;
|
||
font-weight: bold;
|
||
cursor: pointer;
|
||
background-image: url("img/plane.png");
|
||
background-repeat: no-repeat;
|
||
background-position: center;
|
||
width: 40px;
|
||
background-repeat: no-repeat;
|
||
background-position: center;
|
||
background-size: 20px 20px;
|
||
}
|
||
|
||
.chatui-send-btn:hover {
|
||
background-color: #03a33e;
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-upload/src/llm_chat.html
|
||
================================================
|
||
<link href="./llm_chat.css" rel="stylesheet" type="text/css" />
|
||
|
||
<div class="chatui">
|
||
<div class="chatui-select-wrapper">
|
||
<select id="chatui-select"></select>
|
||
</div>
|
||
<div class="chatui-chat" id="chatui-chat" height="100"></div>
|
||
|
||
<body>
|
||
<input
|
||
type="file"
|
||
id="file-input"
|
||
style="position: absolute; top: 10px; right: 20px"
|
||
multiple
|
||
onchange="uploadFiles()"
|
||
/>
|
||
</body>
|
||
<div class="chatui-inputarea">
|
||
<input
|
||
id="chatui-input"
|
||
type="text"
|
||
class="chatui-input"
|
||
placeholder="Enter your message..."
|
||
/>
|
||
<button id="chatui-send-btn" class="chatui-send-btn"></button>
|
||
<button id="chatui-reset-btn" class="chatui-reset-btn"></button>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="chatui-extra-control">
|
||
<label id="chatui-info-label"></label>
|
||
</div>
|
||
<!--- Place script after ui to make sure ui loads first -->
|
||
<script type="module" src="./simple_chat.ts"></script>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-upload/src/simple_chat.ts
|
||
================================================
|
||
import appConfig from "./app-config";
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function getElementAndCheck(id: string): HTMLElement {
|
||
const element = document.getElementById(id);
|
||
if (element == null) {
|
||
throw Error("Cannot find element " + id);
|
||
}
|
||
return element;
|
||
}
|
||
|
||
class ChatUI {
|
||
private uiChat: HTMLElement;
|
||
private uiChatInput: HTMLInputElement;
|
||
private uiChatInfoLabel: HTMLLabelElement;
|
||
private engine: webllm.MLCEngineInterface | webllm.WebWorkerMLCEngine;
|
||
private config: webllm.AppConfig = appConfig;
|
||
private selectedModel: string;
|
||
private chatLoaded = false;
|
||
private requestInProgress = false;
|
||
private chatHistory: webllm.ChatCompletionMessageParam[] = [];
|
||
// We use a request chain to ensure that
|
||
// all requests send to chat are sequentialized
|
||
private chatRequestChain: Promise<void> = Promise.resolve();
|
||
|
||
/**
|
||
* An asynchronous factory constructor since we need to await getMaxStorageBufferBindingSize();
|
||
* this is not allowed in a constructor (which cannot be asynchronous).
|
||
*/
|
||
public static CreateAsync = async (engine: webllm.MLCEngineInterface) => {
|
||
const chatUI = new ChatUI();
|
||
chatUI.engine = engine;
|
||
// get the elements
|
||
chatUI.uiChat = getElementAndCheck("chatui-chat");
|
||
chatUI.uiChatInput = getElementAndCheck("chatui-input") as HTMLInputElement;
|
||
chatUI.uiChatInfoLabel = getElementAndCheck(
|
||
"chatui-info-label",
|
||
) as HTMLLabelElement;
|
||
// register event handlers
|
||
getElementAndCheck("chatui-reset-btn").onclick = () => {
|
||
chatUI.onReset();
|
||
};
|
||
getElementAndCheck("chatui-send-btn").onclick = () => {
|
||
chatUI.onGenerate();
|
||
};
|
||
// TODO: find other alternative triggers
|
||
getElementAndCheck("chatui-input").onkeypress = (event) => {
|
||
if (event.keyCode === 13) {
|
||
chatUI.onGenerate();
|
||
}
|
||
};
|
||
|
||
// When we detect low maxStorageBufferBindingSize, we assume that the device (e.g. an Android
|
||
// phone) can only handle small models and make all other models unselectable. Otherwise, the
|
||
// browser may crash. See https://github.com/mlc-ai/web-llm/issues/209.
|
||
// Also use GPU vendor to decide whether it is a mobile device (hence with limited resources).
|
||
const androidMaxStorageBufferBindingSize = 1 << 27; // 128MB
|
||
const mobileVendors = new Set<string>(["qualcomm", "arm"]);
|
||
let restrictModels = false;
|
||
let maxStorageBufferBindingSize: number;
|
||
let gpuVendor: string;
|
||
try {
|
||
[maxStorageBufferBindingSize, gpuVendor] = await Promise.all([
|
||
engine.getMaxStorageBufferBindingSize(),
|
||
engine.getGPUVendor(),
|
||
]);
|
||
} catch (err) {
|
||
chatUI.appendMessage("error", "Init error, " + err.toString());
|
||
console.log(err.stack);
|
||
return;
|
||
}
|
||
if (
|
||
(gpuVendor.length != 0 && mobileVendors.has(gpuVendor)) ||
|
||
maxStorageBufferBindingSize <= androidMaxStorageBufferBindingSize
|
||
) {
|
||
chatUI.appendMessage(
|
||
"init",
|
||
"Your device seems to have " +
|
||
"limited resources, so we restrict the selectable models.",
|
||
);
|
||
restrictModels = true;
|
||
}
|
||
|
||
// Populate modelSelector
|
||
const modelSelector = getElementAndCheck(
|
||
"chatui-select",
|
||
) as HTMLSelectElement;
|
||
for (let i = 0; i < chatUI.config.model_list.length; ++i) {
|
||
const item = chatUI.config.model_list[i];
|
||
const opt = document.createElement("option");
|
||
opt.value = item.model_id;
|
||
opt.innerHTML = item.model_id;
|
||
opt.selected = i == 0;
|
||
if (
|
||
(restrictModels &&
|
||
(item.low_resource_required === undefined ||
|
||
!item.low_resource_required)) ||
|
||
(item.buffer_size_required_bytes &&
|
||
maxStorageBufferBindingSize < item.buffer_size_required_bytes)
|
||
) {
|
||
// Either on a low-resource device and not a low-resource model
|
||
// Or device's maxStorageBufferBindingSize does not satisfy the model's need (if specified)
|
||
const params = new URLSearchParams(location.search);
|
||
opt.disabled = !params.has("bypassRestrictions");
|
||
opt.selected = false;
|
||
}
|
||
if (
|
||
!modelSelector.lastChild?.textContent?.startsWith(
|
||
opt.value.split("-")[0],
|
||
)
|
||
) {
|
||
modelSelector.appendChild(document.createElement("hr"));
|
||
}
|
||
modelSelector.appendChild(opt);
|
||
}
|
||
modelSelector.appendChild(document.createElement("hr"));
|
||
|
||
chatUI.selectedModel = modelSelector.value;
|
||
modelSelector.onchange = () => {
|
||
chatUI.onSelectChange(modelSelector);
|
||
};
|
||
|
||
return chatUI;
|
||
};
|
||
|
||
/**
|
||
* Push a task to the execution queue.
|
||
*
|
||
* @param task The task to be executed;
|
||
*/
|
||
private pushTask(task: () => Promise<void>) {
|
||
const lastEvent = this.chatRequestChain;
|
||
this.chatRequestChain = lastEvent.then(task);
|
||
}
|
||
// Event handlers
|
||
// all event handler pushes the tasks to a queue
|
||
// that get executed sequentially
|
||
// the tasks previous tasks, which causes them to early stop
|
||
// can be interrupted by engine.interruptGenerate
|
||
private async onGenerate() {
|
||
if (this.requestInProgress) {
|
||
return;
|
||
}
|
||
this.pushTask(async () => {
|
||
await this.asyncGenerate();
|
||
});
|
||
}
|
||
|
||
private async onSelectChange(modelSelector: HTMLSelectElement) {
|
||
if (this.requestInProgress) {
|
||
// interrupt previous generation if any
|
||
this.engine.interruptGenerate();
|
||
}
|
||
// try reset after previous requests finishes
|
||
this.pushTask(async () => {
|
||
await this.engine.resetChat();
|
||
this.resetChatHistory();
|
||
await this.unloadChat();
|
||
this.selectedModel = modelSelector.value;
|
||
await this.asyncInitChat();
|
||
});
|
||
}
|
||
|
||
private async onReset() {
|
||
if (this.requestInProgress) {
|
||
// interrupt previous generation if any
|
||
this.engine.interruptGenerate();
|
||
}
|
||
// try reset after previous requests finishes
|
||
this.pushTask(async () => {
|
||
await this.engine.resetChat();
|
||
this.resetChatHistory();
|
||
});
|
||
}
|
||
|
||
// Internal helper functions
|
||
private appendMessage(kind, text) {
|
||
if (kind == "init") {
|
||
text = "[System Initalize] " + text;
|
||
}
|
||
if (this.uiChat === undefined) {
|
||
throw Error("cannot find ui chat");
|
||
}
|
||
const msg = `
|
||
<div class="msg ${kind}-msg">
|
||
<div class="msg-bubble">
|
||
<div class="msg-text">${text}</div>
|
||
</div>
|
||
</div>
|
||
`;
|
||
this.uiChat.insertAdjacentHTML("beforeend", msg);
|
||
this.uiChat.scrollTo(0, this.uiChat.scrollHeight);
|
||
}
|
||
|
||
private updateLastMessage(kind, text) {
|
||
if (kind == "init") {
|
||
text = "[System Initalize] " + text;
|
||
}
|
||
if (this.uiChat === undefined) {
|
||
throw Error("cannot find ui chat");
|
||
}
|
||
const matches = this.uiChat.getElementsByClassName(`msg ${kind}-msg`);
|
||
if (matches.length == 0) throw Error(`${kind} message do not exist`);
|
||
const msg = matches[matches.length - 1];
|
||
const msgText = msg.getElementsByClassName("msg-text");
|
||
if (msgText.length != 1) throw Error("Expect msg-text");
|
||
if (msgText[0].innerHTML == text) return;
|
||
const list = text.split("\n").map((t) => {
|
||
const item = document.createElement("div");
|
||
item.textContent = t;
|
||
return item;
|
||
});
|
||
msgText[0].innerHTML = "";
|
||
list.forEach((item) => msgText[0].append(item));
|
||
this.uiChat.scrollTo(0, this.uiChat.scrollHeight);
|
||
}
|
||
|
||
private resetChatHistory() {
|
||
this.chatHistory = [];
|
||
const clearTags = ["left", "right", "init", "error"];
|
||
for (const tag of clearTags) {
|
||
// need to unpack to list so the iterator don't get affected by mutation
|
||
const matches = [...this.uiChat.getElementsByClassName(`msg ${tag}-msg`)];
|
||
for (const item of matches) {
|
||
this.uiChat.removeChild(item);
|
||
}
|
||
}
|
||
if (this.uiChatInfoLabel !== undefined) {
|
||
this.uiChatInfoLabel.innerHTML = "";
|
||
}
|
||
}
|
||
|
||
private async asyncInitChat() {
|
||
if (this.chatLoaded) return;
|
||
this.requestInProgress = true;
|
||
this.appendMessage("init", "");
|
||
const initProgressCallback = (report) => {
|
||
this.updateLastMessage("init", report.text);
|
||
};
|
||
this.engine.setInitProgressCallback(initProgressCallback);
|
||
|
||
try {
|
||
await this.engine.reload(this.selectedModel);
|
||
} catch (err) {
|
||
this.appendMessage("error", "Init error, " + err.toString());
|
||
console.log(err.stack);
|
||
this.unloadChat();
|
||
this.requestInProgress = false;
|
||
return;
|
||
}
|
||
this.requestInProgress = false;
|
||
this.chatLoaded = true;
|
||
}
|
||
|
||
private async unloadChat() {
|
||
await this.engine.unload();
|
||
this.chatLoaded = false;
|
||
}
|
||
|
||
/**
|
||
* Run generate
|
||
*/
|
||
private async asyncGenerate() {
|
||
await this.asyncInitChat();
|
||
this.requestInProgress = true;
|
||
const prompt = this.uiChatInput.value;
|
||
if (prompt == "") {
|
||
this.requestInProgress = false;
|
||
return;
|
||
}
|
||
|
||
this.appendMessage("right", prompt);
|
||
this.uiChatInput.value = "";
|
||
this.uiChatInput.setAttribute("placeholder", "Generating...");
|
||
|
||
this.appendMessage("left", "");
|
||
this.chatHistory.push({ role: "user", content: prompt });
|
||
|
||
try {
|
||
let curMessage = "";
|
||
let usage: webllm.CompletionUsage | undefined = undefined;
|
||
const completion = await this.engine.chat.completions.create({
|
||
stream: true,
|
||
messages: this.chatHistory,
|
||
stream_options: { include_usage: true },
|
||
});
|
||
// TODO(Charlie): Processing of <20> requires changes
|
||
for await (const chunk of completion) {
|
||
const curDelta = chunk.choices[0]?.delta.content;
|
||
if (curDelta) {
|
||
curMessage += curDelta;
|
||
}
|
||
this.updateLastMessage("left", curMessage);
|
||
if (chunk.usage) {
|
||
usage = chunk.usage;
|
||
}
|
||
}
|
||
if (usage) {
|
||
this.uiChatInfoLabel.innerHTML =
|
||
`prompt_tokens: ${usage.prompt_tokens}, ` +
|
||
`completion_tokens: ${usage.completion_tokens}, ` +
|
||
`prefill: ${usage.extra.prefill_tokens_per_s.toFixed(4)} tokens/sec, ` +
|
||
`decoding: ${usage.extra.decode_tokens_per_s.toFixed(4)} tokens/sec`;
|
||
}
|
||
const finalMessage = await this.engine.getMessage();
|
||
this.updateLastMessage("left", finalMessage); // TODO: Remove this after <20> issue is fixed
|
||
this.chatHistory.push({ role: "assistant", content: finalMessage });
|
||
} catch (err) {
|
||
this.appendMessage("error", "Generate error, " + err.toString());
|
||
console.log(err.stack);
|
||
await this.unloadChat();
|
||
}
|
||
this.uiChatInput.setAttribute("placeholder", "Enter your message...");
|
||
this.requestInProgress = false;
|
||
}
|
||
}
|
||
|
||
const useWebWorker = appConfig.use_web_worker;
|
||
let engine: webllm.MLCEngineInterface;
|
||
|
||
// Here we do not use `CreateMLCEngine()` but instantiate an engine that is not loaded with model
|
||
if (useWebWorker) {
|
||
engine = new webllm.WebWorkerMLCEngine(
|
||
new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
|
||
{ appConfig },
|
||
);
|
||
} else {
|
||
engine = new webllm.MLCEngine({ appConfig });
|
||
}
|
||
ChatUI.CreateAsync(engine);
|
||
|
||
function getFileType(file: File) {
|
||
if (file.name.includes("wasm")) {
|
||
return "webllm/wasm";
|
||
} else if (
|
||
file.name.includes(".bin") ||
|
||
file.name.includes("ndarray-cache.json")
|
||
) {
|
||
return "webllm/model";
|
||
} else if (file.name.includes("mlc-chat-config.json")) {
|
||
return "webllm/config";
|
||
} else {
|
||
console.log("No model file suffix found");
|
||
return "file-cache";
|
||
}
|
||
}
|
||
|
||
async function uploadToIndexedDB(file: File) {
|
||
let db;
|
||
const request = indexedDB.open(getFileType(file), 1);
|
||
request.onupgradeneeded = (event) => {
|
||
db = (event.target as IDBOpenDBRequest).result;
|
||
if (!db.objectStoreNames.contains("urls")) {
|
||
db.createObjectStore("urls", { keyPath: "url" });
|
||
}
|
||
};
|
||
request.onsuccess = (event) => {
|
||
db = (event.target as IDBOpenDBRequest).result;
|
||
};
|
||
request.onerror = (event) => {
|
||
console.error("Database error: ", (event.target as IDBOpenDBRequest).error);
|
||
};
|
||
const transaction = db.transaction("files", "readwrite");
|
||
const store = transaction.objectStore("files");
|
||
const reader = new FileReader();
|
||
reader.onload = async (e) => {
|
||
if (e.target === null || e.target.result === null) {
|
||
console.error("Do not read any files");
|
||
return;
|
||
}
|
||
const url = file.name;
|
||
store.add(e.target.result, url);
|
||
};
|
||
transaction.oncomplete = function () {
|
||
alert("All files have been uploaded to IndexedDB.");
|
||
};
|
||
transaction.onerror = function (event) {
|
||
console.error("Error uploading files:", event);
|
||
};
|
||
}
|
||
|
||
async function cacheFile(file: File, response: Response) {
|
||
try {
|
||
const cache = await caches.open(getFileType(file)); // Ensure getFileType is a synchronous function or awaited if async
|
||
console.log("Put response into cache:", response);
|
||
await cache.put(file.name, response);
|
||
} catch (error) {
|
||
console.error("Failed to cache the file:", error);
|
||
}
|
||
}
|
||
|
||
async function uploadFiles(): Promise<void> {
|
||
const input = document.getElementById("file-input") as HTMLInputElement;
|
||
if (!input.files || input.files.length === 0) {
|
||
alert("No files selected.");
|
||
return;
|
||
}
|
||
if (appConfig.useIndexedDBCache) {
|
||
for (const file of input.files) {
|
||
uploadToIndexedDB(file);
|
||
}
|
||
} else {
|
||
for (const file of input.files) {
|
||
const reader = new FileReader();
|
||
reader.onload = async (e) => {
|
||
if (e.target === null || e.target.result === null) {
|
||
console.error("Do not read any files");
|
||
return;
|
||
}
|
||
const arrayBuffer = e.target.result as ArrayBuffer;
|
||
const response = new Response(arrayBuffer, {
|
||
status: 200,
|
||
statusText: "OK",
|
||
headers: {
|
||
"Content-Type": "application/octet-stream",
|
||
"Content-Length": arrayBuffer.byteLength.toString(),
|
||
},
|
||
});
|
||
await cacheFile(file, response);
|
||
};
|
||
if (
|
||
file.name.includes("mlc-chat-config.json") ||
|
||
file.name.includes("ndarray-cache.json")
|
||
) {
|
||
reader.readAsText(file);
|
||
} else {
|
||
reader.readAsArrayBuffer(file);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Expose uploadFiles globally so the page's inline onclick handler can call it.
(window as any).uploadFiles = uploadFiles;
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/simple-chat-upload/src/worker.ts
|
||
================================================
|
||
// Serve the engine workload through web worker
|
||
import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
|
||
|
||
const handler = new WebWorkerMLCEngineHandler();
|
||
self.onmessage = (msg: MessageEvent) => {
|
||
handler.onmessage(msg);
|
||
};
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/streaming/README.md
|
||
================================================
|
||
### OpenAI API Demos
|
||
|
||
Run `npm install` first, followed by `npm start`.
|
||
|
||
Note: if you would like to hack the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
for WebLLM core development.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/streaming/package.json
|
||
================================================
|
||
{
|
||
"name": "streaming",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/streaming.html --port 8888",
|
||
"build": "parcel build src/streaming.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/streaming/src/streaming.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<script type="module" src="./streaming.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/streaming/src/streaming.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
/**
 * We demonstrate chat completion with streaming, where delta is sent while generating response.
 */
async function main() {
  // Progress reports during model download/compile are mirrored to the page.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
  // Downloads (or loads from cache) the model and returns a ready engine.
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback },
  );

  const request: webllm.ChatCompletionRequest = {
    stream: true,
    // include_usage makes the final chunk carry token/throughput statistics.
    stream_options: { include_usage: true },
    messages: [
      {
        role: "system",
        content:
          "You are a pirate chatbot who always responds in pirate speak!",
      },
      { role: "user", content: "Who are you?" },
    ],
    logprobs: true,
    top_logprobs: 2,
  };

  // With stream: true, create() returns an async generator of delta chunks.
  const asyncChunkGenerator = await engine.chat.completions.create(request);
  let message = "";
  for await (const chunk of asyncChunkGenerator) {
    console.log(chunk);
    // Each chunk carries only the newly generated delta; accumulate it.
    message += chunk.choices[0]?.delta?.content || "";
    setLabel("generate-label", message);
    if (chunk.usage) {
      console.log(chunk.usage); // only last chunk has usage
    }
    // engine.interruptGenerate(); // works with interrupt as well
  }
  console.log("Final message:\n", await engine.getMessage()); // the concatenated message
}

main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/text-completion/README.md
|
||
================================================
|
||
# WebLLM Get Started App
|
||
|
||
This folder provides a minimum demo to show WebLLM API in a webapp setting.
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note: if you would like to hack the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
for WebLLM core development.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/text-completion/package.json
|
||
================================================
|
||
{
|
||
"name": "text-completion",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/text_completion.html --port 8888",
|
||
"build": "parcel build src/text_completion.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/text-completion/src/text_completion.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./text_completion.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/text-completion/src/text_completion.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
/**
 * Demonstrates raw text completion (engine.completions, not chat.completions)
 * against a base (non-instruct) Llama model.
 */
async function main() {
  // Mirror download/compile progress to the page.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Unlike "Llama-3.1-8B-Instruct-q4f32_1-MLC", this is a base model
  const selectedModel = "Llama-3.1-8B-q4f32_1-MLC";

  // The base model is not in the prebuilt list, so register it here; it can
  // reuse the instruct model's wasm since the architecture is the same.
  const appConfig: webllm.AppConfig = {
    model_list: [
      {
        model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-q4f32_1-MLC", // a base model
        model_id: selectedModel,
        model_lib:
          webllm.modelLibURLPrefix +
          webllm.modelVersion +
          "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
        overrides: {
          context_window_size: 2048,
        },
      },
    ],
  };
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    {
      appConfig: appConfig,
      initProgressCallback: initProgressCallback,
      logLevel: "INFO",
    },
  );

  // Plain completion: the prompt is continued as-is, no chat template applied.
  const reply0 = await engine.completions.create({
    prompt: "List 3 US states: ",
    // below configurations are all optional
    echo: true,
    n: 2,
    max_tokens: 64,
    logprobs: true,
    top_logprobs: 2,
  });
  console.log(reply0);
  console.log(reply0.usage);

  // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)`
}

main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/vision-model/README.md
|
||
================================================
|
||
# WebLLM Get Started App
|
||
|
||
This folder provides a minimum demo to show WebLLM API in a webapp setting.
|
||
To try it out, you can do the following steps under this folder
|
||
|
||
```bash
|
||
npm install
|
||
npm start
|
||
```
|
||
|
||
Note: if you would like to hack the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
for WebLLM core development.
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/vision-model/package.json
|
||
================================================
|
||
{
|
||
"name": "get-started",
|
||
"version": "0.1.0",
|
||
"private": true,
|
||
"scripts": {
|
||
"start": "parcel src/vision_model.html --port 8888",
|
||
"build": "parcel build src/vision_model.html --dist-dir lib"
|
||
},
|
||
"devDependencies": {
|
||
"buffer": "^5.7.1",
|
||
"parcel": "^2.8.3",
|
||
"process": "^0.11.10",
|
||
"tslib": "^2.3.1",
|
||
"typescript": "^4.9.5",
|
||
"url": "^0.11.3"
|
||
},
|
||
"dependencies": {
|
||
"@mlc-ai/web-llm": "^0.2.80"
|
||
}
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/vision-model/src/utils.ts
|
||
================================================
|
||
export function getImageDataFromURL(url: string): Promise<ImageData> {
|
||
return new Promise((resolve, reject) => {
|
||
// Converts img to any, and later `as CanvasImageSource`, otherwise build complains
|
||
const img: any = new Image();
|
||
img.crossOrigin = "anonymous"; // Important for CORS
|
||
img.onload = () => {
|
||
const canvas: HTMLCanvasElement = document.createElement("canvas");
|
||
const ctx: CanvasRenderingContext2D = canvas.getContext("2d")!;
|
||
canvas.width = img.width;
|
||
canvas.height = img.height;
|
||
ctx.drawImage(img as CanvasImageSource, 0, 0);
|
||
|
||
const imageData = ctx.getImageData(0, 0, img.width, img.height);
|
||
resolve(imageData);
|
||
};
|
||
img.onerror = () => reject(new Error("Failed to load image"));
|
||
img.src = url;
|
||
});
|
||
}
|
||
|
||
export async function imageURLToBase64(url: string): Promise<string> {
|
||
const imageData: ImageData = await getImageDataFromURL(url);
|
||
const canvas = document.createElement("canvas");
|
||
const ctx = canvas.getContext("2d");
|
||
|
||
canvas.width = imageData.width;
|
||
canvas.height = imageData.height;
|
||
|
||
ctx!.putImageData(imageData, 0, 0);
|
||
|
||
return canvas.toDataURL();
|
||
}
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/vision-model/src/vision_model.html
|
||
================================================
|
||
<!doctype html>
|
||
<html>
|
||
<script>
|
||
webLLMGlobal = {};
|
||
</script>
|
||
<body>
|
||
<h2>WebLLM Test Page</h2>
|
||
Open console to see output
|
||
<br />
|
||
<br />
|
||
<label id="init-label"> </label>
|
||
|
||
<h3>Prompt</h3>
|
||
<label id="prompt-label"> </label>
|
||
|
||
<h3>Response</h3>
|
||
<label id="generate-label"> </label>
|
||
<br />
|
||
<label id="stats-label"> </label>
|
||
|
||
<script type="module" src="./vision_model.ts"></script>
|
||
</body>
|
||
</html>
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/vision-model/src/vision_model.ts
|
||
================================================
|
||
import * as webllm from "@mlc-ai/web-llm";
|
||
import { imageURLToBase64 } from "./utils";
|
||
|
||
function setLabel(id: string, text: string) {
|
||
const label = document.getElementById(id);
|
||
if (label == null) {
|
||
throw Error("Cannot find label " + id);
|
||
}
|
||
label.innerText = text;
|
||
}
|
||
|
||
// Toggle between running the engine in a web worker or on the main thread.
const USE_WEB_WORKER = true;

// Public CORS proxy: the demo images are served without CORS headers, so the
// browser cannot read their pixels directly onto a canvas.
const proxyUrl = "https://cors-anywhere.herokuapp.com/";
// Sample images used in the three conversation turns below.
const url_https_street = "https://www.ilankelman.org/stopsigns/australia.jpg";
const url_https_tree = "https://www.ilankelman.org/sunset.jpg";
const url_https_sea =
  "https://www.islandvulnerability.org/index/silhouette.jpg";
|
||
|
||
/**
 * Demonstrates a multi-turn conversation with a vision model:
 * 1) a two-image prompt, 2) a text-only follow-up, 3) a single-image follow-up.
 * Images can be supplied either as base64 data URLs or as http(s) URLs.
 */
async function main() {
  // can feed request with either base64 or http url
  const url_base64_street = await imageURLToBase64(proxyUrl + url_https_street);

  // Mirror download/compile progress to the page.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  const selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC";

  const engineConfig: webllm.MLCEngineConfig = {
    initProgressCallback: initProgressCallback,
    logLevel: "INFO", // specify the log level
  };
  // Larger context window to fit the image tokens plus the running chat.
  const chatOpts = {
    context_window_size: 6144,
  };

  // Same API either way; only where the engine runs differs.
  const engine: webllm.MLCEngineInterface = USE_WEB_WORKER
    ? await webllm.CreateWebWorkerMLCEngine(
        new Worker(new URL("./worker.ts", import.meta.url), {
          type: "module",
        }),
        selectedModel,
        engineConfig,
        chatOpts,
      )
    : await webllm.CreateMLCEngine(selectedModel, engineConfig, chatOpts);

  // 1. Prefill two images
  const messages: webllm.ChatCompletionMessageParam[] = [
    {
      role: "user",
      // Multi-part content: one text part followed by two image parts.
      content: [
        { type: "text", text: "List the items in each image concisely." },
        {
          type: "image_url",
          image_url: {
            url: url_base64_street,
          },
        },
        {
          type: "image_url",
          image_url: {
            url: proxyUrl + url_https_sea,
          },
        },
      ],
    },
  ];
  const request0: webllm.ChatCompletionRequest = {
    stream: false, // can be streaming, same behavior
    messages: messages,
  };
  const reply0 = await engine.chat.completions.create(request0);
  const replyMessage0 = await engine.getMessage();
  console.log(reply0);
  console.log(replyMessage0);
  console.log(reply0.usage);

  // 2. A follow up text-only question
  messages.push({ role: "assistant", content: replyMessage0 });
  messages.push({ role: "user", content: "What is special about each image?" });
  const request1: webllm.ChatCompletionRequest = {
    stream: false, // can be streaming, same behavior
    messages: messages,
  };
  const reply1 = await engine.chat.completions.create(request1);
  const replyMessage1 = await engine.getMessage();
  console.log(reply1);
  console.log(replyMessage1);
  console.log(reply1.usage);

  // 3. A follow up single-image question
  messages.push({ role: "assistant", content: replyMessage1 });
  messages.push({
    role: "user",
    content: [
      { type: "text", text: "What about this image? Answer concisely." },
      {
        type: "image_url",
        image_url: { url: proxyUrl + url_https_tree },
      },
    ],
  });
  const request2: webllm.ChatCompletionRequest = {
    stream: false, // can be streaming, same behavior
    messages: messages,
  };
  const reply2 = await engine.chat.completions.create(request2);
  const replyMessage2 = await engine.getMessage();
  console.log(reply2);
  console.log(replyMessage2);
  console.log(reply2.usage);
}

main();
|
||
|
||
|
||
|
||
================================================
|
||
FILE: examples/vision-model/src/worker.ts
|
||
================================================
|
||
import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";
|
||
|
||
const handler = new WebWorkerMLCEngineHandler();
|
||
|
||
self.onmessage = (msg: MessageEvent) => {
|
||
handler.onmessage(msg);
|
||
};
|
||
|
||
|