Local LLM via LM Studio with conversational memory. Uses the OpenAI-compatible /v1/chat/completions endpoint with token streaming.
Requires LM Studio running locally on port 1234 with a model loaded. Will not work on GitHub Pages.
// Chat widget bound to the streaming LM Studio callback defined below.
const chatOptions = {
  theme: 'quikchat-theme-light',
  titleArea: { title: 'Memory Chat', show: true, align: 'left' }
};
const chat = new quikchat('#chat', lmStudioCallback, chatOptions);
/**
 * quikchat message callback: echoes the user's message into the chat, then
 * streams a completion from a local LM Studio server (OpenAI-compatible
 * /v1/chat/completions SSE stream) into a single bot message.
 *
 * Conversational memory comes from chatInstance.historyGet(), which is sent
 * as the message list on every request.
 *
 * @param {object} chatInstance - quikchat instance (messageAddNew,
 *   messageAppendContent, historyGet).
 * @param {string} userInput - raw text the user submitted.
 */
function lmStudioCallback(chatInstance, userInput) {
  chatInstance.messageAddNew(userInput, 'user', 'right');

  let botMessageId = null;
  const appendToken = (content) => {
    // First token creates the bot message; later tokens append to it.
    if (botMessageId === null) {
      botMessageId = chatInstance.messageAddNew(content, 'bot', 'left');
    } else {
      chatInstance.messageAppendContent(botMessageId, content);
    }
  };

  // Parse one SSE line. Returns true when the stream's [DONE] sentinel is seen.
  const processLine = (line) => {
    const trimmed = line.trim();
    if (!trimmed.startsWith('data:')) return false; // comments/blank lines
    const payload = trimmed.slice('data:'.length).trim();
    if (payload === '[DONE]') return true;
    try {
      const content = JSON.parse(payload).choices[0]?.delta?.content;
      if (content) appendToken(content);
    } catch (err) {
      // Malformed event — skip it rather than abort the whole stream.
    }
    return false;
  };

  fetch('http://localhost:1234/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF',
      messages: [
        { role: 'system', content: 'You are a skilled assistant.' },
        ...chatInstance.historyGet()
      ],
      stream: true
    })
  })
    .then(async (response) => {
      if (!response.ok) {
        throw new Error(`LM Studio responded with HTTP ${response.status}`);
      }
      const reader = response.body.getReader();
      // Single decoder with {stream:true} so multi-byte UTF-8 sequences
      // split across network chunks decode correctly.
      const decoder = new TextDecoder();
      let buffer = '';
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        // A chunk may carry several SSE events, or a partial one; keep the
        // trailing incomplete line in the buffer for the next read.
        const lines = buffer.split('\n');
        buffer = lines.pop();
        for (const line of lines) {
          if (processLine(line)) return;
        }
      }
      // Flush any final line that arrived without a trailing newline.
      buffer += decoder.decode();
      if (buffer) processLine(buffer);
    })
    .catch((err) => {
      // Surface connection/HTTP failures in the chat instead of an
      // unhandled promise rejection (LM Studio must be running locally).
      appendToken(`[error] ${err.message}`);
    });
}