Note: this example requires Ollama running locally on port 11434 with the llama3.1 model pulled. It will not work on a static host such as GitHub Pages.

Code

// Build the chat widget and wire it to the Ollama streaming handler below.
const chatOptions = {
  theme: 'quikchat-theme-light',
  titleArea: { title: 'Memory Chat', show: true, align: 'left' },
};
const chat = new quikchat('#chat', ollamaStreamingCallback, chatOptions);

/**
 * quikchat message handler: posts the full conversation to a local Ollama
 * server and streams the assistant's reply into the chat token by token.
 *
 * @param {object} chatInstance - the quikchat instance (provides history and message APIs)
 * @param {string} userInput - the text the user just submitted
 */
async function ollamaStreamingCallback(chatInstance, userInput) {
  const startPrompt = {
    content: 'You are a skilled assistant.',
    role: 'system'
  };
  chatInstance.messageAddNew(userInput, 'user', 'right');

  let botMsgId; // assigned when the first token arrives
  // Append a token to the bot message, creating the message on first use.
  const appendToken = (content) => {
    if (botMsgId === undefined) {
      botMsgId = chatInstance.messageAddNew(content, 'bot', 'left');
    } else {
      chatInstance.messageAppendContent(botMsgId, content);
    }
  };
  // Parse one NDJSON line and append its content, if any. The final
  // ("done": true) frame may carry no message content, hence the guards.
  const consumeLine = (line) => {
    if (line.trim() === '') return;
    const data = JSON.parse(line);
    const content = data.message?.content;
    if (content) appendToken(content);
  };

  try {
    const response = await fetch('http://localhost:11434/api/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'llama3.1',
        // historyGet() provides full conversation context
        messages: [startPrompt, ...chatInstance.historyGet()],
        stream: true
      })
    });
    if (!response.ok) {
      throw new Error(`Ollama request failed: HTTP ${response.status}`);
    }

    // Ollama streams NDJSON: one JSON object per line. A network chunk can
    // contain several lines or end mid-line, so decode in streaming mode,
    // buffer, and split on '\n' rather than parsing each chunk whole.
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop(); // keep any partial trailing line for next chunk
      lines.forEach(consumeLine);
    }
    consumeLine(buffer); // flush a final line not terminated by '\n'
  } catch (err) {
    // Surface failures in the chat rather than silently dropping them.
    appendToken(`[error] ${err.message}`);
  }
}