Sébastien De Greef committed · Commit 95888be · Parent(s): 40802c2

Add new LLM models and update main.py to support model selection
Browse files
- llama38b.Modelfile +17 -0
- main.py +39 -7
- mistral7b.Modelfile +7 -0
- models.txt +6 -0
- start_server.sh +6 -2
llama38b.Modelfile
ADDED
@@ -0,0 +1,17 @@
+FROM llama3:8b
+
+TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ .Response }}<|eot_id|>"""
+
+
+PARAMETER num_ctx 8192
+
+PARAMETER stop "<|start_header_id|>"
+PARAMETER stop "<|end_header_id|>"
+PARAMETER stop "<|eot_id|>"
+PARAMETER stop "<|reserved_special_token"
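Note: once start_server.sh has created this custom model as llama38k:8b, it can be queried through the same LangChain wrapper that main.py uses. A minimal sketch (assuming a local Ollama server on the default port and that the model has already been created):

# Minimal sketch: query the 8192-context Llama 3 variant built from this
# Modelfile (model name taken from start_server.sh / models.txt).
from langchain_community.llms import Ollama

llm = Ollama(model="llama38k:8b", timeout=1000)
print(llm.invoke("Say hello in one sentence."))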
main.py
CHANGED
@@ -2,9 +2,25 @@ from langchain.schema import AIMessage, HumanMessage
 import gradio as gr
 from langchain_community.llms import Ollama
 
-
+def parse_model_names(path):
+    """Parses the model file to extract value-label pairs for the dropdown."""
+    choices = []
+    with open(path, 'r') as file:
+        lines = file.readlines()
+        for line in lines:
+            if '#' in line:
+                value, description = line.split('#', 1)
+                value = value.strip()
+                description = description.strip()
+                choices.append((description, value))
+    return choices
 
-def predict(message, history):
+models = parse_model_names("models.txt")
+
+
+def predict(message, history, model):
+    print("Predicting", message, history, models[model][1])
+    llm = Ollama(model=models[model][1], timeout=1000)  # Instantiate Ollama with the selected model
     history_langchain_format = []
     for human, ai in history:
         history_langchain_format.append(HumanMessage(content=human))
@@ -12,15 +28,31 @@ def predict(message, history):
     history_langchain_format.append(HumanMessage(content=message))
     try:
         chat_response = llm.invoke(history_langchain_format)
-    except Exception as e:
+    except Exception as e:  # Use a general exception handler here
         chat_response = "Error: " + str(e)
 
     return chat_response
 
-def run():
-    demo = gr.ChatInterface(predict)
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+
+with gr.Blocks(fill_height=True) as demo:
+    with gr.Row():
+        model_dropdown = gr.Dropdown(label="Select LLM Model", choices=models, info="Select the model you want to chat with", type="index")
+
+    # We use a state variable to track the current model
+    model_state = gr.State(value=model_dropdown.value)
+
+    def update_model(selected_model):
+        print("Model selected", selected_model)
+        model_state.value = selected_model
+        return selected_model
+
+    chat = gr.ChatInterface(predict,
+        additional_inputs=[ model_dropdown ],
+    )
+
 
 if __name__ == "__main__":
-    run()
+    demo.launch(server_name="0.0.0.0", server_port=7860)
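Because the dropdown is declared with type="index", Gradio passes the position of the selected row (not the model name) into predict() through additional_inputs, and the name is then looked up in the parsed (description, value) pairs. A small illustration of that lookup, using two entries from models.txt:

# Illustrative only: how predict() resolves the dropdown index to a model name.
models = [("Gemma 2b", "gemma:2b"), ("Llama3 8b (8192 context)", "llama38k:8b")]
selected_index = 1                       # what gr.Dropdown(type="index") passes in
model_name = models[selected_index][1]   # -> "llama38k:8b"
print(model_name)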
mistral7b.Modelfile
ADDED
@@ -0,0 +1,7 @@
+FROM mistral:7b
+
+TEMPLATE """[INST] {{ .System }} {{ .Prompt }} [/INST]"""
+PARAMETER stop "[INST]"
+PARAMETER stop "[/INST]"
+
+PARAMETER num_ctx 4096
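For reference, the TEMPLATE above wraps each turn in Mistral's [INST] markers; roughly, the rendered prompt for a single turn looks like this (illustrative values, not from the repo):

# Rough illustration of how the TEMPLATE is filled for one turn.
system = "You are a helpful assistant."
prompt = "What is the capital of France?"
rendered = f"[INST] {system} {prompt} [/INST]"
print(rendered)  # [INST] You are a helpful assistant. What is the capital of France? [/INST]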
models.txt
ADDED
@@ -0,0 +1,6 @@
+gemma:2b # Gemma 2b
+gemma:7b # Gemma 7b
+mistral:7b # Mistral 7b
+mistral4k:7b # Mistral 7b (4096 context)
+llama3:8b # Llama3 8b
+llama38k:8b # Llama3 8b (8192 context)
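Each line follows the value # description convention that parse_model_names() in main.py splits on; for example, the last entry parses like this:

# Same split logic as parse_model_names() in main.py, applied to one line.
line = "llama38k:8b # Llama3 8b (8192 context)"
value, description = line.split('#', 1)
print((description.strip(), value.strip()))  # ('Llama3 8b (8192 context)', 'llama38k:8b')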
start_server.sh
CHANGED
@@ -1,9 +1,13 @@
 #!/bin/bash
-# Start the background task
 ollama serve &
 
 ollama pull mistral:7b > /dev/null 2>&1
+ollama create mistral4k:7b --file ./mistral7b.Modelfile > /dev/null 2>&1
+
 ollama pull llama3:8b > /dev/null 2>&1
+ollama create llama38k:8b --file ./llama38b.Modelfile > /dev/null 2>&1
+
+ollama pull gemma:2b > /dev/null 2>&1
+ollama pull gemma:7b > /dev/null 2>&1
 
-# Start the Gradio app
 python main.py
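Since ollama serve & returns immediately, the pull/create commands can race a server that is still starting up. One optional way to guard against that (not part of this commit) is to poll the default Ollama endpoint before pulling, sketched here in Python:

# Optional sketch, not in the repo: wait until the Ollama HTTP endpoint
# (default http://localhost:11434) answers before pulling/creating models.
import time
import urllib.request

def wait_for_ollama(url="http://localhost:11434", timeout=60):
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            urllib.request.urlopen(url, timeout=2)
            return True
        except OSError:
            time.sleep(1)
    return False

if __name__ == "__main__":
    print("Ollama ready:", wait_for_ollama())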