Spaces:
Sleeping
Sleeping
File size: 619 Bytes
44feadc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import streamlit as st
from llama_cpp import Llama
@st.cache_resource
def load_model():
    """Load the quantized GGUF model once and reuse it across Streamlit reruns.

    ``st.cache_resource`` memoizes the returned ``Llama`` instance, so the
    expensive model load only happens on the first run of the app.
    """
    # NOTE(review): path, context size, thread count, and GPU layer count are
    # hard-coded — confirm they match the deployment environment.
    model_config = {
        "model_path": "cybertron-v4-qw7B-MGS-IQ2_M.gguf",
        "n_ctx": 2048,
        "n_threads": 8,
        "n_gpu_layers": 20,
    }
    return Llama(**model_config)
llm = load_model()

st.title("Cybertron Chat")

# Single-turn chat: each submitted question is sent as a standalone user
# message — no conversation history is carried between reruns.
if question := st.text_input("Ask a question:"):
    with st.spinner("Generating response..."):
        completion = llm.create_chat_completion(
            messages=[{"role": "user", "content": question}],
            temperature=0.7,
            max_tokens=256,
        )
        # Show only the assistant's text from the first (and only) choice.
        st.write(completion["choices"][0]["message"]["content"])
|