from flask import Flask, request, render_template
from transformers import pipeline, AutoTokenizer
import torch

app = Flask(__name__)
# Load a small instruction-tuned model (e.g., Zephyr-7B, Mistral-7B)
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
chatbot = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",  # uses the GPU(s) if available, otherwise falls back to CPU
)
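# Optional variant (an assumption, not part of the original): on GPUs with
# limited VRAM, the same model can be loaded in 4-bit via bitsandbytes,
# assuming the bitsandbytes package is installed:
#
#   from transformers import BitsAndBytesConfig
#   chatbot = pipeline(
#       "text-generation",
#       model=model_name,
#       tokenizer=tokenizer,
#       model_kwargs={"quantization_config": BitsAndBytesConfig(load_in_4bit=True)},
#       device_map="auto",
#   )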
@app.route("/", methods=["GET", "POST"])
def home():
    if request.method == "POST":
        user_input = request.form["user_input"]
        response = generate_response(user_input)
        return render_template("index.html", user_input=user_input, bot_response=response)
    return render_template("index.html")
def generate_response(prompt):
    # Format the user message with the model's chat template
    # (required for instruction-following models)
    messages = [{"role": "user", "content": prompt}]
    formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Generate a response with sampling
    outputs = chatbot(
        formatted,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    # The pipeline returns prompt + completion; slice off the prompt
    # so only the bot's reply is returned
    return outputs[0]["generated_text"][len(formatted):]
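# Note: passing return_full_text=False in the chatbot(...) call above would
# make the pipeline return only the completion, so the slice would be unnecessary.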
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000, debug=True)  # debug=True is for local development only
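# The routes above render "index.html", which Flask looks up in a templates/
# directory next to this file. The original template is not shown; a minimal
# sketch (an assumption) could look like:
#
#   <!-- templates/index.html -->
#   <form method="post">
#     <input type="text" name="user_input" placeholder="Ask something..." required>
#     <button type="submit">Send</button>
#   </form>
#   {% if bot_response %}
#     <p><b>You:</b> {{ user_input }}</p>
#     <p><b>Bot:</b> {{ bot_response }}</p>
#   {% endif %}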