Spaces:

hari7261
/

TechChatBot

Runtime error

File size: 1,191 Bytes

2185ead
 
361b22f
2185ead
 
 
 
ffb6796
7e7d8a9
9e5e66c
4ee7dce
2185ead
 
 
ffb6796
2185ead
 
ffb6796
2185ead
 
 
 
 
 
 
 
 
 
 
 
ffb6796
2185ead
 
 
 
 
 
 
 
ffb6796
2185ead

import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face access token taken from the environment (None when the
# HF_TOKEN variable is unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Hub repository id of the checkpoint served by this app.
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

print("Loading tokenizer and model...")
# `token` is the current keyword for passing Hub credentials; it replaces the
# deprecated `use_auth_token` argument of `from_pretrained`.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HF_TOKEN)

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def generate_text(prompt):
    """Generate a sampled continuation of ``prompt`` with the loaded model.

    Args:
        prompt: Text to continue, as entered in the Gradio textbox.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens stripped.
    """
    # Tokenize to PyTorch tensors and move them to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        # Budget only the newly generated tokens. `max_length` also counts
        # the prompt, so a prompt of ~150 tokens or more left no room for
        # any output at all.
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Gradio UI: one multi-line prompt box wired to generate_text, with the
# returned string rendered as plain text.
_prompt_box = gr.Textbox(
    placeholder="Enter your prompt here...",
    lines=5,
)

iface = gr.Interface(
    fn=generate_text,
    inputs=_prompt_box,
    outputs="text",
    title="TechChat - Mistral 7B",
    description="Generate text with hari7261/TechChat model hosted on Hugging Face.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()