"""Gradio demo that generates text with a Hugging Face causal language model.

The tokenizer and model named by ``MODEL_NAME`` are loaded once at import
time and moved to the GPU when one is available. ``generate_text`` is exposed
through a simple Gradio text-in / text-out interface.
"""

import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read Hugging Face token from environment variable (None when it is unset).
HF_TOKEN = os.getenv("HF_TOKEN")

MODEL_NAME = "mistralai/Mistral-7B-v0.1"

# Upper bound on the number of tokens generated *after* the prompt.
MAX_NEW_TOKENS = 150

print("Loading tokenizer and model...")
# `token=` replaces the deprecated `use_auth_token=` keyword; every
# transformers release able to load Mistral already accepts it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HF_TOKEN)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


def generate_text(prompt):
    """Return the prompt followed by a sampled continuation from the model.

    Args:
        prompt: Text typed by the user. ``None`` or a whitespace-only string
            is treated as "nothing to do".

    Returns:
        The decoded first output sequence (prompt plus continuation) with
        special tokens stripped, or an empty string for a blank prompt.
    """
    # Skip a costly generation pass when there is nothing to continue.
    if not prompt or not prompt.strip():
        return ""

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        # Cap only the continuation: `max_length` also counts the prompt
        # tokens, so a long prompt would leave no room for new text.
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
        # Passed explicitly so generate() does not have to fall back to the
        # EOS id (with a warning) when the tokenizer defines no pad token.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Gradio UI
# NOTE(review): the description mentions "hari7261/TechChat" while MODEL_NAME
# loads "mistralai/Mistral-7B-v0.1" - confirm which model is intended.
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
    outputs="text",
    title="TechChat - Mistral 7B",
    description="Generate text with hari7261/TechChat model hosted on Hugging Face.",
)

if __name__ == "__main__":
    iface.launch()