import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import os

# --- Configuration (Read from Environment Variables) ---
# Get the model path from an environment variable.
model_path = os.environ.get("MODEL_PATH", "Athspi/Athspiv2new")
deepseek_tokenizer_path = os.environ.get("TOKENIZER_PATH", "deepseek-ai/DeepSeek-R1")
# Get the Hugging Face token from an environment variable (for gated models).
hf_token = os.environ.get("HF_TOKEN", None) # Default to None if not set
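# Example (illustrative) shell setup before launching:
#   export MODEL_PATH="Athspi/Athspiv2new"
#   export TOKENIZER_PATH="deepseek-ai/DeepSeek-R1"
#   export HF_TOKEN="hf_..."  # only needed for gated or private repos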

# --- Model and Tokenizer Loading ---
# Use try-except for robust error handling
try:
    # Load the model. Assume a merged model.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",          # Use GPU if available, otherwise CPU
        torch_dtype=torch.float16,  # Use float16 if supported
        token=hf_token              # Use the token from the environment variable
    )
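    # NOTE: float16 assumes a CUDA-capable GPU; on CPU-only machines,
    # torch.float32 (or torch.bfloat16 where supported) is usually the safer choice.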
    # Load the DeepSeek tokenizer
    tokenizer = AutoTokenizer.from_pretrained(deepseek_tokenizer_path, token=hf_token)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

except OSError as e:
    print(f"Error loading model or tokenizer: {e}")
    print("Ensure MODEL_PATH and TOKENIZER_PATH environment variables are set correctly.")
    print("If using a gated model, ensure HF_TOKEN is set correctly.")
    exit()  # Terminate the script if loading fails

# --- Chat Function ---
def chat_with_llm(prompt, history):
    """Generates a response from the LLM, handling history correctly."""
    formatted_prompt = ""
    if history:
        for item in history:
            if item["role"] == "user":
                formatted_prompt += f"{tokenizer.bos_token}{item['content']}{tokenizer.eos_token}"
            elif item["role"] == "assistant":
                formatted_prompt += f"{item['content']}{tokenizer.eos_token}"
    formatted_prompt += f"{tokenizer.bos_token}{prompt}{tokenizer.eos_token}"
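    # NOTE: this manual BOS/EOS framing assumes the model was fine-tuned on exactly
    # this format; if the tokenizer ships a chat template, tokenizer.apply_chat_template
    # would be the more conventional way to build the prompt.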
    try:
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
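        # NOTE: re-creating the pipeline on every request keeps the example simple;
        # for lower latency it could be built once at module scope and reused.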
        result = pipe(
            formatted_prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            top_k=50,
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id,
        )
        response = result[0]['generated_text'].strip()
        return response
    except Exception as e:
        return f"Error during generation: {e}"

# --- Gradio Interface ---
# Use the 'messages' format for chatbot
def predict(message, history):
    history = history or []
    response = chat_with_llm(message, history)
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return "", history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Athspi Chat", height=500, show_label=True,
                         value=[{"role": "assistant", "content": "Hi! I'm Athspi. How can I help you today?"}],
                         type="messages")  # Set type to "messages"
    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
    clear = gr.Button("Clear")

    msg.submit(predict, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: [], [], chatbot, queue=False)

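# NOTE: share=True requests a temporary public gradio.live link; it is usually
# unnecessary when the app is already hosted, e.g. on a Hugging Face Space.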
demo.launch(share=True)