Spaces:

Zakia
/

deepseek-r1-demo

Sleeping

deepseek-r1-demo / app.py

Update app.py

2fa9a9c verified 4 months ago

1.23 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

	# Hub checkpoint used for both the tokenizer and the model below.
	model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

	# bitsandbytes settings: request 8-bit weight loading for the model.
	quantization_config = BitsAndBytesConfig(load_in_8bit=True)

	# Tokenizer for the checkpoint named above.
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

	# Causal LM loaded with the 8-bit config; device_map="auto" leaves device
	# placement to the library.
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    trust_remote_code=True,
	    device_map="auto",
	    quantization_config=quantization_config,
	)

	# Text generation callback used by the Gradio interface
	def generate_response(prompt):
	    """Generate text for ``prompt`` using the module-level tokenizer and model.

	    Args:
	        prompt: The text entered by the user.

	    Returns:
	        The first generated sequence decoded to a string with special tokens
	        skipped, or an empty string when ``prompt`` is ``None``, empty, or
	        whitespace-only.
	    """
	    # Nothing to condition on: skip tokenization and generation entirely.
	    if not prompt or not prompt.strip():
	        return ""

	    # Tokenize and move the tensors to the device the model lives on.
	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	    # Inference only, so gradient tracking is disabled.
	    with torch.no_grad():
	        # max_new_tokens bounds only the continuation. The previous
	        # max_length=150 counted the prompt tokens as well, so a long prompt
	        # left little or no budget for the model's answer.
	        output = model.generate(**inputs, max_new_tokens=150)

	    # NOTE(review): the full returned sequence is decoded; for a causal LM
	    # this presumably still starts with the echoed prompt -- confirm whether
	    # the UI should show only the continuation.
	    return tokenizer.decode(output[0], skip_special_tokens=True)

	# Gradio front end: one prompt textbox in, one response textbox out,
	# wired to generate_response.
	interface = gr.Interface(
	    title="DeepSeek-R1 Distilled LLaMA Chatbot",
	    description="Enter a prompt and receive a response from DeepSeek-R1-Distill-Llama-8B.",
	    fn=generate_response,
	    inputs=gr.Textbox(label="Enter your prompt"),
	    outputs=gr.Textbox(label="AI Response"),
	)

	# Start serving the interface.
	interface.launch()