Spaces:

Zakia
/

deepseek-r1-demo

Sleeping

deepseek-r1-demo / app.py

Update app.py

cda3c49 verified 4 months ago

1.43 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

	# Hub checkpoint served by this demo.
	model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

	# Tokenizer matching the checkpoint.
	# trust_remote_code is deliberately NOT enabled: it would allow Python code
	# shipped in the Hub repo to execute inside this process.
	# NOTE(review): the checkpoint name indicates a Llama distillation, which the
	# built-in transformers classes should load; re-check if model_name changes.
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# bitsandbytes 4-bit quantization settings for loading the weights.
	quantization_config = BitsAndBytesConfig(
	    load_in_4bit=True,  # store weights in 4-bit rather than 8-bit
	    bnb_4bit_compute_dtype=torch.float16,  # run the matmuls in FP16
	    bnb_4bit_use_double_quant=True,  # also quantize the quantization constants
	)

	# Load the quantized model; device_map="auto" lets the library decide where
	# the weights are placed.
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    device_map="auto",
	    quantization_config=quantization_config,
	)

	# Define text generation function
	def generate_response(prompt, max_new_tokens=150):
	    """Generate a completion for *prompt* with the module-level model.

	    Args:
	        prompt: Text entered by the user.
	        max_new_tokens: Upper bound on the number of tokens generated in
	            addition to the prompt. The budget is independent of the prompt
	            length, so a long prompt can no longer use up the whole
	            generation limit (which a combined ``max_length`` cap allowed).

	    Returns:
	        The decoded first sequence returned by ``model.generate`` with
	        special tokens removed, or an empty string when the prompt is
	        empty or whitespace-only.
	    """
	    # Nothing to condition on: skip the expensive generation call entirely.
	    if not prompt or not prompt.strip():
	        return ""

	    # Tokenize and move the tensors to the device the model lives on.
	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	    # Inference only, so gradient tracking is disabled.
	    with torch.no_grad():
	        output = model.generate(**inputs, max_new_tokens=max_new_tokens)

	    return tokenizer.decode(output[0], skip_special_tokens=True)

	# Web front end: one text box in, one text box out, wired to generate_response.
	_prompt_box = gr.Textbox(label="Enter your prompt")
	_response_box = gr.Textbox(label="AI Response")

	interface = gr.Interface(
	    title="DeepSeek-R1 Distill LLaMA Chatbot",
	    description="Enter a prompt and receive a response from DeepSeek-R1-Distill-Llama-8B.",
	    fn=generate_response,
	    inputs=_prompt_box,
	    outputs=_response_box,
	)

	# Start serving the interface.
	interface.launch()