# qwen4bit / app.py
# Uploaded by George-API via huggingface_hub (commit 3f27b41, verified).
import gradio as gr
import os
from dotenv import load_dotenv
# Load environment variables from a local .env file (if present) so that
# settings such as HF_SPACE_NAME can be overridden without code changes.
load_dotenv()
# Model details
MODEL_NAME = "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit"
SPACE_NAME = os.getenv("HF_SPACE_NAME", "qwen4bit")


def generate_response(prompt, max_new_tokens=256):
    """Return a canned placeholder reply for *prompt*.

    Stands in for real model inference until fine-tuning of MODEL_NAME is
    complete; the Space will then be updated with the actual model call.

    Args:
        prompt: User-supplied prompt text; echoed back inside the reply.
        max_new_tokens: Accepted to keep the signature compatible with the
            future inference function; currently unused by the placeholder.

    Returns:
        A multi-line placeholder string naming the model and quoting the
        prompt.
    """
    # NOTE: max_new_tokens is intentionally ignored while this is a stub.
    return f"""[Placeholder Response]
This is a demo of the {MODEL_NAME} model.
Once fine-tuning is complete, this will respond to:
"{prompt}"
This space will be updated with the fine-tuned model."""
# Create the Gradio interface.
# Layout: a header Markdown block, then a two-column row (prompt + controls
# on the left, model output on the right), then a footer note.
with gr.Blocks(title=f"Fine-tuned {MODEL_NAME}") as demo:
    gr.Markdown(f"""
# Fine-tuned DeepSeek-R1-Distill-Qwen-14B Model
This space will host the fine-tuned version of `{MODEL_NAME}` once training is complete.
**Model Details**:
- Base model: `{MODEL_NAME}`
- Fine-tuned on: `phi4-cognitive-dataset`
- 4-bit quantized (already, not further quantized)
**Current Status**: Preparing for fine-tuning
""")
    with gr.Row():
        with gr.Column():
            # Left column: user inputs.
            input_text = gr.Textbox(
                label="Enter your prompt",
                placeholder="Type your prompt here...",
                lines=4,
            )
            # Token budget forwarded as generate_response's second argument.
            max_tokens = gr.Slider(
                minimum=32,
                maximum=1024,
                value=256,
                step=32,
                label="Max new tokens",
            )
            submit_btn = gr.Button("Generate Response")
        with gr.Column():
            # Right column: read-only output.
            output_text = gr.Textbox(
                label="Model Response",
                lines=10,
            )
    # Wire the button: (prompt, token budget) in, generated text out.
    submit_btn.click(
        fn=generate_response,
        inputs=[input_text, max_tokens],
        outputs=output_text,
    )
    gr.Markdown("""
### Note
This is a placeholder application. The actual fine-tuned model will be deployed
to this space once training is complete.
""")

# Launch the app only when executed directly (not when imported).
if __name__ == "__main__":
    demo.launch()