# qwen4bit / app.py
# Uploaded by George-API via huggingface_hub (commit 3f27b41, verified).
import gradio as gr
import os
from dotenv import load_dotenv
# Load environment variables from a local .env file (if present) so that
# settings such as HF_SPACE_NAME can be overridden without code changes.
load_dotenv()
# Model details
MODEL_NAME = "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit"
SPACE_NAME = os.getenv("HF_SPACE_NAME", "qwen4bit")


def generate_response(prompt, max_new_tokens=256):
    """Return a canned placeholder reply for *prompt*.

    Stands in for real model inference until fine-tuning of MODEL_NAME is
    complete; the Space will then be updated with the actual model call.

    Args:
        prompt: User-supplied prompt text; echoed back inside the reply.
        max_new_tokens: Accepted to keep the signature compatible with the
            future inference function; currently unused by the placeholder.

    Returns:
        A multi-line placeholder string naming the model and quoting the
        prompt.
    """
    # NOTE: max_new_tokens is intentionally ignored while this is a stub.
    return f"""[Placeholder Response]
This is a demo of the {MODEL_NAME} model.
Once fine-tuning is complete, this will respond to:
"{prompt}"
This space will be updated with the fine-tuned model."""
# Create the Gradio interface.
# Layout: a header Markdown block, then a two-column row (prompt + controls
# on the left, model output on the right), then a footer note.
with gr.Blocks(title=f"Fine-tuned {MODEL_NAME}") as demo:
    gr.Markdown(f"""
# Fine-tuned DeepSeek-R1-Distill-Qwen-14B Model
This space will host the fine-tuned version of `{MODEL_NAME}` once training is complete.
**Model Details**:
- Base model: `{MODEL_NAME}`
- Fine-tuned on: `phi4-cognitive-dataset`
- 4-bit quantized (already, not further quantized)
**Current Status**: Preparing for fine-tuning
""")
    with gr.Row():
        with gr.Column():
            # Left column: user inputs.
            input_text = gr.Textbox(
                label="Enter your prompt",
                placeholder="Type your prompt here...",
                lines=4,
            )
            # Token budget forwarded as generate_response's second argument.
            max_tokens = gr.Slider(
                minimum=32,
                maximum=1024,
                value=256,
                step=32,
                label="Max new tokens",
            )
            submit_btn = gr.Button("Generate Response")
        with gr.Column():
            # Right column: read-only output.
            output_text = gr.Textbox(
                label="Model Response",
                lines=10,
            )
    # Wire the button: (prompt, token budget) in, generated text out.
    submit_btn.click(
        fn=generate_response,
        inputs=[input_text, max_tokens],
        outputs=output_text,
    )
    gr.Markdown("""
### Note
This is a placeholder application. The actual fine-tuned model will be deployed
to this space once training is complete.
""")

# Launch the app only when executed directly (not when imported).
if __name__ == "__main__":
    demo.launch()