import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"],
]


# Load model with error handling
def load_model():
    try:
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Configure the pipeline based on available resources;
        # Hugging Face Spaces typically have a GPU available
        pipeline = transformers.pipeline(
            "text-generation",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Return None to indicate failure
        return None, None
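
# Note: the meta-llama checkpoints on the Hugging Face Hub are gated, so a
# deployed Space typically needs an access token. A minimal sketch, assuming
# a recent transformers release and a token exposed as an HF_TOKEN secret
# (the variable name and the token plumbing are assumptions, not part of the
# original app):
#
#     import os
#
#     tokenizer = AutoTokenizer.from_pretrained(
#         MODEL_NAME, token=os.environ.get("HF_TOKEN")
#     )
#     pipeline = transformers.pipeline(
#         "text-generation",
#         model=MODEL_NAME,
#         torch_dtype=torch.float16,
#         device_map="auto",
#         token=os.environ.get("HF_TOKEN"),
#     )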


# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check if the model loaded successfully at startup
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Record the start time so the UI can report how long generation took
        start_time = time.time()

        # Generate the code
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=top_k,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=max_length,
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]['generated_text']
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer
print("Initializing CodeLlama...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5,
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature",
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k",
                )
            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20,
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output,
    )
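
    # A small convenience sketch (not part of the original wiring): Textbox has
    # a submit event with the same signature as Button.click, so the keyboard
    # could trigger generation too (Enter, or Ctrl+Enter for multi-line boxes,
    # depending on the Gradio version):
    #
    #     prompt.submit(
    #         fn=generate_code,
    #         inputs=[prompt, max_length, temperature, top_p, top_k],
    #         outputs=output,
    #     )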

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt,
    )

    # Add information about the model
    gr.Markdown("""
## About

This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

- **Max Length**: Controls the maximum length of the generated text
- **Temperature**: Controls randomness (lower = more deterministic)
- **Top-p**: Controls diversity via nucleus sampling
- **Top-k**: Controls diversity via top-k sampling

Created by DheepLearning
""")

# Launch the app
demo.launch()
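
# For a shared Space, Gradio's request queue keeps concurrent requests from
# hitting the single pipeline in parallel. A minimal variant (enabling the
# queue is an assumption about deployment, not part of the original app):
#
#     demo.queue().launch()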