import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time
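
# The Space is assumed to ship a requirements.txt alongside this file (not shown
# here) listing roughly: gradio, transformers, torch, accelerate.
# `accelerate` is required because the pipeline below uses device_map="auto".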

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"
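# Note: meta-llama checkpoints on the Hugging Face Hub are gated; if loading fails
# with an authorization error, the license likely needs to be accepted on the Hub
# and a token exposed to the Space (e.g. an HF_TOKEN secret). This is an assumption
# about the deployment environment rather than something this script configures.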

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n def __init__(self):"],
    ["async def fetch_data(url: str):"]
]

# Load the model and tokenizer with error handling
def load_model():
    try:
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # Configure the generation pipeline for the available hardware;
        # Hugging Face Spaces typically provide a GPU, and device_map="auto"
        # places the model accordingly.
        pipeline = transformers.pipeline(
            "text-generation",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )

        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Return None for both so callers can detect the failure
        return None, None

# Generate a completion for the given prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Bail out early if the model failed to load
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Record the start time so the generation time can be reported
        start_time = time.time()

        # Generate the completion
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=top_k,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=max_length,
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # The pipeline returns the prompt plus its completion as generated_text
        result = sequences[0]["generated_text"]
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"
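
# Example (assuming the model loaded successfully):
#   generate_code("def add(a: int, b: int) -> int:", max_length=64)
# returns the prompt plus its completion, followed by a generation-time footer.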

# Load the model and tokenizer
print("Initializing CodeLlama...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")
        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add example prompts
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model and its sampling controls
    gr.Markdown("""
    ## About
    This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

    - **Max Length**: Maximum length of the generated text, prompt included
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Controls diversity via nucleus sampling
    - **Top-k**: Controls diversity via top-k sampling

    Created by DheepLearning
    """)

# Launch the app
demo.launch()
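
# If generation regularly exceeds the default request timeout on slower hardware,
# enabling Gradio's request queue (demo.queue().launch()) is a common workaround;
# the original app launches without one, so it is left as a note here.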