import time

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"],
]


# Load the model with error handling
def load_model():
    try:
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Configure the pipeline based on available resources;
        # Hugging Face Spaces typically have a GPU available, and
        # device_map="auto" places the model on it automatically.
        pipe = transformers.pipeline(
            "text-generation",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("Model loaded successfully!")
        return tokenizer, pipe
    except Exception as e:
        print(f"Error loading model: {e}")
        # Return None so generate_code can report the failure.
        return None, None


# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check that the model loaded successfully
        if tokenizer is None or pipe is None:
            return "Error: Model failed to load. Please check the logs."

        # Record the start time so we can report generation latency
        start_time = time.time()

        # Generate the completion. Gradio sliders can return floats,
        # so cast the integer-valued parameters explicitly. Note that
        # max_length counts prompt tokens plus generated tokens.
        sequences = pipe(
            prompt,
            do_sample=True,
            top_k=int(top_k),
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=int(max_length),
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]["generated_text"]
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {e}"


# Load the model and tokenizer once at startup
print("Initializing CodeLlama...")
tokenizer, pipe = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5,
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50, maximum=500, value=200, step=10, label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.1, step=0.1, label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5, maximum=1.0, value=0.95, step=0.05, label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1, maximum=50, value=10, step=1, label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(label="Generated Code", lines=20)

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output,
    )

    # Add examples
    gr.Examples(examples=EXAMPLES, inputs=prompt)

    # Add information about the model
    gr.Markdown("""
    ## About
    This demo uses the CodeLlama-7b model to generate code completions from your prompts.

    - **Max Length**: Maximum total length in tokens (prompt plus completion)
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Controls diversity via nucleus sampling
    - **Top-k**: Controls diversity via top-k sampling

    Created by DheepLearning
    """)

# Launch the app
demo.launch()
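# ---------------------------------------------------------------------------
# Deployment note (assumptions, not part of the app logic): device_map="auto"
# requires the `accelerate` package, so a Spaces requirements.txt would need
# at least gradio, transformers, torch, and accelerate. The CodeLlama
# checkpoints on the Hugging Face Hub are gated behind a license acceptance,
# so the Space typically also needs an access token (e.g. an HF_TOKEN secret)
# from an account that has accepted the model license. At float16, the 7B
# model needs roughly 14 GB of GPU memory.
# ---------------------------------------------------------------------------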