import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"  # Using CodeLlama as requested

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"]
]

# Load model with error handling and authentication
def load_model():
    try:
        print("Loading model and tokenizer...")

        # Get Hugging Face token from environment variable
        # This will be set in the Hugging Face Space settings
        hf_token = os.environ.get("HF_TOKEN")

        # If running locally and token is not set, try to use the token from git config
        if not hf_token:
            try:
                # Extract token from git config if available
                import subprocess
                git_url = subprocess.check_output(["git", "config", "--get", "remote.origin.url"]).decode().strip()
                if "@huggingface.co" in git_url:
                    # Extract the token from URLs of the form https://username:token@huggingface.co/...
                    credentials = git_url.split("://")[-1].split("@")[0]
                    hf_token = credentials.split(":")[1] if ":" in credentials else None
                    if hf_token:
                        print("Using token from git config")
            except Exception as e:
                print(f"Could not extract token from git config: {str(e)}")

        # Load tokenizer with token if available
        if hf_token:
            print("Using Hugging Face token for authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)

            # Configure the pipeline with token
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token
            )
        else:
            # Try without token (will only work if the model is public or the user is logged in)
            print("No Hugging Face token found, trying without authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
            )

        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Try to provide a more helpful error message
        if "gated repo" in str(e) or "401" in str(e):
            print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
            print("To use this model, you need to:")
            print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
            print("2. Set your Hugging Face token in the Space's settings")
            print("   (Settings > Repository Secrets > Add > HF_TOKEN)")
        # Return None to indicate failure
        return None, None

# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check if model is loaded
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."
        # Record the start time so the generation duration can be reported
        start_time = time.time()

        # Generate the code (cast slider values to int, since Gradio sliders may return floats)
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=int(top_k),
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=int(max_length),
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]['generated_text']
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer
print("Initializing CodeLlama-7b...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    # Add a note about authentication if needed
    if tokenizer is None or pipeline is None:
        gr.Markdown("""
## ⚠️ Authentication Required

This demo requires authentication to access the CodeLlama model.

To use this model, you need to:
1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)

The demo will show a limited interface until authentication is set up.
""")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")
        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model
    gr.Markdown("""
## About

This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

- **Max Length**: Controls the maximum length of the generated text
- **Temperature**: Controls randomness (lower = more deterministic)
- **Top-p**: Controls diversity via nucleus sampling
- **Top-k**: Controls diversity via top-k sampling

**Note**: CodeLlama is a gated model that requires authentication. If you're seeing authentication errors, please follow the instructions at the top of the page.

Created by DheepLearning
""")

# Launch the app
demo.launch()
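
# --- Local usage (assumption, not part of the original Space runtime) ---
# A minimal sketch for running this demo outside Hugging Face Spaces, assuming the
# file is saved as app.py and the CodeLlama license has been accepted on the Hub:
#   pip install gradio transformers torch accelerate
#   export HF_TOKEN=<your Hugging Face access token>
#   python app.py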