# ITR / app.py
import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time
# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf" # Using CodeLlama as requested
# Default example prompts
EXAMPLES = [
["import socket\n\ndef ping_exponential_backoff(host: str):"],
["def fibonacci(n: int) -> int:"],
["class BinarySearchTree:\n def __init__(self):"],
["async def fetch_data(url: str):"]
]
# Load model with error handling and authentication
def load_model():
try:
print("Loading model and tokenizer...")
# Get Hugging Face token from environment variable
# This will be set in the Hugging Face Space settings
hf_token = os.environ.get("HF_TOKEN")
# If running locally and token is not set, try to use the token from git config
if not hf_token:
try:
# Extract token from git config if available
import subprocess
git_url = subprocess.check_output(["git", "config", "--get", "remote.origin.url"]).decode().strip()
if "@huggingface.co" in git_url:
                    # Extract the token from a URL of the form https://username:token@huggingface.co/...
                    # ("https:" already contains one colon, so look for a second, credential colon)
                    hf_token = git_url.split("@")[0].split(":")[-1] if git_url.count(":") >= 2 else None
if hf_token:
print("Using token from git config")
except Exception as e:
print(f"Could not extract token from git config: {str(e)}")
# Load tokenizer with token if available
if hf_token:
print("Using Hugging Face token for authentication")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
# Configure the pipeline with token
pipeline = transformers.pipeline(
"text-generation",
model=MODEL_NAME,
torch_dtype=torch.float16,
device_map="auto",
token=hf_token
)
else:
# Try without token (will only work if model is public or user is logged in)
print("No Hugging Face token found, trying without authentication")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
pipeline = transformers.pipeline(
"text-generation",
model=MODEL_NAME,
torch_dtype=torch.float16,
device_map="auto",
)
print("Model loaded successfully!")
return tokenizer, pipeline
except Exception as e:
print(f"Error loading model: {str(e)}")
# Try to provide more helpful error message
if "gated repo" in str(e) or "401" in str(e):
print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
print("To use this model, you need to:")
print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
print("2. Set your Hugging Face token in the Space's settings")
print(" (Settings > Repository Secrets > Add > HF_TOKEN)")
# Return None to indicate failure
return None, None
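# Optional local check (a sketch, not required by the app): verify that a token is valid
# before launching, using huggingface_hub.whoami (huggingface_hub is installed alongside
# transformers). Uncomment to use:
# from huggingface_hub import whoami
# print(whoami(token=os.environ.get("HF_TOKEN")))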
# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
try:
# Check if model is loaded
if tokenizer is None or pipeline is None:
return "Error: Model failed to load. Please check the logs."
# Add a loading message
start_time = time.time()
# Generate the code
sequences = pipeline(
prompt,
do_sample=True,
top_k=top_k,
temperature=temperature,
top_p=top_p,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id,
            max_length=max_length,  # total length in tokens, including the prompt
)
# Calculate generation time
generation_time = time.time() - start_time
# Format the result
result = sequences[0]['generated_text']
return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
except Exception as e:
return f"Error generating code: {str(e)}"
# Load the model and tokenizer
print("Initializing CodeLlama-7b...")
tokenizer, pipeline = load_model()
# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
gr.Markdown("# CodeLlama Code Generation")
gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")
# Add a note about authentication if needed
if tokenizer is None or pipeline is None:
gr.Markdown("""
## ⚠️ Authentication Required
This demo requires authentication to access the CodeLlama model.
To use this model, you need to:
1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)
The demo will show a limited interface until authentication is set up.
""")
with gr.Row():
with gr.Column():
prompt = gr.Textbox(
label="Code Prompt",
placeholder="Enter your code prompt here...",
lines=5
)
with gr.Row():
max_length = gr.Slider(
minimum=50,
maximum=500,
value=200,
step=10,
label="Max Length"
)
temperature = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.1,
step=0.1,
label="Temperature"
)
with gr.Row():
top_p = gr.Slider(
minimum=0.5,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p"
)
top_k = gr.Slider(
minimum=1,
maximum=50,
value=10,
step=1,
label="Top-k"
)
generate_btn = gr.Button("Generate Code")
with gr.Column():
output = gr.Textbox(
label="Generated Code",
lines=20
)
# Connect the button to the generate function
generate_btn.click(
fn=generate_code,
inputs=[prompt, max_length, temperature, top_p, top_k],
outputs=output
)
# Add examples
gr.Examples(
examples=EXAMPLES,
inputs=prompt
)
# Add information about the model
gr.Markdown("""
## About
This demo uses the CodeLlama-7b model to generate code completions based on your prompts.
- **Max Length**: Controls the maximum length of the generated text
- **Temperature**: Controls randomness (lower = more deterministic)
- **Top-p**: Controls diversity via nucleus sampling
- **Top-k**: Controls diversity via top-k sampling
**Note**: CodeLlama is a gated model that requires authentication. If you're seeing authentication errors,
please follow the instructions at the top of the page.
Created by DheepLearning
""")
# Launch the app
demo.launch()
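# When running locally, demo.launch(share=True) would additionally expose a temporary public
# URL (a standard Gradio option); on a Hugging Face Space the default launch() is sufficient.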