import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"]
]
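
# Note (assumption): the CodeLlama checkpoint above may be gated on the
# Hugging Face Hub; if loading fails with an authorization error, the Space
# may need an access token (e.g. an HF_TOKEN secret) with download permission.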

# Load model with error handling
def load_model():
    try:
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # Configure the pipeline based on available resources
        # Hugging Face Spaces typically have GPU available
        pipeline = transformers.pipeline(
            "text-generation",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Return None to indicate failure
        return None, None
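
# Note: device_map="auto" in the pipeline above relies on the `accelerate`
# package; it should be available in the Space's requirements so the model
# can be placed on the available GPU/CPU automatically.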

# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check if model is loaded
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Record the start time so the generation time can be reported
        start_time = time.time()

        # Generate the code
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=top_k,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=max_length,
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]['generated_text']
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"
# Load the model and tokenizer
print("Initializing CodeLlama...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model
    gr.Markdown("""
## About
This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

- **Max Length**: Controls the maximum length of the generated text
- **Temperature**: Controls randomness (lower = more deterministic)
- **Top-p**: Controls diversity via nucleus sampling
- **Top-k**: Controls diversity via top-k sampling

Created by DheepLearning
    """)
# Launch the app
demo.launch()