import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"
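
# Note: meta-llama checkpoints on the Hugging Face Hub are gated; loading may
# require accepting the model license and authenticating (e.g. via an HF_TOKEN
# secret on Spaces).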

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"]
]

# Load model with error handling
def load_model():
    try:
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # Build the text-generation pipeline. device_map="auto" (which needs
        # the accelerate package) places the model on a GPU when one is
        # available and falls back to CPU; float16 halves the memory footprint.
        pipe = transformers.pipeline(
            "text-generation",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
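        # A 7B model in float16 occupies roughly 14 GB, so CPU-only hardware
        # may be very slow or may run out of memory.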

        print("Model loaded successfully!")
        return tokenizer, pipe
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Return None to indicate failure
        return None, None

# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check if model is loaded
        if tokenizer is None or pipe is None:
            return "Error: Model failed to load. Please check the logs."

        # Record the start time so we can report generation latency
        start_time = time.time()

        # Generate the code
        sequences = pipe(
            prompt,
            do_sample=True,
            top_k=top_k,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
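            # max_length counts the prompt tokens plus newly generated tokens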
            max_length=max_length,
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]['generated_text']
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"

    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer
print("Initializing CodeLlama...")
tokenizer, pipe = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )

            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )

            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )

            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model
    gr.Markdown("""
    ## About

    This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

    - **Max Length**: Maximum total length of the output in tokens (prompt + completion)
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Nucleus sampling; only tokens within cumulative probability p are considered
    - **Top-k**: Samples only from the k most likely next tokens

    Created by DheepLearning
    """)

# Launch the app; queue() serializes long-running generation requests,
# which helps on shared hardware such as Spaces
demo.queue().launch()