import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"],
]


# Load model with error handling
def load_model():
    try:
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Configure the pipeline based on available resources;
        # Hugging Face Spaces typically have a GPU available
        pipeline = transformers.pipeline(
            "text-generation",
            model=MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Return None to indicate failure
        return None, None
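
# Note: the meta-llama checkpoints on the Hugging Face Hub are gated, so a
# deployed Space typically needs an access token. A minimal sketch, assuming
# a recent transformers release and a token exposed as an HF_TOKEN secret
# (the variable name and the token plumbing are assumptions, not part of the
# original app):
#
#     import os
#
#     tokenizer = AutoTokenizer.from_pretrained(
#         MODEL_NAME, token=os.environ.get("HF_TOKEN")
#     )
#     pipeline = transformers.pipeline(
#         "text-generation",
#         model=MODEL_NAME,
#         torch_dtype=torch.float16,
#         device_map="auto",
#         token=os.environ.get("HF_TOKEN"),
#     )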


# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check if the model loaded successfully at startup
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Record the start time so the UI can report how long generation took
        start_time = time.time()

        # Generate the code
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=top_k,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=max_length,
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]['generated_text']
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer
print("Initializing CodeLlama...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5,
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature",
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k",
                )
            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20,
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output,
    )
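
    # A small convenience sketch (not part of the original wiring): Textbox has
    # a submit event with the same signature as Button.click, so the keyboard
    # could trigger generation too (Enter, or Ctrl+Enter for multi-line boxes,
    # depending on the Gradio version):
    #
    #     prompt.submit(
    #         fn=generate_code,
    #         inputs=[prompt, max_length, temperature, top_p, top_k],
    #         outputs=output,
    #     )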

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt,
    )

    # Add information about the model
    gr.Markdown("""
## About

This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

- **Max Length**: Controls the maximum length of the generated text
- **Temperature**: Controls randomness (lower = more deterministic)
- **Top-p**: Controls diversity via nucleus sampling
- **Top-k**: Controls diversity via top-k sampling

Created by DheepLearning
""")

# Launch the app
demo.launch()
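
# For a shared Space, Gradio's request queue keeps concurrent requests from
# hitting the single pipeline in parallel. A minimal variant (enabling the
# queue is an assumption about deployment, not part of the original app):
#
#     demo.queue().launch()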