import gradio as gr
from huggingface_hub import InferenceClient
import os
import re # For post-processing fallback
# --- FastAPI & Pydantic Imports ---
from fastapi import FastAPI, Request, HTTPException, Depends, Body
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import Literal, Optional
# --- Configuration ---
API_TOKEN = os.getenv("HF_TOKEN", None)
MODEL = "HuggingFaceH4/zephyr-7b-beta"
# --- Define the Secret Key for the API ---
API_SECRET_KEY = "onlyfordearygt" # Keep this secure in a real application (e.g., env variable)
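# A hardened alternative (sketch): read the key from the environment, e.g. a
# Space secret named API_SECRET_KEY, instead of hard-coding it:
#
#   API_SECRET_KEY = os.getenv("API_SECRET_KEY", "onlyfordearygt")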
# --- Initialize Inference Client ---
try:
print(f"Attempting to initialize Inference Client for model: {MODEL}")
if API_TOKEN:
print("Using HF Token found in environment.")
client = InferenceClient(model=MODEL, token=API_TOKEN)
else:
print("HF Token not found. Running without token (may lead to rate limits).")
client = InferenceClient(model=MODEL)
print("Inference Client initialized successfully.")
except Exception as e:
print(f"Error initializing Inference Client: {e}")
# We still want the Gradio app to potentially load, but maybe show an error
# raise gr.Error(f"Failed to initialize the AI model client for '{MODEL}'. Check model name, network, and HF_TOKEN secret if applicable. Error: {e}")
client = None # Set client to None so we can check later
print("WARNING: AI Client initialization failed. API/Generation will not work.")
# --- Pydantic Model for API Request Body ---
class GenerateRequest(BaseModel):
    prompt: str
    backend_choice: Literal["Static", "Flask", "Node.js"] = "Static"
    file_structure: Literal["Single File", "Multiple Files"] = "Multiple Files"
    max_tokens: Optional[int] = Field(default=3072, gt=128, le=4096)  # Validate the allowed range
    temperature: Optional[float] = Field(default=0.7, gt=0.0, le=2.0)
    top_p: Optional[float] = Field(default=0.9, gt=0.0, le=1.0)
    secret_key: str  # Required for authentication
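# A minimal example of the JSON body this model accepts ("secret_key" must
# match API_SECRET_KEY; the optional fields fall back to the defaults above):
#
#   {
#     "prompt": "A landing page for a coffee shop",
#     "backend_choice": "Static",
#     "file_structure": "Multiple Files",
#     "secret_key": "onlyfordearygt"
#   }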
# --- Core Code Generation Function ---
# Note: this generator is used by both the Gradio UI and the API endpoint.
def generate_code(
    prompt: str,
    backend_choice: str,
    file_structure: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
"""
Generates website code based on user prompt and choices.
Aims for richer CSS, emphasizes completeness, and strictly outputs ONLY raw code.
Yields the code token by token for live updates (for Gradio UI).
The *final* yielded value is the complete, cleaned code (for API).
"""
# Check if client initialized properly
if client is None:
final_error_message = "## Error\n\nAI Model Client not initialized. Generation is unavailable."
print(final_error_message)
# Yield the error for Gradio, return it for API callers later
yield final_error_message
return # Stop execution for this generator
    print("--- Generating Code ---")
    print(f"Prompt: {prompt[:100]}...")
    print(f"Backend Context: {backend_choice}")
    print(f"File Structure: {file_structure}")
    print(f"Settings: Max Tokens={max_tokens}, Temp={temperature}, Top-P={top_p}")
    if file_structure == "Single File":
        file_structure_instruction = (
            "- **File Structure is 'Single File':** Generate ONLY a single, complete `index.html` file. "
            "Embed ALL CSS directly within `<style>` tags inside the `<head>`. "
            "Embed ALL necessary JavaScript directly within `<script>` tags just before the closing `</body>` tag. "
            "Do NOT use file separation markers."
        )
    else:  # Multiple Files
        file_structure_instruction = (
            "- **File Structure is 'Multiple Files':** Generate code for `index.html`, `style.css`, and `script.js` (if JS is needed). "
            "Use these EXACT markers: `<!-- index.html -->`, `/* style.css */`, `// script.js` (only if JS is needed).\n"
            "- Place the corresponding code directly after each marker.\n"
            "- Inside `index.html`, link `style.css` in the `<head>` and include `script.js` before `</body>` if generated."
        )
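    # For illustration only: with the markers above, a "Multiple Files" response
    # is expected to look roughly like this (contents abbreviated):
    #
    #   <!-- index.html -->
    #   <!DOCTYPE html>...
    #   /* style.css */
    #   body { ... }
    #   // script.js
    #   document.addEventListener("DOMContentLoaded", ...);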
    system_message = (
        "You are an expert frontend web developer AI. Your primary goal is to generate **complete, visually appealing, modern, and well-styled** frontend code (HTML, CSS, client-side JS) based *only* on the user's description and selected options. "
        "Follow ALL these rules with EXTREME STRICTNESS:\n"
        "1. **STYLE & DETAIL:** Generate rich, detailed code. Use **plenty of CSS** for layout, spacing, typography, colors, and effects. Aim for a high-quality visual result.\n"
        "2. **COMPLETENESS:** Generate the *entire* requested code structure. Ensure all files/sections are fully generated and properly closed. **DO NOT STOP GENERATING PREMATURELY.** Finish the whole task.\n"
        "3. **RAW CODE ONLY:** Your *entire* response MUST consist *only* of the requested source code. NO extra text, NO explanations, NO apologies, NO introductions, NO summaries, NO comments about the code (except standard code comments), NO MARKDOWN formatting (like ```html), and ***ABSOLUTELY NO CONVERSATIONAL TEXT OR TAGS*** like `<|user|>` or `<|assistant|>`.\n"
        "4. **IMMEDIATE CODE START:** The response MUST begin *directly* with the first character of the code (`<!DOCTYPE html>` or `<!-- index.html -->`).\n"
        "5. **IMMEDIATE CODE END:** The response MUST end *immediately* after the very last character of the generated code. DO NOT add *any* text, spaces, or newlines after the code concludes.\n"
        "6. **MANDATORY `index.html`:** Always generate the content for `index.html`.\n"
        f"7. **FILE STRUCTURE ({file_structure}):** Strictly follow ONLY the instructions for the *selected* file structure:\n"
        f"   {file_structure_instruction}\n"
        f"8. **BACKEND CONTEXT ({backend_choice}):** Use as a hint for frontend structure only. Generate ONLY frontend code.\n"
        "9. **FRONTEND ONLY:** Do NOT generate server-side code.\n"
        "10. **ACCURACY:** Generate functional code addressing the user's prompt.\n\n"
        "REMEMBER: Create COMPLETE, visually appealing code. Output ONLY raw code. START immediately with code. FINISH the entire code generation. END immediately with code. NO extra text/tags."
    )
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Generate the complete website frontend code for: {prompt}"},
    ]
    response_stream = ""
    full_response_for_cleaning = ""
    token_count = 0
    last_yielded_value = ""  # Store the last value for the API
    try:
        print("Sending request to Hugging Face Inference API...")
        stream = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for message in stream:
            token = message.choices[0].delta.content
            if isinstance(token, str):
                token_count += 1
                response_stream += token
                full_response_for_cleaning += token
                last_yielded_value = response_stream  # Keep updating the last value during the stream
                yield response_stream  # Yield the cumulative response for live UI updates
        print(f"API stream finished. Received ~{token_count} tokens. Raw length: {len(full_response_for_cleaning)}")
        if token_count >= max_tokens - 10:
            print(f"WARNING: Generation might have been cut short by the max_tokens limit ({max_tokens}).")
        # --- Post-Processing ---
        cleaned_response = full_response_for_cleaning.strip()
        # Strip leading/trailing markdown code fences the model may add despite instructions
        cleaned_response = re.sub(r"^\s*```[a-z]*\s*\n?", "", cleaned_response)
        cleaned_response = re.sub(r"\n?\s*```\s*$", "", cleaned_response)
        # Strip leaked chat-template tags such as <|user|> or <|assistant|>
        cleaned_response = re.sub(r"<\s*\|?\s*(user|assistant)\s*\|?\s*>", "", cleaned_response, flags=re.IGNORECASE)
        # Remove common lead-in phrases only if they are clearly at the start and unlikely to be code
        common_phrases_start = ["Here is the code:", "Okay, here is the code:", "Here's the code:", "Sure, here is the code you requested:"]
        for phrase in common_phrases_start:
            if cleaned_response.lower().startswith(phrase.lower()):
                cleaned_response = cleaned_response[len(phrase):].lstrip()
        last_yielded_value = cleaned_response.strip()  # Final cleaned value
        yield last_yielded_value  # Yield the final cleaned response for the Gradio UI
    except Exception as e:
        error_message = f"An error occurred during the API call: {e}"
        print(error_message)
        final_error_message = f"## Error\n\nFailed to generate code.\n**Reason:** {e}"
        # Yield the error for the Gradio UI; the generator then stops. The API
        # route handler detects the "## Error" prefix and raises an HTTPException.
        yield final_error_message
# --- Build Gradio Interface using Blocks ---
# Define this *before* creating the FastAPI app that mounts it
with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
    gr.Markdown("# ✨ Website Code Generator ✨")
    gr.Markdown(
        "Describe the website you want. The AI will generate **visually styled** frontend code (HTML, CSS, JS) using **plenty of CSS**. "
        "The code appears live below.\n"
        "**Important:**\n"
        "1. This generator creates code based *only* on your initial description. To refine, modify your description and generate again.\n"
        "2. **If the code output stops abruptly**, it likely hit the 'Max New Tokens' limit. **Increase the slider value below** and try again!\n"
        "3. An API endpoint is available at `/api/generate` (POST request, requires the secret key)."
    )
    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(label="Website Description", placeholder="e.g., A modern portfolio...", lines=6)
            backend_radio = gr.Radio(["Static", "Flask", "Node.js"], label="Backend Context Hint", value="Static", info="Hint for the AI - it generates ONLY frontend code.")
            file_structure_radio = gr.Radio(["Multiple Files", "Single File"], label="Output File Structure", value="Multiple Files", info="Choose 'Single File' or 'Multiple Files'.")
            generate_button = gr.Button("🎨 Generate Stylish Website Code", variant="primary")
        with gr.Column(scale=3):
            code_output = gr.Code(label="Generated Code (Raw Output - Aiming for Style!)", language="html", lines=30, interactive=False)
            with gr.Accordion("Advanced Generation Settings", open=False):
                max_tokens_slider = gr.Slider(minimum=512, maximum=4096, value=3072, step=256, label="Max New Tokens", info="Maximum output length. Increase if output is cut off!")
                temperature_slider = gr.Slider(minimum=0.1, maximum=1.2, value=0.7, step=0.1, label="Temperature", info="Controls randomness.")
                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P", info="Alternative randomness control.")
    # --- Connect Gradio Inputs/Outputs ---
    generate_button.click(
        fn=generate_code,
        inputs=[prompt_input, backend_radio, file_structure_radio, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=code_output,
    )

    # --- Gradio Examples ---
    gr.Examples(
        examples=[
            ["A simple counter page...", "Static", "Single File"],
            ["A responsive product grid...", "Static", "Multiple Files"],
            ["A personal blog homepage...", "Flask", "Multiple Files"],
            ["A 'Coming Soon' page...", "Static", "Multiple Files"],
        ],
        inputs=[prompt_input, backend_radio, file_structure_radio],
        label="Example Prompts (Aiming for Style)",
    )
# --- Create FastAPI App and Mount Gradio ---
# This approach uses FastAPI as the main server and mounts Gradio onto it
app = FastAPI()
# --- Define the API Endpoint ---
@app.post("/api/generate")
async def api_generate_code(payload: GenerateRequest):
    """
    API endpoint to generate website code.
    Requires a secret key for authentication.
    """
    print(f"API Request received for prompt: {payload.prompt[:50]}...")

    # --- Authentication ---
    if payload.secret_key != API_SECRET_KEY:
        print("API Authentication failed: Invalid secret key.")
        raise HTTPException(status_code=403, detail="Invalid secret key")

    # Check that the HF client is available
    if client is None:
        print("API Error: AI Client not initialized.")
        raise HTTPException(status_code=503, detail="AI Model Client not initialized. Service unavailable.")
    print("API Authentication successful.")

    # --- Call the generator function and consume it ---
    final_code = ""
    try:
        # Consume the generator; the last yielded value is the complete, cleaned code
        code_generator = generate_code(
            prompt=payload.prompt,
            backend_choice=payload.backend_choice,
            file_structure=payload.file_structure,
            max_tokens=payload.max_tokens,
            temperature=payload.temperature,
            top_p=payload.top_p,
        )
        for code_chunk in code_generator:
            final_code = code_chunk  # Keep overwriting until the last chunk
        # Check whether the final result signals an error from within generate_code
        if final_code.strip().startswith("## Error"):
            print(f"API Error during generation: {final_code}")
            # Extract the reason if possible, otherwise return a generic error
            reason = final_code.split("Reason:**")[-1].strip() if "Reason:**" in final_code else "Generation failed internally."
            raise HTTPException(status_code=500, detail=f"Code generation failed: {reason}")
        print(f"API generated code length: {len(final_code)}")
        # --- Return the final code ---
        return JSONResponse(content={"generated_code": final_code})
    except HTTPException:
        # Re-raise HTTPExceptions raised inside this block (e.g., the generation-failure 500) unchanged
        raise
    except Exception as e:
        # Catch any other unexpected errors during generation/consumption
        print(f"API - Unexpected Error during generation: {e}")
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
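# Example client call (a sketch, not executed by this app): assumes the server
# below is running locally on port 7860.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/api/generate",
#       json={"prompt": "A dark-themed landing page", "secret_key": "onlyfordearygt"},
#   )
#   print(resp.json()["generated_code"])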
# --- Mount the Gradio app onto the FastAPI app ---
# The Gradio UI will be available at the root path "/"
app = gr.mount_gradio_app(app, demo, path="/")

# --- Launch ---
# Use Uvicorn to run the FastAPI app (which now includes Gradio)
if __name__ == "__main__":
    import uvicorn
    print("Starting FastAPI server with Gradio mounted...")
    # Recommended settings for Hugging Face Spaces: host="0.0.0.0", port=7860.
    # You might need to adjust the port if running locally and 7860 is taken.
    uvicorn.run(app, host="0.0.0.0", port=7860)
    # Note: demo.launch() is no longer used directly here, as uvicorn runs the combined app.