import gradio as gr
from huggingface_hub import InferenceClient
import os
import re # For post-processing fallback
# --- FastAPI & Pydantic Imports ---
from fastapi import FastAPI, Request, HTTPException, Depends, Body
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import Literal, Optional
# --- Configuration ---
API_TOKEN = os.getenv("HF_TOKEN", None)
MODEL = "HuggingFaceH4/zephyr-7b-beta"
# --- Define the Secret Key for the API ---
API_SECRET_KEY = "onlyfordearygt" # Keep this secure in a real application (e.g., env variable)
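# A hardened alternative (sketch): read the key from the environment, e.g. a
# Space secret named API_SECRET_KEY, instead of hard-coding it:
#
#   API_SECRET_KEY = os.getenv("API_SECRET_KEY", "onlyfordearygt")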
# --- Initialize Inference Client ---
try:
print(f"Attempting to initialize Inference Client for model: {MODEL}")
if API_TOKEN:
print("Using HF Token found in environment.")
client = InferenceClient(model=MODEL, token=API_TOKEN)
else:
print("HF Token not found. Running without token (may lead to rate limits).")
client = InferenceClient(model=MODEL)
print("Inference Client initialized successfully.")
except Exception as e:
print(f"Error initializing Inference Client: {e}")
# We still want the Gradio app to potentially load, but maybe show an error
# raise gr.Error(f"Failed to initialize the AI model client for '{MODEL}'. Check model name, network, and HF_TOKEN secret if applicable. Error: {e}")
client = None # Set client to None so we can check later
print("WARNING: AI Client initialization failed. API/Generation will not work.")
# --- Pydantic Model for API Request Body ---
class GenerateRequest(BaseModel):
    prompt: str
    backend_choice: Literal["Static", "Flask", "Node.js"] = "Static"
    file_structure: Literal["Single File", "Multiple Files"] = "Multiple Files"
    max_tokens: Optional[int] = Field(default=3072, gt=128, le=4096)  # Validate the allowed range
    temperature: Optional[float] = Field(default=0.7, gt=0.0, le=2.0)
    top_p: Optional[float] = Field(default=0.9, gt=0.0, le=1.0)
    secret_key: str  # Required for authentication
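# A minimal example of the JSON body this model accepts ("secret_key" must
# match API_SECRET_KEY; the optional fields fall back to the defaults above):
#
#   {
#     "prompt": "A landing page for a coffee shop",
#     "backend_choice": "Static",
#     "file_structure": "Multiple Files",
#     "secret_key": "onlyfordearygt"
#   }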
# --- Core Code Generation Function ---
# Note: this generator is used by both the Gradio UI and the API endpoint.
def generate_code(
    prompt: str,
    backend_choice: str,
    file_structure: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
"""
Generates website code based on user prompt and choices.
Aims for richer CSS, emphasizes completeness, and strictly outputs ONLY raw code.
Yields the code token by token for live updates (for Gradio UI).
The *final* yielded value is the complete, cleaned code (for API).
"""
# Check if client initialized properly
if client is None:
final_error_message = "## Error\n\nAI Model Client not initialized. Generation is unavailable."
print(final_error_message)
# Yield the error for Gradio, return it for API callers later
yield final_error_message
return # Stop execution for this generator
    print("--- Generating Code ---")
    print(f"Prompt: {prompt[:100]}...")
    print(f"Backend Context: {backend_choice}")
    print(f"File Structure: {file_structure}")
    print(f"Settings: Max Tokens={max_tokens}, Temp={temperature}, Top-P={top_p}")
    if file_structure == "Single File":
        file_structure_instruction = (
            "- **File Structure is 'Single File':** Generate ONLY a single, complete `index.html` file. "
            "Embed ALL CSS directly within `<style>` tags inside the `<head>`. "
            "Embed ALL necessary JavaScript directly within `<script>` tags just before the closing `</body>` tag. "
            "Do NOT use file separation markers."
        )
    else:  # Multiple Files
        file_structure_instruction = (
            "- **File Structure is 'Multiple Files':** Generate code for `index.html`, `style.css`, and `script.js` (if JS is needed). "
            "Use these EXACT markers: `<!-- index.html -->`, `/* style.css */`, `// script.js` (only if JS is needed).\n"
            "- Place the corresponding code directly after each marker.\n"
            "- Inside `index.html`, link `style.css` in the `<head>` and include `script.js` before `</body>` if generated."
        )
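    # For illustration only: with the markers above, a "Multiple Files" response
    # is expected to look roughly like this (contents abbreviated):
    #
    #   <!-- index.html -->
    #   <!DOCTYPE html>...
    #   /* style.css */
    #   body { ... }
    #   // script.js
    #   document.addEventListener("DOMContentLoaded", ...);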
    system_message = (
        "You are an expert frontend web developer AI. Your primary goal is to generate **complete, visually appealing, modern, and well-styled** frontend code (HTML, CSS, client-side JS) based *only* on the user's description and selected options. "
        "Follow ALL these rules with EXTREME STRICTNESS:\n"
        "1. **STYLE & DETAIL:** Generate rich, detailed code. Use **plenty of CSS** for layout, spacing, typography, colors, and effects. Aim for a high-quality visual result.\n"
        "2. **COMPLETENESS:** Generate the *entire* requested code structure. Ensure all files/sections are fully generated and properly closed. **DO NOT STOP GENERATING PREMATURELY.** Finish the whole task.\n"
        "3. **RAW CODE ONLY:** Your *entire* response MUST consist *only* of the requested source code. NO extra text, NO explanations, NO apologies, NO introductions, NO summaries, NO comments about the code (except standard code comments), NO MARKDOWN formatting (like ```html), and ***ABSOLUTELY NO CONVERSATIONAL TEXT OR TAGS*** like `<|user|>` or `<|assistant|>`.\n"
        "4. **IMMEDIATE CODE START:** The response MUST begin *directly* with the first character of the code (`<!DOCTYPE html>` or `<!-- index.html -->`).\n"
        "5. **IMMEDIATE CODE END:** The response MUST end *immediately* after the very last character of the generated code. DO NOT add *any* text, spaces, or newlines after the code concludes.\n"
        "6. **MANDATORY `index.html`:** Always generate the content for `index.html`.\n"
        f"7. **FILE STRUCTURE ({file_structure}):** Strictly follow ONLY the instructions for the *selected* file structure:\n"
        f"   {file_structure_instruction}\n"
        f"8. **BACKEND CONTEXT ({backend_choice}):** Use as a hint for frontend structure only. Generate ONLY frontend code.\n"
        "9. **FRONTEND ONLY:** Do NOT generate server-side code.\n"
        "10. **ACCURACY:** Generate functional code addressing the user's prompt.\n\n"
        "REMEMBER: Create COMPLETE, visually appealing code. Output ONLY raw code. START immediately with code. FINISH the entire code generation. END immediately with code. NO extra text/tags."
    )
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Generate the complete website frontend code for: {prompt}"},
    ]
    response_stream = ""
    full_response_for_cleaning = ""
    token_count = 0
    last_yielded_value = ""  # Store the last value for the API
    try:
        print("Sending request to Hugging Face Inference API...")
        stream = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for message in stream:
            token = message.choices[0].delta.content
            if isinstance(token, str):
                token_count += 1
                response_stream += token
                full_response_for_cleaning += token
                last_yielded_value = response_stream  # Keep updating the last value during the stream
                yield response_stream  # Yield the cumulative response for live UI updates
        print(f"API stream finished. Received ~{token_count} tokens. Raw length: {len(full_response_for_cleaning)}")
        if token_count >= max_tokens - 10:
            print(f"WARNING: Generation might have been cut short by the max_tokens limit ({max_tokens}).")
        # --- Post-Processing ---
        cleaned_response = full_response_for_cleaning.strip()
        # Strip leading/trailing markdown code fences the model may add despite instructions
        cleaned_response = re.sub(r"^\s*```[a-z]*\s*\n?", "", cleaned_response)
        cleaned_response = re.sub(r"\n?\s*```\s*$", "", cleaned_response)
        # Strip leaked chat-template tags such as <|user|> or <|assistant|>
        cleaned_response = re.sub(r"<\s*\|?\s*(user|assistant)\s*\|?\s*>", "", cleaned_response, flags=re.IGNORECASE)
        # Remove common lead-in phrases only if they are clearly at the start and unlikely to be code
        common_phrases_start = ["Here is the code:", "Okay, here is the code:", "Here's the code:", "Sure, here is the code you requested:"]
        for phrase in common_phrases_start:
            if cleaned_response.lower().startswith(phrase.lower()):
                cleaned_response = cleaned_response[len(phrase):].lstrip()
        last_yielded_value = cleaned_response.strip()  # Final cleaned value
        yield last_yielded_value  # Yield the final cleaned response for the Gradio UI
    except Exception as e:
        error_message = f"An error occurred during the API call: {e}"
        print(error_message)
        final_error_message = f"## Error\n\nFailed to generate code.\n**Reason:** {e}"
        # Yield the error for the Gradio UI; the generator then stops. The API
        # route handler detects the "## Error" prefix and raises an HTTPException.
        yield final_error_message
# --- Build Gradio Interface using Blocks ---
# Define this *before* creating the FastAPI app that mounts it
with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
    gr.Markdown("# ✨ Website Code Generator ✨")
    gr.Markdown(
        "Describe the website you want. The AI will generate **visually styled** frontend code (HTML, CSS, JS) using **plenty of CSS**. "
        "The code appears live below.\n"
        "**Important:**\n"
        "1. This generator creates code based *only* on your initial description. To refine, modify your description and generate again.\n"
        "2. **If the code output stops abruptly**, it likely hit the 'Max New Tokens' limit. **Increase the slider value below** and try again!\n"
        "3. An API endpoint is available at `/api/generate` (POST request, requires the secret key)."
    )
    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(label="Website Description", placeholder="e.g., A modern portfolio...", lines=6)
            backend_radio = gr.Radio(["Static", "Flask", "Node.js"], label="Backend Context Hint", value="Static", info="Hint for the AI - it generates ONLY frontend code.")
            file_structure_radio = gr.Radio(["Multiple Files", "Single File"], label="Output File Structure", value="Multiple Files", info="Choose 'Single File' or 'Multiple Files'.")
            generate_button = gr.Button("🎨 Generate Stylish Website Code", variant="primary")
        with gr.Column(scale=3):
            code_output = gr.Code(label="Generated Code (Raw Output - Aiming for Style!)", language="html", lines=30, interactive=False)
            with gr.Accordion("Advanced Generation Settings", open=False):
                max_tokens_slider = gr.Slider(minimum=512, maximum=4096, value=3072, step=256, label="Max New Tokens", info="Maximum output length. Increase if output is cut off!")
                temperature_slider = gr.Slider(minimum=0.1, maximum=1.2, value=0.7, step=0.1, label="Temperature", info="Controls randomness.")
                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P", info="Alternative randomness control.")
    # --- Connect Gradio Inputs/Outputs ---
    generate_button.click(
        fn=generate_code,
        inputs=[prompt_input, backend_radio, file_structure_radio, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=code_output,
    )

    # --- Gradio Examples ---
    gr.Examples(
        examples=[
            ["A simple counter page...", "Static", "Single File"],
            ["A responsive product grid...", "Static", "Multiple Files"],
            ["A personal blog homepage...", "Flask", "Multiple Files"],
            ["A 'Coming Soon' page...", "Static", "Multiple Files"],
        ],
        inputs=[prompt_input, backend_radio, file_structure_radio],
        label="Example Prompts (Aiming for Style)",
    )
# --- Create FastAPI App and Mount Gradio ---
# This approach uses FastAPI as the main server and mounts Gradio onto it
app = FastAPI()
# --- Define the API Endpoint ---
@app.post("/api/generate")
async def api_generate_code(payload: GenerateRequest):
    """
    API endpoint to generate website code.
    Requires a secret key for authentication.
    """
    print(f"API Request received for prompt: {payload.prompt[:50]}...")

    # --- Authentication ---
    if payload.secret_key != API_SECRET_KEY:
        print("API Authentication failed: Invalid secret key.")
        raise HTTPException(status_code=403, detail="Invalid secret key")

    # Check that the HF client is available
    if client is None:
        print("API Error: AI Client not initialized.")
        raise HTTPException(status_code=503, detail="AI Model Client not initialized. Service unavailable.")
    print("API Authentication successful.")

    # --- Call the generator function and consume it ---
    final_code = ""
    try:
        # Consume the generator; the last yielded value is the complete, cleaned code
        code_generator = generate_code(
            prompt=payload.prompt,
            backend_choice=payload.backend_choice,
            file_structure=payload.file_structure,
            max_tokens=payload.max_tokens,
            temperature=payload.temperature,
            top_p=payload.top_p,
        )
        for code_chunk in code_generator:
            final_code = code_chunk  # Keep overwriting until the last chunk
        # Check whether the final result signals an error from within generate_code
        if final_code.strip().startswith("## Error"):
            print(f"API Error during generation: {final_code}")
            # Extract the reason if possible, otherwise return a generic error
            reason = final_code.split("Reason:**")[-1].strip() if "Reason:**" in final_code else "Generation failed internally."
            raise HTTPException(status_code=500, detail=f"Code generation failed: {reason}")
        print(f"API generated code length: {len(final_code)}")
        # --- Return the final code ---
        return JSONResponse(content={"generated_code": final_code})
    except HTTPException:
        # Re-raise HTTPExceptions raised inside this block (e.g., the generation-failure 500) unchanged
        raise
    except Exception as e:
        # Catch any other unexpected errors during generation/consumption
        print(f"API - Unexpected Error during generation: {e}")
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
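# Example client call (a sketch, not executed by this app): assumes the server
# below is running locally on port 7860.
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/api/generate",
#       json={"prompt": "A dark-themed landing page", "secret_key": "onlyfordearygt"},
#   )
#   print(resp.json()["generated_code"])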
# --- Mount the Gradio app onto the FastAPI app ---
# The Gradio UI will be available at the root path "/"
app = gr.mount_gradio_app(app, demo, path="/")

# --- Launch ---
# Use Uvicorn to run the FastAPI app (which now includes Gradio)
if __name__ == "__main__":
    import uvicorn
    print("Starting FastAPI server with Gradio mounted...")
    # Recommended settings for Hugging Face Spaces: host="0.0.0.0", port=7860.
    # You might need to adjust the port if running locally and 7860 is taken.
    uvicorn.run(app, host="0.0.0.0", port=7860)
    # Note: demo.launch() is no longer used directly here, as uvicorn runs the combined app.