MINEOGO committed
Commit 531e420 · verified · 1 Parent(s): 69fcee7

Update app.py

Files changed (1): app.py +108 -285

app.py CHANGED
@@ -1,334 +1,157 @@
  import gradio as gr
- from huggingface_hub import InferenceClient, HfHubHTTPError
  import os
  import re
- import traceback

- # --- Configuration ---
  API_TOKEN = os.getenv("HF_TOKEN", None)
- # MODEL = "Qwen/Qwen3-32B" # This is a very large model, might require specific inference endpoint/hardware
- # Let's try a smaller, generally available model for testing first, e.g., Mixtral
- # You can change this back if you are sure Qwen3-32B is available and configured for your space/token
- # MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
- # Or uncomment the Qwen model if you are certain it's correctly set up for inference:
  MODEL = "Qwen/Qwen3-32B"
- # i have used Qwen3 because its quiet compatible
-
-
- # --- Hugging Face Client Initialization ---
- print("--- App Start ---")
- if not API_TOKEN:
-     print("Warning: HF_TOKEN environment variable not set. Using anonymous access.")
-     print("Certain models might require a token for access.")
- else:
-     print(f"HF_TOKEN found (length={len(API_TOKEN)}).")  # Don't print the token itself

  try:
      print(f"Initializing Inference Client for model: {MODEL}")
-     # Explicitly pass token=None if not found, though InferenceClient handles it.
-     client = InferenceClient(model=MODEL, token=API_TOKEN if API_TOKEN else None)
-     print("Inference Client Initialized Successfully.")
-     # Optional: Add a quick test call if feasible, but be mindful of potential costs/rate limits
-     # try:
-     #     client.text_generation("test", max_new_tokens=1)
-     #     print("Test generation successful.")
-     # except Exception as test_e:
-     #     print(f"Warning: Test generation failed. Client might be initialized but model access could be problematic. Error: {test_e}")
-
- except HfHubHTTPError as http_err:
-     # More specific error handling for HTTP errors (like 401 Unauthorized, 403 Forbidden, 404 Not Found)
-     error_message = (
-         f"Failed to initialize model client for {MODEL} due to an HTTP error.\n"
-         f"Status Code: {http_err.response.status_code}\n"
-         f"Error: {http_err}\n"
-         f"Check:\n"
-         f"1. If '{MODEL}' is a valid model ID on Hugging Face Hub.\n"
-         f"2. If the model requires gating or specific permissions.\n"
-         f"3. If your HF_TOKEN is correct and has the necessary permissions (set as a Secret in your Space).\n"
-         f"4. If the default Inference API supports this model or if a dedicated Inference Endpoint is needed."
-     )
-     print(f"ERROR: {error_message}")
-     raise gr.Error(error_message)
  except Exception as e:
-     error_message = (
-         f"An unexpected error occurred while initializing the model client for {MODEL}.\n"
-         f"Error Type: {type(e).__name__}\n"
-         f"Error: {e}\n"
-         f"Traceback:\n{traceback.format_exc()}\n"  # Add traceback
-         f"Check HF_TOKEN, model availability, network connection, and Space resources."
-     )
-     print(f"ERROR: {error_message}")
-     raise gr.Error(error_message)

- # --- Helper Functions ---

- # Parse all ```filename.ext\n<code>``` blocks
- def parse_code_blocks(response: str) -> list:
-     pattern = r"```([^\n]+)\n(.*?)```"
-     blocks = re.findall(pattern, response, re.DOTALL)
-     files = []
-     for filename, code in blocks:
-         filename = filename.strip()
-         code = code.strip()
-         # Basic language detection (can be expanded)
-         lang = None
-         if filename.endswith(".py"):
-             lang = "python"
-         elif filename.endswith(".js"):
-             lang = "javascript"
-         elif filename.endswith(".html"):
-             lang = "html"
-         elif filename.endswith(".css"):
-             lang = "css"
-         elif filename.endswith(".json"):
-             lang = "json"
-         elif filename.endswith(".md"):
-             lang = "markdown"
-         elif filename.endswith(".sh") or filename.endswith(".bash"):
-             lang = "bash"
-         elif filename.endswith(".java"):
-             lang = "java"
-         # Add more extensions as needed

          files.append({
              "filename": filename,
-             "language": lang,
-             "code": code
          })
-     # Add logging to see what's parsed
-     # print(f"Parsed {len(files)} code blocks.")
-     # for i, f in enumerate(files):
-     #     print(f" Block {i}: filename='{f['filename']}', lang='{f['language']}', code_len={len(f['code'])}")
      return files

- def strip_think_tags(text: str) -> str:
-     return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

- def extract_thoughts(text: str) -> str:
-     matches = re.findall(r"<think>(.*?)</think>", text, flags=re.DOTALL)
-     # Only return the last thought block for cleaner display? Or join all? Let's join.
-     return "\n---\n".join(match.strip() for match in matches).strip()

- # --- System Message ---
- system_message = (
-     "You are a helpful AI assistant specialized in generating website code. "
-     "Generate all the necessary files based on the user's request. "
-     "Output each file within a separate markdown code block formatted exactly like this:\n"
-     "```filename.ext\n"
-     "<code>\n"
-     "```\n"
-     "Do not add any explanatory text outside the code blocks. Ensure the filenames have appropriate extensions. "
-     "If you need to think step-by-step, use <think>...</think> tags. These tags will be hidden from the final user output but help guide your generation process."
- )

- # --- Code Generation Function ---
  def generate_code(prompt, backend_choice, max_tokens, temperature, top_p):
-     if not prompt:
-         # Handle empty prompt case
-         yield [], gr.update(value="Please enter a description for the website.", visible=True)
-         return

-     # Use f-string formatting for clarity
-     user_prompt = f"USER_PROMPT: {prompt}\nUSER_BACKEND_PREFERENCE: {backend_choice}"

      messages = [
          {"role": "system", "content": system_message},
          {"role": "user", "content": user_prompt}
      ]

-     full_response = ""
-     current_thoughts = ""
-     accumulated_error = ""  # Accumulate errors during stream
-
-     # Reset outputs: Clear previous code blocks and show/clear thinking box
-     # Yield an empty list to the gr.Column to clear it.
-     # Make thinking box visible but empty.
-     yield [], gr.update(visible=True, value="Generating code...")

-     print(f"\n--- Generating Code ---")
-     print(f"Prompt: {prompt[:100]}...")  # Log truncated prompt
-     print(f"Backend: {backend_choice}, Max Tokens: {max_tokens}, Temp: {temperature}, Top-P: {top_p}")

      try:
-         stream = client.chat_completion(
-             messages=messages,
-             max_tokens=max_tokens,
-             stream=True,
-             temperature=temperature if temperature > 0 else 0.01,  # Ensure temp is positive
-             top_p=top_p,
-             # Consider adding stop sequences if the model tends to run on
-             # stop=["```\n\n", "\n\nHuman:", "\n\nUSER:"]  # Example stop sequences
-         )
-
-         code_updates = []  # Store the gr.Code components to yield
-
-         for i, message in enumerate(stream):
-             # Check for errors in the stream message (some providers might include error info)
-             if hasattr(message, 'error') and message.error:
-                 accumulated_error += f"Error in stream chunk {i}: {message.error}\n"
-                 print(f"ERROR in stream chunk {i}: {message.error}")
-                 continue  # Skip this chunk if it's an error indicator

-             # Ensure the path to content is correct
-             try:
-                 # Common path: message.choices[0].delta.content
-                 token = message.choices[0].delta.content
-                 # Handle potential None token at the end of the stream or in error cases
-                 if token is None:
-                     token = ""
-                 # print(f"Token {i}: '{token}'")  # DEBUG: print each token
-             except (AttributeError, IndexError, TypeError) as e:
-                 # Handle unexpected message structure
-                 print(f"Warning: Could not extract token from stream message {i}. Structure: {message}. Error: {e}")
-                 token = ""  # Assign empty string to avoid breaking accumulation

-             if isinstance(token, str):
-                 full_response += token
-
-                 # Update thinking box periodically (e.g., every 10 tokens or if thoughts change)
-                 if i % 10 == 0 or "<think>" in token or "</think>" in token:
-                     thoughts = extract_thoughts(full_response)
-                     if thoughts != current_thoughts:
-                         current_thoughts = thoughts
-                         # Don't yield code_updates here yet, only update thoughts
-                         yield code_updates, gr.update(value=current_thoughts if current_thoughts else "Thinking...", visible=True)
-
-
-                 # Update code blocks less frequently or when a block seems complete
-                 # Heuristic: update if the response ends with ```
-                 if token.strip().endswith("```") or i % 20 == 0:  # Adjust frequency as needed
-                     cleaned_response = strip_think_tags(full_response)
-                     parsed_files = parse_code_blocks(cleaned_response)
-
-                     # Create gr.Code components for the parsed files
-                     # Compare with existing code_updates to avoid redundant updates if content hasn't changed significantly
-                     new_code_updates = []
-                     changed = False
-                     if len(parsed_files) != len(code_updates):
-                         changed = True
-                     else:
-                         # Quick check if filenames/code lengths differ significantly
-                         for idx, f in enumerate(parsed_files):
-                             if (idx >= len(code_updates) or
-                                     f["filename"] != code_updates[idx].label or
-                                     len(f["code"]) != len(code_updates[idx].value)):  # Simple length check
-                                 changed = True
-                                 break
-
-                     if changed or not code_updates:  # Update if changed or first time
-                         code_updates = []
-                         for f in parsed_files:
-                             code_updates.append(
-                                 gr.Code(
-                                     value=f["code"],
-                                     label=f["filename"],
-                                     language=f["language"]
-                                 )
-                             )
-                         # Yield the list of gr.Code components to the gr.Column
-                         # Also update thoughts (might be slightly out of sync, but acceptable)
-                         yield code_updates, gr.update(value=current_thoughts if current_thoughts else "Thinking...", visible=True)
-
-
-         # --- Final Update after Stream Ends ---
-         print("Stream finished.")
-         if accumulated_error:
-             print(f"Errors occurred during stream:\n{accumulated_error}")
-             # Decide how to show this to the user, e.g., append to thoughts or show separately
-             current_thoughts += f"\n\n**Streaming Errors:**\n{accumulated_error}"
-
-         cleaned_response = strip_think_tags(full_response)
-         final_files = parse_code_blocks(cleaned_response)
-         print(f"Final parsed files: {len(final_files)}")
-
-         final_code_updates = []
-         if not final_files and not accumulated_error:
-             # Handle case where no code blocks were generated
-             final_code_updates.append(gr.Markdown("No code blocks were generated. The model might have responded with text instead, or the format was incorrect."))
-             print("Warning: No code blocks found in the final response.")
-             # Optionally show the raw response for debugging
-             # final_code_updates.append(gr.Code(label="Raw Response", value=cleaned_response, language="text"))
-
-         elif not final_files and accumulated_error:
-             final_code_updates.append(gr.Markdown(f"**Error during generation:**\n{accumulated_error}"))
-
-         else:
-             for f in final_files:
-                 final_code_updates.append(
-                     gr.Code(
-                         value=f["code"],
-                         label=f["filename"],
-                         language=f["language"]
-                     )
-                 )
-
-         # Yield final code blocks and hide thinking box (or show final thoughts/errors)
-         final_thought_update = gr.update(visible=True if current_thoughts else False, value=current_thoughts)
-         yield final_code_updates, final_thought_update
-
-     except HfHubHTTPError as http_err:
-         # Handle errors during the streaming call itself
-         error_message = (
-             f"**Error during code generation (HTTP Error):**\n"
-             f"Status Code: {http_err.response.status_code}\n"
-             f"Error: {http_err}\n"
-             f"This could be due to rate limits, invalid input, model errors, or token issues.\n"
-             f"Check the Hugging Face Space logs for more details."
-         )
-         print(f"ERROR: {error_message}")
-         print(traceback.format_exc())
-         # Yield error message in the output area
-         yield [gr.Markdown(error_message)], gr.update(visible=False)  # Hide thinking box on error

      except Exception as e:
-         error_message = (
-             f"**An unexpected error occurred during code generation:**\n"
-             f"Error Type: {type(e).__name__}\n"
-             f"Error: {e}\n\n"
-             f"**Traceback:**\n```\n{traceback.format_exc()}\n```\n"
-             f"Check the Hugging Face Space logs for more details."
-         )
-         print(f"ERROR: {error_message}")
-         # Yield error message in the output area
-         yield [gr.Markdown(error_message)], gr.update(visible=False)  # Hide thinking box on error


- # --- Gradio Interface ---
- with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
-     gr.Markdown("# Website Code Generator ")
-     gr.Markdown("Describe the website you want. Code files will appear below. Uses `mistralai/Mixtral-8x7B-Instruct-v0.1` by default (check code to change).")  # Update description

      with gr.Row():
          with gr.Column(scale=2):
-             prompt_input = gr.Textbox(label="Website Description", lines=6, placeholder="e.g., A simple landing page with a title, a paragraph, and a button linking to example.com")
-             backend_radio = gr.Radio(["Static (HTML/CSS/JS)", "Flask", "Node.js"], label="Backend Preference (Influences AI)", value="Static (HTML/CSS/JS)")
-             generate_button = gr.Button("Generate Website Code", variant="primary")
-
-             with gr.Accordion("Advanced Settings", open=False):
-                 max_tokens_slider = gr.Slider(512, 8192, value=4096, step=256, label="Max New Tokens")  # Increased max potential tokens
-                 temperature_slider = gr.Slider(0.0, 1.2, value=0.6, step=0.05, label="Temperature (0=deterministic, >1=more creative)")  # Allow 0
-                 top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P (Nucleus Sampling)")

          with gr.Column(scale=3):
-             thinking_box = gr.Textbox(label="Model Activity / Thoughts", visible=False, interactive=False, lines=2)
-             # Use gr.Column to hold the dynamic code blocks
-             # Remove the update lambda, it's not needed for Column
-             file_outputs = gr.Column(elem_id="code-output-area")


-     generate_button.click(
          fn=generate_code,
-         inputs=[prompt_input, backend_radio, max_tokens_slider, temperature_slider, top_p_slider],
-         # Output to the Column and the Textbox
-         outputs=[file_outputs, thinking_box],
-         # api_name="generate_code"  # Optional: for API access
      )

- # --- Launch ---
  if __name__ == "__main__":
-     print("Starting Gradio App...")
-     # Use queue() for handling multiple users and streaming
-     # Set share=False unless you specifically want a public link from local execution
-     # Set debug=True for more detailed Gradio errors locally (remove/set False for production)
-     demo.queue().launch(debug=False, share=False)
-     print("Gradio App Launched.")
  import gradio as gr
+ from huggingface_hub import InferenceClient
  import os
  import re

  API_TOKEN = os.getenv("HF_TOKEN", None)
  MODEL = "Qwen/Qwen3-32B"

  try:
      print(f"Initializing Inference Client for model: {MODEL}")
+     client = InferenceClient(model=MODEL, token=API_TOKEN) if API_TOKEN else InferenceClient(model=MODEL)
  except Exception as e:
+     raise gr.Error(f"Failed to initialize model client for {MODEL}. Error: {e}. Check HF_TOKEN and model availability.")


+ def extract_files_from_code(raw_code: str) -> list:
+     """
+     Parses the full code block output and extracts files defined using markdown-style triple backticks,
+     e.g., ```index.html ... ```
+     Returns a list of dicts with filename, language, and content.
+     """
+     pattern = r"```([a-zA-Z0-9.+_-]+)\n(.*?)```"
+     matches = re.finditer(pattern, raw_code, flags=re.DOTALL)

+     files = []
+     for match in matches:
+         filename = match.group(1).strip()
+         content = match.group(2).strip()
+         lang = "plaintext"
+         if filename.endswith(".html"): lang = "html"
+         elif filename.endswith(".py"): lang = "python"
+         elif filename.endswith(".js"): lang = "javascript"
+         elif filename.endswith(".css"): lang = "css"
          files.append({
              "filename": filename,
+             "content": content,
+             "language": lang
          })
      return files
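
Note: the parser added above assumes the model labels each fenced block with a bare filename on the opening fence. A minimal illustrative check of that contract (the sample response string is made up, not from the commit):

```python
# Illustrative only: exercising extract_files_from_code() as added in this commit.
fence = "`" * 3  # build the triple backticks so this snippet stays self-contained
sample = (
    f"{fence}index.html\n<!DOCTYPE html>\n<h1>Hello</h1>\n{fence}\n"
    f"{fence}app.py\nfrom flask import Flask\napp = Flask(__name__)\n{fence}\n"
)
for f in extract_files_from_code(sample):
    # Expected: ("index.html", "html") then ("app.py", "python")
    print(f["filename"], f["language"], len(f["content"]))
```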


+ def clean_streamed_response(text: str) -> str:
+     """
+     Remove <think>...</think> and system/assistant/user tokens.
+     """
+     # Remove <think>...</think> blocks
+     text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
+     # Remove role markers
+     text = re.sub(r"<\s*\|?\s*(user|system|assistant)\s*\|?\s*>", "", text, flags=re.IGNORECASE)
+     return text
+
+
+ def extract_think_message(text: str) -> str:
+     match = re.search(r"<think>(.*?)</think>", text, flags=re.DOTALL)
+     return match.group(1).strip() if match else ""
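
These two helpers are what the streaming loop below uses to split a partial response into a visible "thinking" message and parseable code. A small illustrative call (the input string is made up):

```python
# Illustrative only: behaviour of the two helpers added in this commit.
chunk = "<think>plan the layout first</think><h1>Hello</h1>"
print(extract_think_message(chunk))    # -> "plan the layout first"
print(clean_streamed_response(chunk))  # -> "<h1>Hello</h1>"
```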


  def generate_code(prompt, backend_choice, max_tokens, temperature, top_p):
+     print(f"Streaming generation for: {prompt[:80]}...")
+
+     system_message = (
+         "You are an AI that generates website code. You MUST ONLY output the raw code, without any conversational text like 'Here is the code' or explanations before or after the code blocks. "
+         "You MUST NOT wrap the code in markdown fences like ```html, ```python, or ```js. "
+         "If the user requests 'Static' or the prompt clearly implies only frontend code, generate ONLY the content for the `index.html` file. "
+         "If the user requests 'Flask' or 'Node.js' and the prompt requires backend logic, you MUST generate both the `index.html` content AND the corresponding main backend file content (e.g., `app.py` for Flask, `server.js` or `app.js` for Node.js). "
+         "When generating multiple files, you MUST wrap them in separate triple-backtick sections labeled with filenames like ```index.html, ```app.py, etc. "
+         "The generated website code must be SFW and have minimal errors. Only include comments where user modification is strictly required."
+     )

+     user_prompt = f"USER_PROMPT = {prompt}\nUSER_BACKEND = {backend_choice}"

      messages = [
          {"role": "system", "content": system_message},
          {"role": "user", "content": user_prompt}
      ]

+     stream = client.chat_completion(
+         messages=messages,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     )

+     full_response = ""
+     files = []
+     yield [], "", gr.update(visible=False)

      try:
+         for message in stream:
+             token = message.choices[0].delta.content
+             if not token:
+                 continue
+             full_response += token

+             # Extract and display <think> message (live)
+             think = extract_think_message(full_response)
+             cleaned = clean_streamed_response(full_response)

+             parsed_files = extract_files_from_code(cleaned)
+             files = parsed_files  # live update

+             yield files, think, gr.update(visible=bool(think.strip()))
      except Exception as e:
+         print(f"Error: {e}")
+         yield [], f"Error: {e}", gr.update(visible=True)


+ with gr.Blocks(css=".gradio-container { max-width: 95% !important; }") as demo:
+     gr.Markdown("# Website Code Generator")
+     gr.Markdown("Enter a description and get live code split into actual files like `index.html`, `app.py`, etc.")

      with gr.Row():
          with gr.Column(scale=2):
+             prompt = gr.Textbox(label="Website Prompt", lines=5)
+             backend = gr.Radio(["Static", "Flask", "Node.js"], value="Static", label="Backend Type")
+             gen_btn = gr.Button("Generate Code", variant="primary")
+             with gr.Accordion("Advanced", open=False):
+                 max_tokens = gr.Slider(512, 4096, step=256, value=2048, label="Max Tokens")
+                 temperature = gr.Slider(0.1, 1.5, step=0.1, value=0.7, label="Temperature")
+                 top_p = gr.Slider(0.1, 1.0, step=0.05, value=0.95, label="Top-P")

          with gr.Column(scale=3):
+             file_output = gr.Group()
+             code_tabs = gr.Tabs()
+
+             dynamic_outputs = []
+
+             for i in range(5):  # Pre-create 5 tabs max
+                 with code_tabs:
+                     code_box = gr.Code(label=f"File {i+1}", language="plaintext", visible=False, lines=25)
+                     dynamic_outputs.append(code_box)
+
+             think_box = gr.Textbox(label="Thinking...", visible=False, interactive=False)

+     def display_outputs(file_list, think_msg, think_visible):
+         updates = []
+         for i in range(5):
+             if i < len(file_list):
+                 f = file_list[i]
+                 updates.append(gr.update(value=f["content"], label=f["filename"], language=f["language"], visible=True))
+             else:
+                 updates.append(gr.update(visible=False))
+         return updates + [gr.update(value=think_msg, visible=think_visible)]

+     gen_btn.click(
          fn=generate_code,
+         inputs=[prompt, backend, max_tokens, temperature, top_p],
+         outputs=[gr.State(), gr.State(), think_box],
+     ).then(
+         fn=display_outputs,
+         inputs=[gr.State(), gr.State(), think_box],
+         outputs=dynamic_outputs + [think_box]
      )

  if __name__ == "__main__":
+     demo.queue().launch()
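
Note on the wiring above: `generate_code` yields `(files, think, visibility_update)` and `display_outputs` fans a file list out across the five pre-created `gr.Code` tabs plus the think box. A minimal sketch of the data shape that hand-off assumes (the sample data is illustrative only):

```python
# Illustrative only: the file-list shape display_outputs() expects from generate_code().
sample_files = [
    {"filename": "index.html", "language": "html", "content": "<h1>Hello</h1>"},
    {"filename": "app.py", "language": "python", "content": "print('hi')"},
]
updates = display_outputs(sample_files, think_msg="", think_visible=False)
# Five gr.update(...) payloads for the code tabs (two visible, three hidden)
# plus one for think_box, i.e. 6 updates in total.
print(len(updates))
```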