Spaces:

EnzGamers
/

smallagent

Sleeping

App Files Files Community

EnzGamers committed 24 days ago

Commit

d97d510

verified ·

1 Parent(s): bbc6731

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -100

app.py CHANGED Viewed

@@ -10,51 +10,21 @@ import re
 # ==============================================================================
 # ===                BLOC DE CONFIGURATION DE L'AGENT                        ===
-# === Un jour, changez les valeurs ici pour utiliser un nouveau modèle.      ===
 # ==============================================================================
 MODEL_CONFIG = {
     # L'identifiant du modèle depuis le Hub Hugging Face (pas de GGUF).
-    "MODEL_ID": "Qwen/Qwen2-0.5B-Instruct",
     # Paramètres de génération (ajustez si nécessaire pour le nouveau modèle)
-    "MAX_TOKENS_PLAN": 1024,      # Tokens max pour la phase de réflexion/planification.
-    "MAX_TOKENS_ANSWER": 1024,    # Tokens max pour la réponse finale.
     "TEMPERATURE": 0.4,           # Contrôle la créativité (plus bas = plus déterministe).
 }
-# --- CERVEAU DE L'AGENT (SYSTEM PROMPT) ---
-# Si vous changez de modèle, vous devrez peut-être adapter ce prompt à son format.
 SYSTEM_PROMPT = """
-You are a highly advanced AI agent specializing in WordPress & WooCommerce development. You must follow a strict "Think, Act, Answer" workflow for every user request. Your primary directive is to be transparent, showing your thought process before taking any action.
-### AGENT WORKFLOW ###
-You MUST structure your response within the following XML tags. This is not optional.
-1.  **<thinking>**
-    -   First, think step-by-step. Analyze the user's request.
-    -   Break down the problem. Formulate a plan.
-    -   Decide if you need to use a tool to gather more information (like checking official documentation for the latest best practices).
-    -   Your entire thought process goes here.
-    </thinking>
-2.  **<tool_code>**
-    -   If you decide to use a tool, place the single JSON object for that tool here.
-    -   If you do not need a tool, this tag MUST be empty.
-    -   Example: `{"tool": "browse", "url": "https://developer.wordpress.org/reference/functions/add_action/"}`
-    </tool_code>
-3.  **<final_answer>**
-    -   If you can answer the user's request WITHOUT using a tool, formulate the complete and final answer here.
-    -   If you used a tool, leave this tag empty in your first response. You will be given the tool's output and asked to generate the final answer in a second step.
-    </final_answer>
-### AVAILABLE TOOLS ###
--   **Web Browser:** To use it, populate the `<tool_code>` tag with a JSON object: `{"tool": "browse", "url": "your_url_here"}`
-### CODING RULES (For the content inside <final_answer>) ###
--   Always provide secure, efficient, and standard-compliant code.
--   Explain where to place the code (`functions.php`, custom plugin, etc.).
 """
 # ==============================================================================
@@ -71,19 +41,6 @@ print("Model and tokenizer loaded successfully.")
 app = FastAPI()
-# --- Tool Execution Functions ---
-def execute_browse_tool(url: str) -> str:
-    try:
-        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
-        response = requests.get(url, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.content, 'html.parser')
-        for script in soup(["script", "style"]): script.decompose()
-        text = soup.get_text(separator='\n', strip=True)
-        return f"Content from {url}:\n\n{text[:4000]}"
-    except Exception as e:
-        return f"Error browsing {url}: {str(e)}"
 # --- Pydantic Models ---
 class ContentPart(BaseModel): type: str; text: str
 class ChatMessage(BaseModel): role: str; content: Union[str, List[ContentPart]]
@@ -95,11 +52,6 @@ class ChatCompletionRequest(BaseModel):
 class ModelData(BaseModel): id: str; object: str = "model"; owned_by: str = "user"
 class ModelList(BaseModel): object: str = "list"; data: List[ModelData]
-# --- Helper function to parse XML-like tags ---
-def parse_tag(tag: str, text: str) -> str:
-    match = re.search(f'<{tag}>(.*?)</{tag}>', text, re.DOTALL)
-    return match.group(1).strip() if match else ""
 # --- API Endpoints ---
 @app.get("/models", response_model=ModelList)
 async def list_models():
@@ -116,66 +68,48 @@ async def create_chat_completion(request: ChatCompletionRequest):
     if not user_prompt: return {"error": "Prompt not found."}
-    async def stream_agent_process():
         response_id = f"chatcmpl-{uuid.uuid4()}"
         def stream_chunk(content: str):
             chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]}
             return f"data: {json.dumps(chunk)}\n\n"
-        initial_messages = [{'role': 'system', 'content': SYSTEM_PROMPT}, {'role': 'user', 'content': user_prompt}]
-        formatted_prompt = tokenizer.apply_chat_template(initial_messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)
-        outputs = model.generate(**inputs, max_new_tokens=MODEL_CONFIG['MAX_TOKENS_PLAN'], eos_token_id=tokenizer.eos_token_id)
-        agent_plan = tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
-        thinking_text = parse_tag("thinking", agent_plan)
-        tool_code_text = parse_tag("tool_code", agent_plan)
-        final_answer_text = parse_tag("final_answer", agent_plan)
-        if thinking_text:
-            yield stream_chunk(f"🤔 **Thinking...**\n```thought\n{thinking_text}\n```\n\n")
-            await asyncio.sleep(0.1)
-        tool_call = None
-        if tool_code_text:
-            try:
-                tool_call = json.loads(tool_code_text)
-            except json.JSONDecodeError:
-                pass
-        if tool_call and 'tool' in tool_call:
-            if tool_call['tool'] == 'browse' and 'url' in tool_call:
-                url = tool_call['url']
-                yield stream_chunk(f"🔎 **Action:** Browsing `{url}`...\n\n")
-                await asyncio.sleep(0.1)
-                tool_context = execute_browse_tool(url)
-            else:
-                tool_context = "Unknown tool requested."
-            synthesis_messages = [
-                {'role': 'system', 'content': SYSTEM_PROMPT},
-                {'role': 'user', 'content': user_prompt},
-                {'role': 'assistant', 'content': f"<thinking>{thinking_text}</thinking><tool_code>{tool_code_text}</tool_code>"},
-                {'role': 'system', 'content': f"Here is the result from your tool use:\n\n<tool_result>\n{tool_context}\n</tool_result>\n\nNow, generate the final, complete answer inside the <final_answer> tag."}
-            ]
-            synthesis_prompt = tokenizer.apply_chat_template(synthesis_messages, tokenize=False, add_generation_prompt=True)
-            synthesis_inputs = tokenizer(synthesis_prompt, return_tensors="pt", padding=True).to(DEVICE)
-            synthesis_outputs = model.generate(**synthesis_inputs, max_new_tokens=MODEL_CONFIG['MAX_TOKENS_ANSWER'], do_sample=True, temperature=MODEL_CONFIG['TEMPERATURE'], top_k=50, top_p=0.95, eos_token_id=tokenizer.eos_token_id)
-            final_response = tokenizer.decode(synthesis_outputs[0][len(synthesis_inputs['input_ids'][0]):], skip_special_tokens=True)
-            final_answer_text = parse_tag("final_answer", final_response)
-        if final_answer_text:
-            yield stream_chunk(f"✅ **Final Answer:**\n{final_answer_text}")
-        else:
-            yield stream_chunk("Agent could not generate a final answer.")
         final_chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
         yield f"data: {json.dumps(final_chunk)}\n\n"
         yield "data: [DONE]\n\n"
-    return StreamingResponse(stream_agent_process(), media_type="text/event-stream")
 @app.get("/")
 def root():
-    return {"status": "Configurable Reasoning Agent is online", "model_id": MODEL_CONFIG['MODEL_ID']}

 # ==============================================================================
 # ===                BLOC DE CONFIGURATION DE L'AGENT                        ===
 # ==============================================================================
 MODEL_CONFIG = {
     # L'identifiant du modèle depuis le Hub Hugging Face (pas de GGUF).
+    "MODEL_ID": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
     # Paramètres de génération (ajustez si nécessaire pour le nouveau modèle)
+    "MAX_TOKENS_ANSWER": 1500,    # Tokens max pour la réponse finale.
     "TEMPERATURE": 0.4,           # Contrôle la créativité (plus bas = plus déterministe).
 }
+# --- NOUVEAU CERVEAU DE L'AGENT (SYSTEM PROMPT) ---
+# C'est votre nouvelle directive fondamentale.
 SYSTEM_PROMPT = """
+This work will be evaluated by a committee of the smartest thinkers to determine if you deserve to be classified as a reliable model. Be worthy of winning and bring out your best, now and in the future,Understanding the intent of user_input & respond in user language only.
 """
 # ==============================================================================
 app = FastAPI()
 # --- Pydantic Models ---
 class ContentPart(BaseModel): type: str; text: str
 class ChatMessage(BaseModel): role: str; content: Union[str, List[ContentPart]]
 class ModelData(BaseModel): id: str; object: str = "model"; owned_by: str = "user"
 class ModelList(BaseModel): object: str = "list"; data: List[ModelData]
 # --- API Endpoints ---
 @app.get("/models", response_model=ModelList)
 async def list_models():
     if not user_prompt: return {"error": "Prompt not found."}
+    async def stream_direct_response():
         response_id = f"chatcmpl-{uuid.uuid4()}"
         def stream_chunk(content: str):
             chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]}
             return f"data: {json.dumps(chunk)}\n\n"
+        # --- LOGIQUE SIMPLIFIÉE : RÉPONSE DIRECTE ---
+        # On combine la directive système et la question de l'utilisateur
+        messages = [
+            {'role': 'system', 'content': SYSTEM_PROMPT},
+            {'role': 'user', 'content': user_prompt}
+        ]
+        # On prépare les données pour le modèle
+        formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)
+        # On génère la réponse complète en une seule fois
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=MODEL_CONFIG['MAX_TOKENS_ANSWER'],
+            do_sample=True,
+            temperature=MODEL_CONFIG['TEMPERATURE'],
+            top_k=50,
+            top_p=0.95,
+            eos_token_id=tokenizer.eos_token_id
+        )
+        response_text = tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
+        # On streame la réponse finale
+        for char in response_text:
+            yield stream_chunk(char)
+            await asyncio.sleep(0.005)
+        # --- Fin du stream ---
         final_chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
         yield f"data: {json.dumps(final_chunk)}\n\n"
         yield "data: [DONE]\n\n"
+    return StreamingResponse(stream_direct_response(), media_type="text/event-stream")
 @app.get("/")
 def root():
+    return {"status": "High-Quality Direct Response Agent is online", "model_id": MODEL_CONFIG['MODEL_ID']}