EnzGamers committed on
Commit
d97d510
·
verified ·
1 Parent(s): bbc6731

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -100
app.py CHANGED
@@ -10,51 +10,21 @@ import re
10
 
11
  # ==============================================================================
12
  # === BLOC DE CONFIGURATION DE L'AGENT ===
13
- # === Un jour, changez les valeurs ici pour utiliser un nouveau modèle. ===
14
  # ==============================================================================
15
 
16
  MODEL_CONFIG = {
17
  # L'identifiant du modèle depuis le Hub Hugging Face (pas de GGUF).
18
- "MODEL_ID": "Qwen/Qwen2-0.5B-Instruct",
19
 
20
  # Paramètres de génération (ajustez si nécessaire pour le nouveau modèle)
21
- "MAX_TOKENS_PLAN": 1024, # Tokens max pour la phase de réflexion/planification.
22
- "MAX_TOKENS_ANSWER": 1024, # Tokens max pour la réponse finale.
23
  "TEMPERATURE": 0.4, # Contrôle la créativité (plus bas = plus déterministe).
24
  }
25
 
26
- # --- CERVEAU DE L'AGENT (SYSTEM PROMPT) ---
27
- # Si vous changez de modèle, vous devrez peut-être adapter ce prompt à son format.
28
  SYSTEM_PROMPT = """
29
- You are a highly advanced AI agent specializing in WordPress & WooCommerce development. You must follow a strict "Think, Act, Answer" workflow for every user request. Your primary directive is to be transparent, showing your thought process before taking any action.
30
-
31
- ### AGENT WORKFLOW ###
32
- You MUST structure your response within the following XML tags. This is not optional.
33
-
34
- 1. **<thinking>**
35
- - First, think step-by-step. Analyze the user's request.
36
- - Break down the problem. Formulate a plan.
37
- - Decide if you need to use a tool to gather more information (like checking official documentation for the latest best practices).
38
- - Your entire thought process goes here.
39
- </thinking>
40
-
41
- 2. **<tool_code>**
42
- - If you decide to use a tool, place the single JSON object for that tool here.
43
- - If you do not need a tool, this tag MUST be empty.
44
- - Example: `{"tool": "browse", "url": "https://developer.wordpress.org/reference/functions/add_action/"}`
45
- </tool_code>
46
-
47
- 3. **<final_answer>**
48
- - If you can answer the user's request WITHOUT using a tool, formulate the complete and final answer here.
49
- - If you used a tool, leave this tag empty in your first response. You will be given the tool's output and asked to generate the final answer in a second step.
50
- </final_answer>
51
-
52
- ### AVAILABLE TOOLS ###
53
- - **Web Browser:** To use it, populate the `<tool_code>` tag with a JSON object: `{"tool": "browse", "url": "your_url_here"}`
54
-
55
- ### CODING RULES (For the content inside <final_answer>) ###
56
- - Always provide secure, efficient, and standard-compliant code.
57
- - Explain where to place the code (`functions.php`, custom plugin, etc.).
58
  """
59
 
60
  # ==============================================================================
@@ -71,19 +41,6 @@ print("Model and tokenizer loaded successfully.")
71
 
72
  app = FastAPI()
73
 
74
- # --- Tool Execution Functions ---
75
- def execute_browse_tool(url: str) -> str:
76
- try:
77
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
78
- response = requests.get(url, headers=headers, timeout=10)
79
- response.raise_for_status()
80
- soup = BeautifulSoup(response.content, 'html.parser')
81
- for script in soup(["script", "style"]): script.decompose()
82
- text = soup.get_text(separator='\n', strip=True)
83
- return f"Content from {url}:\n\n{text[:4000]}"
84
- except Exception as e:
85
- return f"Error browsing {url}: {str(e)}"
86
-
87
  # --- Pydantic Models ---
88
  class ContentPart(BaseModel): type: str; text: str
89
  class ChatMessage(BaseModel): role: str; content: Union[str, List[ContentPart]]
@@ -95,11 +52,6 @@ class ChatCompletionRequest(BaseModel):
95
  class ModelData(BaseModel): id: str; object: str = "model"; owned_by: str = "user"
96
  class ModelList(BaseModel): object: str = "list"; data: List[ModelData]
97
 
98
- # --- Helper function to parse XML-like tags ---
99
- def parse_tag(tag: str, text: str) -> str:
100
- match = re.search(f'<{tag}>(.*?)</{tag}>', text, re.DOTALL)
101
- return match.group(1).strip() if match else ""
102
-
103
  # --- API Endpoints ---
104
  @app.get("/models", response_model=ModelList)
105
  async def list_models():
@@ -116,66 +68,48 @@ async def create_chat_completion(request: ChatCompletionRequest):
116
 
117
  if not user_prompt: return {"error": "Prompt not found."}
118
 
119
- async def stream_agent_process():
120
  response_id = f"chatcmpl-{uuid.uuid4()}"
121
 
122
  def stream_chunk(content: str):
123
  chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]}
124
  return f"data: {json.dumps(chunk)}\n\n"
125
 
126
- initial_messages = [{'role': 'system', 'content': SYSTEM_PROMPT}, {'role': 'user', 'content': user_prompt}]
127
- formatted_prompt = tokenizer.apply_chat_template(initial_messages, tokenize=False, add_generation_prompt=True)
 
 
 
 
 
 
 
128
  inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)
129
- outputs = model.generate(**inputs, max_new_tokens=MODEL_CONFIG['MAX_TOKENS_PLAN'], eos_token_id=tokenizer.eos_token_id)
130
- agent_plan = tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
131
-
132
- thinking_text = parse_tag("thinking", agent_plan)
133
- tool_code_text = parse_tag("tool_code", agent_plan)
134
- final_answer_text = parse_tag("final_answer", agent_plan)
135
-
136
- if thinking_text:
137
- yield stream_chunk(f"🤔 **Thinking...**\n```thought\n{thinking_text}\n```\n\n")
138
- await asyncio.sleep(0.1)
139
-
140
- tool_call = None
141
- if tool_code_text:
142
- try:
143
- tool_call = json.loads(tool_code_text)
144
- except json.JSONDecodeError:
145
- pass
146
 
147
- if tool_call and 'tool' in tool_call:
148
- if tool_call['tool'] == 'browse' and 'url' in tool_call:
149
- url = tool_call['url']
150
- yield stream_chunk(f"🔎 **Action:** Browsing `{url}`...\n\n")
151
- await asyncio.sleep(0.1)
152
- tool_context = execute_browse_tool(url)
153
- else:
154
- tool_context = "Unknown tool requested."
155
-
156
- synthesis_messages = [
157
- {'role': 'system', 'content': SYSTEM_PROMPT},
158
- {'role': 'user', 'content': user_prompt},
159
- {'role': 'assistant', 'content': f"<thinking>{thinking_text}</thinking><tool_code>{tool_code_text}</tool_code>"},
160
- {'role': 'system', 'content': f"Here is the result from your tool use:\n\n<tool_result>\n{tool_context}\n</tool_result>\n\nNow, generate the final, complete answer inside the <final_answer> tag."}
161
- ]
162
- synthesis_prompt = tokenizer.apply_chat_template(synthesis_messages, tokenize=False, add_generation_prompt=True)
163
- synthesis_inputs = tokenizer(synthesis_prompt, return_tensors="pt", padding=True).to(DEVICE)
164
- synthesis_outputs = model.generate(**synthesis_inputs, max_new_tokens=MODEL_CONFIG['MAX_TOKENS_ANSWER'], do_sample=True, temperature=MODEL_CONFIG['TEMPERATURE'], top_k=50, top_p=0.95, eos_token_id=tokenizer.eos_token_id)
165
- final_response = tokenizer.decode(synthesis_outputs[0][len(synthesis_inputs['input_ids'][0]):], skip_special_tokens=True)
166
- final_answer_text = parse_tag("final_answer", final_response)
167
-
168
- if final_answer_text:
169
- yield stream_chunk(f"✅ **Final Answer:**\n{final_answer_text}")
170
- else:
171
- yield stream_chunk("Agent could not generate a final answer.")
172
 
 
173
  final_chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
174
  yield f"data: {json.dumps(final_chunk)}\n\n"
175
  yield "data: [DONE]\n\n"
176
 
177
- return StreamingResponse(stream_agent_process(), media_type="text/event-stream")
178
 
179
  @app.get("/")
180
  def root():
181
- return {"status": "Configurable Reasoning Agent is online", "model_id": MODEL_CONFIG['MODEL_ID']}
 
10
 
11
  # ==============================================================================
12
  # === BLOC DE CONFIGURATION DE L'AGENT ===
 
13
  # ==============================================================================
14
 
15
  MODEL_CONFIG = {
16
  # L'identifiant du modèle depuis le Hub Hugging Face (pas de GGUF).
17
+ "MODEL_ID": "Qwen/Qwen2.5-Coder-0.5B-Instruct",
18
 
19
  # Paramètres de génération (ajustez si nécessaire pour le nouveau modèle)
20
+ "MAX_TOKENS_ANSWER": 1500, # Tokens max pour la réponse finale.
 
21
  "TEMPERATURE": 0.4, # Contrôle la créativité (plus bas = plus déterministe).
22
  }
23
 
24
+ # --- NOUVEAU CERVEAU DE L'AGENT (SYSTEM PROMPT) ---
25
+ # C'est votre nouvelle directive fondamentale.
26
  SYSTEM_PROMPT = """
27
+ This work will be evaluated by a committee of the smartest thinkers to determine if you deserve to be classified as a reliable model. Be worthy of winning and bring out your best, now and in the future,Understanding the intent of user_input & respond in user language only.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  """
29
 
30
  # ==============================================================================
 
41
 
42
  app = FastAPI()
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # --- Pydantic Models ---
45
  class ContentPart(BaseModel): type: str; text: str
46
  class ChatMessage(BaseModel): role: str; content: Union[str, List[ContentPart]]
 
52
  class ModelData(BaseModel): id: str; object: str = "model"; owned_by: str = "user"
53
  class ModelList(BaseModel): object: str = "list"; data: List[ModelData]
54
 
 
 
 
 
 
55
  # --- API Endpoints ---
56
  @app.get("/models", response_model=ModelList)
57
  async def list_models():
 
68
 
69
  if not user_prompt: return {"error": "Prompt not found."}
70
 
71
+ async def stream_direct_response():
72
  response_id = f"chatcmpl-{uuid.uuid4()}"
73
 
74
  def stream_chunk(content: str):
75
  chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]}
76
  return f"data: {json.dumps(chunk)}\n\n"
77
 
78
+ # --- LOGIQUE SIMPLIFIÉE : RÉPONSE DIRECTE ---
79
+ # On combine la directive système et la question de l'utilisateur
80
+ messages = [
81
+ {'role': 'system', 'content': SYSTEM_PROMPT},
82
+ {'role': 'user', 'content': user_prompt}
83
+ ]
84
+
85
+ # On prépare les données pour le modèle
86
+ formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
87
  inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True).to(DEVICE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ # On génère la réponse complète en une seule fois
90
+ outputs = model.generate(
91
+ **inputs,
92
+ max_new_tokens=MODEL_CONFIG['MAX_TOKENS_ANSWER'],
93
+ do_sample=True,
94
+ temperature=MODEL_CONFIG['TEMPERATURE'],
95
+ top_k=50,
96
+ top_p=0.95,
97
+ eos_token_id=tokenizer.eos_token_id
98
+ )
99
+ response_text = tokenizer.decode(outputs[0][len(inputs['input_ids'][0]):], skip_special_tokens=True)
100
+
101
+ # On streame la réponse finale
102
+ for char in response_text:
103
+ yield stream_chunk(char)
104
+ await asyncio.sleep(0.005)
 
 
 
 
 
 
 
 
 
105
 
106
+ # --- Fin du stream ---
107
  final_chunk = {"id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_CONFIG['MODEL_ID'], "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
108
  yield f"data: {json.dumps(final_chunk)}\n\n"
109
  yield "data: [DONE]\n\n"
110
 
111
+ return StreamingResponse(stream_direct_response(), media_type="text/event-stream")
112
 
113
  @app.get("/")
114
  def root():
115
+ return {"status": "High-Quality Direct Response Agent is online", "model_id": MODEL_CONFIG['MODEL_ID']}