AIMaster7 committed (verified)
Commit b5ad5b3 · 1 Parent(s): 1fc362c

Update main.py

Files changed (1):
  1. main.py +172 -128

main.py CHANGED
@@ -4,7 +4,7 @@ import os
 import secrets
 import string
 import time
-from typing import List, Optional, Union, Any
+from typing import List, Optional, Union, Any, Literal
 import httpx
 from dotenv import load_dotenv
 from fastapi import FastAPI
@@ -52,9 +52,20 @@ async def list_models():
     return {"object": "list", "data": AVAILABLE_MODELS}
 
 # === Chat Completion ===
+class FunctionCall(BaseModel):
+    name: str
+    arguments: str
+
+class ToolCall(BaseModel):
+    id: str
+    type: Literal["function"] = "function"
+    function: FunctionCall
+
 class Message(BaseModel):
     role: str
-    content: str
+    content: Optional[str] = None
+    tool_calls: Optional[List[ToolCall]] = None
+    name: Optional[str] = None
 
 class ChatRequest(BaseModel):
     messages: List[Message]
@@ -76,189 +87,222 @@ async def chat_completion(request: ChatRequest):
         'referer': 'https://www.chatwithmono.xyz/',
         'user-agent': 'Mozilla/5.0',
     }
+
     if request.tools:
-        # Handle tool by giving in system prompt.
-        # Tool call must be encoded in <tool_call><tool_call> XML tag.
-        tool_prompt = f"""You have access to the following tools . To call a tool, please respond with JSON for a tool call within <tool_call><tool_call> XML tag. Respond in the format {{"name": tool name, "parameters": dictionary of argument name and its value}}. Do not use variables.
-Tools:
-{";".join(f"<tool>{tool}</tool>" for tool in request.tools)}
-
-Response Format for tool call:
-For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
-<tool_call>
-{{"name": <function-name>, "arguments": <args-json-object>}}
-</tool_call>
-
-Example of tool calling:
-<tool_call>
-{{"name": "get_weather", "parameters": {{"city": "New York"}}}}
-</tool_call>
-
-Using tools is recommended.
-"""
-        if request.messages[0].role == "system":
+        tool_prompt = """You have access to tools. To call a tool, respond with JSON within <tool_call><tool_call> XML tags.
+Format: <tool_call>{"name":<name>,"parameters":{...}}</tool_call>"""
+        if request.messages and request.messages[0].role == "system":
             request.messages[0].content += "\n\n" + tool_prompt
         else:
-            request.messages.insert(0, {"role": "system", "content": tool_prompt})
+            request.messages.insert(0, Message(role="system", content=tool_prompt))
+
     request_data = request.model_dump(exclude_unset=True)
-
     payload = {
         "messages": request_data["messages"],
         "model": model_id
     }
+
     if request.stream:
         async def event_stream():
             created = int(time.time())
             is_first_chunk = True
             usage_info = None
-            is_tool_call = False
-            chunks_buffer = []
-            max_initial_chunks = 4  # Number of initial chunks to buffer
+            tool_call_buffer = ""
+            in_tool_call = False
+
            try:
                 async with httpx.AsyncClient(timeout=120) as client:
-                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
+                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat",
+                                             headers=headers, json=payload) as response:
                         response.raise_for_status()
                         async for line in response.aiter_lines():
-                            if not line: continue
+                            if not line:
+                                continue
+
                             if line.startswith("0:"):
                                 try:
                                     content_piece = json.loads(line[2:])
-                                    print(content_piece)
-                                    # Buffer the first few chunks
-                                    if len(chunks_buffer) < max_initial_chunks:
-                                        chunks_buffer.append(content_piece)
-                                        continue
-                                    # Process the buffered chunks if we haven't already
-                                    if chunks_buffer and not is_tool_call:
-                                        full_buffer = ''.join(chunks_buffer)
-                                        if "<tool_call>" in full_buffer:
-                                            print("Tool call detected")
-                                            is_tool_call = True
-
-                                    # Process the current chunk
-                                    if is_tool_call:
-                                        chunks_buffer.append(content_piece)
-
-                                        full_buffer = ''.join(chunks_buffer)
-
-                                        if "</tool_call>" in full_buffer:
-                                            print("Tool call End detected")
-                                            # Process tool call in the current chunk
-                                            tool_call_str = full_buffer.split("<tool_call>")[1].split("</tool_call>")[0]
-                                            tool_call_json = json.loads(tool_call_str.strip())
-                                            delta = {
-                                                "content": None,
-                                                "tool_calls": [{
-                                                    "index": 0,
-                                                    "id": generate_random_id("call_"),
-                                                    "type": "function",
-                                                    "function": {
-                                                        "name": tool_call_json["name"],
-                                                        "arguments": json.dumps(tool_call_json["parameters"])
-                                                    }
-                                                }]
-                                            }
-                                            chunk_data = {
-                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
-                                                "model": model_id,
-                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                                "usage": None
-                                            }
-                                            yield f"data: {json.dumps(chunk_data)}\n\n"
+
+                                    # Check for tool call tags
+                                    if not in_tool_call and "<tool_call>" in content_piece:
+                                        in_tool_call = True
+                                        tool_call_buffer = ""
+
+                                    if in_tool_call:
+                                        tool_call_buffer += content_piece
+                                        if "</tool_call>" in tool_call_buffer:
+                                            # Process complete tool call
+                                            try:
+                                                # Extract tool call content
+                                                start_idx = tool_call_buffer.find("<tool_call>") + len("<tool_call>")
+                                                end_idx = tool_call_buffer.find("</tool_call>")
+                                                tool_call_str = tool_call_buffer[start_idx:end_idx].strip()
+
+                                                tool_call_json = json.loads(tool_call_str)
+                                                delta = {
+                                                    "content": None,
+                                                    "tool_calls": [{
+                                                        "index": 0,
+                                                        "id": generate_random_id("call_"),
+                                                        "type": "function",
+                                                        "function": {
+                                                            "name": tool_call_json["name"],
+                                                            "arguments": json.dumps(tool_call_json["parameters"])
+                                                        }
+                                                    }]
+                                                }
+                                                chunk_data = {
+                                                    "id": chat_id,
+                                                    "object": "chat.completion.chunk",
+                                                    "created": created,
+                                                    "model": model_id,
+                                                    "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
+                                                    "usage": None
+                                                }
+                                                yield f"data: {json.dumps(chunk_data)}\n\n"
+                                                in_tool_call = False
+                                                tool_call_buffer = ""
+                                            except (json.JSONDecodeError, KeyError):
+                                                # Fallback to regular content if parsing fails
+                                                in_tool_call = False
+                                                tool_call_buffer = ""
                                         else:
+                                            # Still building tool call - skip sending this chunk
                                             continue
                                     else:
-
                                         # Regular content
+                                        delta = {"content": content_piece}
                                         if is_first_chunk:
-                                            delta = {"content": "".join(chunks_buffer), "tool_calls": None}
                                             delta["role"] = "assistant"
                                             is_first_chunk = False
-                                            chunk_data = {
-                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
-                                                "model": model_id,
-                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                                "usage": None
-                                            }
-                                            yield f"data: {json.dumps(chunk_data)}\n\n"
-
-                                        delta = {"content": content_piece, "tool_calls": None}
-
                                         chunk_data = {
-                                            "id": chat_id, "object": "chat.completion.chunk", "created": created,
+                                            "id": chat_id,
+                                            "object": "chat.completion.chunk",
+                                            "created": created,
                                             "model": model_id,
                                             "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
                                             "usage": None
                                         }
                                         yield f"data: {json.dumps(chunk_data)}\n\n"
-                                except json.JSONDecodeError: continue
+
+                                except json.JSONDecodeError:
+                                    continue
+
                             elif line.startswith(("e:", "d:")):
                                 try:
                                     usage_info = json.loads(line[2:]).get("usage")
-                                except (json.JSONDecodeError, AttributeError): pass
+                                except (json.JSONDecodeError, AttributeError):
+                                    pass
                                 break
-
-                final_usage = None
-                if usage_info:
-                    prompt_tokens = usage_info.get("promptTokens", 0)
-                    completion_tokens = usage_info.get("completionTokens", 0)
-                    final_usage = {
-                        "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens,
-                        "total_tokens": prompt_tokens + completion_tokens,
-                    }
+
+                # Final chunk
                 done_chunk = {
-                    "id": chat_id, "object": "chat.completion.chunk", "created": created, "model": model_id,
+                    "id": chat_id,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": model_id,
                     "choices": [{
                         "index": 0,
-                        "delta": {"role": "assistant", "content": None, "function_call": None, "tool_calls": None},
+                        "delta": {},
                         "finish_reason": "stop"
                     }],
-                    "usage": final_usage
+                    "usage": usage_info
                 }
                 yield f"data: {json.dumps(done_chunk)}\n\n"
+                yield "data: [DONE]\n\n"
+
             except httpx.HTTPStatusError as e:
                 error_content = {
                     "error": {
-                        "message": f"Upstream API error: {e.response.status_code}. Details: {e.response.text}",
-                        "type": "upstream_error", "code": str(e.response.status_code)
+                        "message": f"Upstream API error: {e.response.status_code}",
+                        "type": "upstream_error",
+                        "code": str(e.response.status_code)
                     }
                 }
                 yield f"data: {json.dumps(error_content)}\n\n"
-            finally:
                 yield "data: [DONE]\n\n"
+
         return StreamingResponse(event_stream(), media_type="text/event-stream")
-    else: # Non-streaming
-        assistant_response, usage_info = "", {}
-        tool_call_json = None
+
+    else:  # Non-streaming
         try:
             async with httpx.AsyncClient(timeout=120) as client:
-                async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
-                    response.raise_for_status()
-                    async for chunk in response.aiter_lines():
-                        if chunk.startswith("0:"):
-                            try: assistant_response += json.loads(chunk[2:])
-                            except: continue
-                        elif chunk.startswith(("e:", "d:")):
-                            try: usage_info = json.loads(chunk[2:]).get("usage", {})
-                            except: continue
-
-            if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
-                tool_call_str = assistant_response.split("<tool_call>")[1].split("</tool_call>")[0]
-                tool_call_json = json.loads(tool_call_str.strip())
-
-            return JSONResponse(content={
-                "id": chat_id, "object": "chat.completion", "created": int(time.time()), "model": model_id,
-                "choices": [{"index": 0, "message": {"role": "assistant", "content": assistant_response if tool_call_json is None else None, "tool_calls": tool_call_json}, "finish_reason": "stop"}],
-                "usage": {
-                    "prompt_tokens": usage_info.get("promptTokens", 0),
-                    "completion_tokens": usage_info.get("completionTokens", 0),
-                    "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
-                }
-            })
+                response = await client.post(
+                    "https://www.chatwithmono.xyz/api/chat",
+                    headers=headers,
+                    json=payload
+                )
+                response.raise_for_status()
+
+                assistant_response = ""
+                usage_info = {}
+                for line in response.text.splitlines():
+                    if line.startswith("0:"):
+                        try:
+                            assistant_response += json.loads(line[2:])
+                        except json.JSONDecodeError:
+                            continue
+                    elif line.startswith(("e:", "d:")):
+                        try:
+                            usage_info = json.loads(line[2:]).get("usage", {})
+                        except json.JSONDecodeError:
+                            continue
+
+                tool_calls = None
+                if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
+                    try:
+                        # Extract tool call content
+                        start_idx = assistant_response.find("<tool_call>") + len("<tool_call>")
+                        end_idx = assistant_response.find("</tool_call>")
+                        tool_call_str = assistant_response[start_idx:end_idx].strip()
+
+                        tool_call_json = json.loads(tool_call_str)
+                        tool_calls = [{
+                            "id": generate_random_id("call_"),
+                            "type": "function",
+                            "function": {
+                                "name": tool_call_json["name"],
+                                "arguments": json.dumps(tool_call_json["parameters"])
+                            }
+                        }]
+                        # Clear content for tool call response
+                        assistant_response = None
+                    except (json.JSONDecodeError, KeyError):
+                        # If parsing fails, treat as regular content
+                        tool_calls = None
+
+                return JSONResponse(content={
+                    "id": chat_id,
+                    "object": "chat.completion",
+                    "created": int(time.time()),
+                    "model": model_id,
+                    "choices": [{
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": assistant_response,
+                            "tool_calls": tool_calls
+                        },
+                        "finish_reason": "stop"
+                    }],
+                    "usage": {
+                        "prompt_tokens": usage_info.get("promptTokens", 0),
+                        "completion_tokens": usage_info.get("completionTokens", 0),
+                        "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
+                    }
+                })
+
         except httpx.HTTPStatusError as e:
-            return JSONResponse(status_code=e.response.status_code, content={"error": {"message": f"Upstream API error. Details: {e.response.text}", "type": "upstream_error"}})
-
+            return JSONResponse(
+                status_code=e.response.status_code,
+                content={
+                    "error": {
+                        "message": f"Upstream API error: {e.response.status_code}",
+                        "type": "upstream_error",
+                        "code": str(e.response.status_code)
+                    }
+                }
+            )
 
 # === Image Generation ===
 class ImageGenerationRequest(BaseModel):
@@ -376,4 +420,4 @@ async def create_moderation(request: ModerationRequest):
 # --- Main Execution ---
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
 
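For illustration, a minimal non-streaming client sketch exercising the tool-calling path this commit adds. Everything deployment-specific here is an assumption, not shown in the diff: the host/port, the route path /v1/chat/completions (the route decorator is outside the changed hunks), the model name, and the get_weather tool schema.

import json
import httpx

# Hypothetical host, route, model name, and tool schema -- adjust to the
# actual deployment.
payload = {
    "model": "gpt-4",
    "stream": False,
    "messages": [{"role": "user", "content": "Weather in New York?"}],
    "tools": [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }],
}

resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
message = resp.json()["choices"][0]["message"]

if message.get("tool_calls"):
    # The proxy parsed a <tool_call> block: content is null and the call
    # arrives as an OpenAI-style tool_calls array.
    fn = message["tool_calls"][0]["function"]
    print(fn["name"], json.loads(fn["arguments"]))
else:
    print(message["content"])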
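And a matching sketch for the streaming path, under the same hypothetical host, route, and model assumptions. The new handler buffers any <tool_call>...</tool_call> span and emits it as a single tool_calls delta, so a client only has to branch on the delta shape:

import json
import httpx

payload = {
    "model": "gpt-4",
    "stream": True,
    "messages": [{"role": "user", "content": "Hello!"}],
}

with httpx.stream("POST", "http://localhost:8000/v1/chat/completions",
                  json=payload, timeout=120) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        chunk = json.loads(data)
        for choice in chunk.get("choices", []):
            delta = choice["delta"]
            if delta.get("tool_calls"):
                # A buffered tool call arrives as one complete delta.
                print("\ntool call:", delta["tool_calls"][0]["function"])
            elif delta.get("content"):
                print(delta["content"], end="", flush=True)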