AIMaster7 committed (verified)
Commit b5ad5b3 · 1 Parent(s): 1fc362c

Update main.py

Files changed (1):
  1. main.py +172 -128

main.py CHANGED
@@ -4,7 +4,7 @@ import os
 import secrets
 import string
 import time
-from typing import List, Optional, Union, Any
+from typing import List, Optional, Union, Any, Literal
 import httpx
 from dotenv import load_dotenv
 from fastapi import FastAPI
@@ -52,9 +52,20 @@ async def list_models():
     return {"object": "list", "data": AVAILABLE_MODELS}
 
 # === Chat Completion ===
+class FunctionCall(BaseModel):
+    name: str
+    arguments: str
+
+class ToolCall(BaseModel):
+    id: str
+    type: Literal["function"] = "function"
+    function: FunctionCall
+
 class Message(BaseModel):
     role: str
-    content: str
+    content: Optional[str] = None
+    tool_calls: Optional[List[ToolCall]] = None
+    name: Optional[str] = None
 
 class ChatRequest(BaseModel):
     messages: List[Message]
@@ -76,189 +87,222 @@ async def chat_completion(request: ChatRequest):
         'referer': 'https://www.chatwithmono.xyz/',
         'user-agent': 'Mozilla/5.0',
     }
+
     if request.tools:
-        # Handle tool by giving in system prompt.
-        # Tool call must be encoded in <tool_call><tool_call> XML tag.
-        tool_prompt = f"""You have access to the following tools . To call a tool, please respond with JSON for a tool call within <tool_call><tool_call> XML tag. Respond in the format {{"name": tool name, "parameters": dictionary of argument name and its value}}. Do not use variables.
-Tools:
-{";".join(f"<tool>{tool}</tool>" for tool in request.tools)}
-
-Response Format for tool call:
-For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
-<tool_call>
-{{"name": <function-name>, "arguments": <args-json-object>}}
-</tool_call>
-
-Example of tool calling:
-<tool_call>
-{{"name": "get_weather", "parameters": {{"city": "New York"}}}}
-</tool_call>
-
-Using tools is recommended.
-"""
-        if request.messages[0].role == "system":
+        tool_prompt = """You have access to tools. To call a tool, respond with JSON within <tool_call><tool_call> XML tags.
+Format: <tool_call>{"name":<name>,"parameters":{...}}</tool_call>"""
+        if request.messages and request.messages[0].role == "system":
             request.messages[0].content += "\n\n" + tool_prompt
         else:
-            request.messages.insert(0, {"role": "system", "content": tool_prompt})
+            request.messages.insert(0, Message(role="system", content=tool_prompt))
+
     request_data = request.model_dump(exclude_unset=True)
-
     payload = {
         "messages": request_data["messages"],
         "model": model_id
     }
+
     if request.stream:
         async def event_stream():
             created = int(time.time())
             is_first_chunk = True
             usage_info = None
-            is_tool_call = False
-            chunks_buffer = []
-            max_initial_chunks = 4  # Number of initial chunks to buffer
+            tool_call_buffer = ""
+            in_tool_call = False
+
            try:
                 async with httpx.AsyncClient(timeout=120) as client:
-                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
+                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat",
+                                             headers=headers, json=payload) as response:
                         response.raise_for_status()
                         async for line in response.aiter_lines():
-                            if not line: continue
+                            if not line:
+                                continue
+
                             if line.startswith("0:"):
                                 try:
                                     content_piece = json.loads(line[2:])
-                                    print(content_piece)
-                                    # Buffer the first few chunks
-                                    if len(chunks_buffer) < max_initial_chunks:
-                                        chunks_buffer.append(content_piece)
-                                        continue
-                                    # Process the buffered chunks if we haven't already
-                                    if chunks_buffer and not is_tool_call:
-                                        full_buffer = ''.join(chunks_buffer)
-                                        if "<tool_call>" in full_buffer:
-                                            print("Tool call detected")
-                                            is_tool_call = True
-
-                                    # Process the current chunk
-                                    if is_tool_call:
-                                        chunks_buffer.append(content_piece)
-
-                                        full_buffer = ''.join(chunks_buffer)
-
-                                        if "</tool_call>" in full_buffer:
-                                            print("Tool call End detected")
-                                            # Process tool call in the current chunk
-                                            tool_call_str = full_buffer.split("<tool_call>")[1].split("</tool_call>")[0]
-                                            tool_call_json = json.loads(tool_call_str.strip())
-                                            delta = {
-                                                "content": None,
-                                                "tool_calls": [{
-                                                    "index": 0,
-                                                    "id": generate_random_id("call_"),
-                                                    "type": "function",
-                                                    "function": {
-                                                        "name": tool_call_json["name"],
-                                                        "arguments": json.dumps(tool_call_json["parameters"])
-                                                    }
-                                                }]
-                                            }
-                                            chunk_data = {
-                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
-                                                "model": model_id,
-                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                                "usage": None
-                                            }
-                                            yield f"data: {json.dumps(chunk_data)}\n\n"
+
+                                    # Check for tool call tags
+                                    if not in_tool_call and "<tool_call>" in content_piece:
+                                        in_tool_call = True
+                                        tool_call_buffer = ""
+
+                                    if in_tool_call:
+                                        tool_call_buffer += content_piece
+                                        if "</tool_call>" in tool_call_buffer:
+                                            # Process complete tool call
+                                            try:
+                                                # Extract tool call content
+                                                start_idx = tool_call_buffer.find("<tool_call>") + len("<tool_call>")
+                                                end_idx = tool_call_buffer.find("</tool_call>")
+                                                tool_call_str = tool_call_buffer[start_idx:end_idx].strip()
+
+                                                tool_call_json = json.loads(tool_call_str)
+                                                delta = {
+                                                    "content": None,
+                                                    "tool_calls": [{
+                                                        "index": 0,
+                                                        "id": generate_random_id("call_"),
+                                                        "type": "function",
+                                                        "function": {
+                                                            "name": tool_call_json["name"],
+                                                            "arguments": json.dumps(tool_call_json["parameters"])
+                                                        }
+                                                    }]
+                                                }
+                                                chunk_data = {
+                                                    "id": chat_id,
+                                                    "object": "chat.completion.chunk",
+                                                    "created": created,
+                                                    "model": model_id,
+                                                    "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
+                                                    "usage": None
+                                                }
+                                                yield f"data: {json.dumps(chunk_data)}\n\n"
+                                                in_tool_call = False
+                                                tool_call_buffer = ""
+                                            except (json.JSONDecodeError, KeyError):
+                                                # Fallback to regular content if parsing fails
+                                                in_tool_call = False
+                                                tool_call_buffer = ""
                                         else:
+                                            # Still building tool call - skip sending this chunk
                                             continue
                                     else:
-
                                         # Regular content
+                                        delta = {"content": content_piece}
                                         if is_first_chunk:
-                                            delta = {"content": "".join(chunks_buffer), "tool_calls": None}
                                             delta["role"] = "assistant"
                                             is_first_chunk = False
-                                            chunk_data = {
-                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
-                                                "model": model_id,
-                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                                "usage": None
-                                            }
-                                            yield f"data: {json.dumps(chunk_data)}\n\n"
-
-                                        delta = {"content": content_piece, "tool_calls": None}
-
                                         chunk_data = {
-                                            "id": chat_id, "object": "chat.completion.chunk", "created": created,
+                                            "id": chat_id,
+                                            "object": "chat.completion.chunk",
+                                            "created": created,
                                             "model": model_id,
                                             "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
                                             "usage": None
                                         }
                                         yield f"data: {json.dumps(chunk_data)}\n\n"
-                                except json.JSONDecodeError: continue
+
+                                except json.JSONDecodeError:
+                                    continue
+
                             elif line.startswith(("e:", "d:")):
                                 try:
                                     usage_info = json.loads(line[2:]).get("usage")
-                                except (json.JSONDecodeError, AttributeError): pass
+                                except (json.JSONDecodeError, AttributeError):
+                                    pass
                                 break
-
-                final_usage = None
-                if usage_info:
-                    prompt_tokens = usage_info.get("promptTokens", 0)
-                    completion_tokens = usage_info.get("completionTokens", 0)
-                    final_usage = {
-                        "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens,
-                        "total_tokens": prompt_tokens + completion_tokens,
-                    }
+
+                # Final chunk
                 done_chunk = {
-                    "id": chat_id, "object": "chat.completion.chunk", "created": created, "model": model_id,
+                    "id": chat_id,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": model_id,
                     "choices": [{
                         "index": 0,
-                        "delta": {"role": "assistant", "content": None, "function_call": None, "tool_calls": None},
+                        "delta": {},
                         "finish_reason": "stop"
                     }],
-                    "usage": final_usage
+                    "usage": usage_info
                 }
                 yield f"data: {json.dumps(done_chunk)}\n\n"
+                yield "data: [DONE]\n\n"
+
             except httpx.HTTPStatusError as e:
                 error_content = {
                     "error": {
-                        "message": f"Upstream API error: {e.response.status_code}. Details: {e.response.text}",
-                        "type": "upstream_error", "code": str(e.response.status_code)
+                        "message": f"Upstream API error: {e.response.status_code}",
+                        "type": "upstream_error",
+                        "code": str(e.response.status_code)
                     }
                 }
                 yield f"data: {json.dumps(error_content)}\n\n"
-            finally:
                 yield "data: [DONE]\n\n"
+
         return StreamingResponse(event_stream(), media_type="text/event-stream")
-    else: # Non-streaming
-        assistant_response, usage_info = "", {}
-        tool_call_json = None
+
+    else:  # Non-streaming
         try:
             async with httpx.AsyncClient(timeout=120) as client:
-                async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
-                    response.raise_for_status()
-                    async for chunk in response.aiter_lines():
-                        if chunk.startswith("0:"):
-                            try: assistant_response += json.loads(chunk[2:])
-                            except: continue
-                        elif chunk.startswith(("e:", "d:")):
-                            try: usage_info = json.loads(chunk[2:]).get("usage", {})
-                            except: continue
-
-            if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
-                tool_call_str = assistant_response.split("<tool_call>")[1].split("</tool_call>")[0]
-                tool_call_json = json.loads(tool_call_str.strip())
-
-            return JSONResponse(content={
-                "id": chat_id, "object": "chat.completion", "created": int(time.time()), "model": model_id,
-                "choices": [{"index": 0, "message": {"role": "assistant", "content": assistant_response if tool_call_json is None else None, "tool_calls": tool_call_json}, "finish_reason": "stop"}],
-                "usage": {
-                    "prompt_tokens": usage_info.get("promptTokens", 0),
-                    "completion_tokens": usage_info.get("completionTokens", 0),
-                    "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
-                }
-            })
+                response = await client.post(
+                    "https://www.chatwithmono.xyz/api/chat",
+                    headers=headers,
+                    json=payload
+                )
+                response.raise_for_status()
+
+                assistant_response = ""
+                usage_info = {}
+                for line in response.text.splitlines():
+                    if line.startswith("0:"):
+                        try:
+                            assistant_response += json.loads(line[2:])
+                        except json.JSONDecodeError:
+                            continue
+                    elif line.startswith(("e:", "d:")):
+                        try:
+                            usage_info = json.loads(line[2:]).get("usage", {})
+                        except json.JSONDecodeError:
+                            continue
+
+                tool_calls = None
+                if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
+                    try:
+                        # Extract tool call content
+                        start_idx = assistant_response.find("<tool_call>") + len("<tool_call>")
+                        end_idx = assistant_response.find("</tool_call>")
+                        tool_call_str = assistant_response[start_idx:end_idx].strip()
+
+                        tool_call_json = json.loads(tool_call_str)
+                        tool_calls = [{
+                            "id": generate_random_id("call_"),
+                            "type": "function",
+                            "function": {
+                                "name": tool_call_json["name"],
+                                "arguments": json.dumps(tool_call_json["parameters"])
+                            }
+                        }]
+                        # Clear content for tool call response
+                        assistant_response = None
+                    except (json.JSONDecodeError, KeyError):
+                        # If parsing fails, treat as regular content
+                        tool_calls = None
+
+                return JSONResponse(content={
+                    "id": chat_id,
+                    "object": "chat.completion",
+                    "created": int(time.time()),
+                    "model": model_id,
+                    "choices": [{
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": assistant_response,
+                            "tool_calls": tool_calls
+                        },
+                        "finish_reason": "stop"
+                    }],
+                    "usage": {
+                        "prompt_tokens": usage_info.get("promptTokens", 0),
+                        "completion_tokens": usage_info.get("completionTokens", 0),
+                        "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
+                    }
+                })
+
         except httpx.HTTPStatusError as e:
-            return JSONResponse(status_code=e.response.status_code, content={"error": {"message": f"Upstream API error. Details: {e.response.text}", "type": "upstream_error"}})
-
+            return JSONResponse(
+                status_code=e.response.status_code,
+                content={
+                    "error": {
+                        "message": f"Upstream API error: {e.response.status_code}",
+                        "type": "upstream_error",
+                        "code": str(e.response.status_code)
+                    }
+                }
+            )
 
 # === Image Generation ===
 class ImageGenerationRequest(BaseModel):
@@ -376,4 +420,4 @@ async def create_moderation(request: ModerationRequest):
 # --- Main Execution ---
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
 
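For illustration, a minimal non-streaming client sketch exercising the tool-calling path this commit adds. Everything deployment-specific here is an assumption, not shown in the diff: the host/port, the route path /v1/chat/completions (the route decorator is outside the changed hunks), the model name, and the get_weather tool schema.

import json
import httpx

# Hypothetical host, route, model name, and tool schema -- adjust to the
# actual deployment.
payload = {
    "model": "gpt-4",
    "stream": False,
    "messages": [{"role": "user", "content": "Weather in New York?"}],
    "tools": [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }],
}

resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
message = resp.json()["choices"][0]["message"]

if message.get("tool_calls"):
    # The proxy parsed a <tool_call> block: content is null and the call
    # arrives as an OpenAI-style tool_calls array.
    fn = message["tool_calls"][0]["function"]
    print(fn["name"], json.loads(fn["arguments"]))
else:
    print(message["content"])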
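And a matching sketch for the streaming path, under the same hypothetical host, route, and model assumptions. The new handler buffers any <tool_call>...</tool_call> span and emits it as a single tool_calls delta, so a client only has to branch on the delta shape:

import json
import httpx

payload = {
    "model": "gpt-4",
    "stream": True,
    "messages": [{"role": "user", "content": "Hello!"}],
}

with httpx.stream("POST", "http://localhost:8000/v1/chat/completions",
                  json=payload, timeout=120) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        chunk = json.loads(data)
        for choice in chunk.get("choices", []):
            delta = choice["delta"]
            if delta.get("tool_calls"):
                # A buffered tool call arrives as one complete delta.
                print("\ntool call:", delta["tool_calls"][0]["function"])
            elif delta.get("content"):
                print(delta["content"], end="", flush=True)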