rkihacker committed
Commit 534be3f · verified · 1 Parent(s): f07cfab

Update main.py

Files changed (1)
  1. main.py +74 -95
main.py CHANGED
@@ -1,27 +1,27 @@
 import os
 import time
 import uuid
-from typing import List, Optional, Literal, Any, Dict
+from typing import List, Optional, Literal, Any, Dict, Union
 
 import httpx
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+from huggingface_hub import InferenceClient
+import asyncio
 
-# ---------- Config (env) ----------
-HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face API token
-UPLOAD_URL = os.getenv("UPLOAD_URL", "https://upload.snapzion.com/api/public-upload")
-UPLOAD_ACCESS_TOKEN = os.getenv("UPLOAD_ACCESS_TOKEN")  # Bearer token for your uploader
+
+# ---------------- Config (env) ----------------
+HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face token (works for provider=fal-ai)
 WAN_MODEL = os.getenv("WAN_MODEL", "Wan-AI/Wan2.2-T2V-A14B")
-HF_ENDPOINT = os.getenv(
-    "HF_ENDPOINT",
-    f"https://api-inference.huggingface.co/models/{WAN_MODEL}",
-)
-# Polling settings for HF async generation
-POLL_INTERVAL_SEC = float(os.getenv("POLL_INTERVAL_SEC", "3"))
-POLL_TIMEOUT_SEC = int(os.getenv("POLL_TIMEOUT_SEC", "600"))  # 10 minutes max
+
+UPLOAD_URL = os.getenv("UPLOAD_URL", "https://upload.snapzion.com/api/public-upload")
+UPLOAD_ACCESS_TOKEN = os.getenv("UPLOAD_ACCESS_TOKEN")  # your bearer token
+
+# Optional tuning
+GEN_TIMEOUT_SEC = int(os.getenv("GEN_TIMEOUT_SEC", "900"))  # 15 min generation ceiling
 
 
-# ---------- OpenAI-compatible schemas ----------
+# ---------------- OpenAI-compatible schemas ----------------
 class ChatMessage(BaseModel):
     role: Literal["system", "user", "assistant", "tool"]
     content: str
@@ -33,7 +33,6 @@ class ChatCompletionsRequest(BaseModel):
     temperature: Optional[float] = None
     max_tokens: Optional[int] = None
     stream: Optional[bool] = False
-    # we accept arbitrary extras but ignore them
     n: Optional[int] = 1
     top_p: Optional[float] = None
     presence_penalty: Optional[float] = None
@@ -66,84 +65,67 @@ class ChatCompletionsResponse(BaseModel):
     }
 
 
-# ---------- Helpers ----------
+# ---------------- Helpers ----------------
 def extract_prompt(messages: List[ChatMessage]) -> str:
-    """
-    Basic heuristic: use the content of the last user message as the video prompt.
-    If none found, join all user contents.
-    """
-    for msg in reversed(messages):
-        if msg.role == "user" and msg.content.strip():
-            return msg.content.strip()
-    # fallback
-    user_texts = [m.content for m in messages if m.role == "user"]
+    """Use the last user message as the prompt. Fallback to joining all user messages."""
+    for m in reversed(messages):
+        if m.role == "user" and m.content and m.content.strip():
+            return m.content.strip()
+    user_texts = [m.content for m in messages if m.role == "user" and m.content]
     if not user_texts:
         raise HTTPException(status_code=400, detail="No user prompt provided.")
     return "\n".join(user_texts).strip()
 
 
-async def hf_text_to_video(prompt: str, client: httpx.AsyncClient) -> bytes:
-    """
-    Calls Hugging Face Inference API for text-to-video and returns raw MP4 bytes.
-    Some T2V models run asynchronously; we poll until the asset is ready.
-    """
+async def generate_video_bytes(prompt: str) -> bytes:
+    """Calls huggingface_hub.InferenceClient with provider='fal-ai' (Wan T2V) and returns MP4 bytes."""
     if not HF_TOKEN:
         raise HTTPException(status_code=500, detail="HF_TOKEN is not set.")
-
-    headers = {
-        "Authorization": f"Bearer {HF_TOKEN}",
-        "Accept": "*/*",
-    }
-
-    # Kick off generation (HF will 200 with bytes OR 202 with a status JSON)
-    start = time.time()
-    while True:
-        resp = await client.post(HF_ENDPOINT, headers=headers, json={"inputs": prompt}, timeout=None)
-        ct = resp.headers.get("content-type", "")
-
-        # Direct bytes path
-        if resp.status_code == 200 and ("video" in ct or "octet-stream" in ct):
-            return resp.content
-
-        # 202 - still processing
-        if resp.status_code in (200, 202):
-            # respect suggested wait, else our own backoff
-            await client.aclose()  # close & reopen to avoid sticky connections on HF
-            await httpx.AsyncClient().__aenter__()  # no-op to satisfy type-checkers
-            elapsed = time.time() - start
-            if elapsed > POLL_TIMEOUT_SEC:
-                raise HTTPException(status_code=504, detail="Video generation timed out.")
-            time.sleep(POLL_INTERVAL_SEC)
-            # re-create client for next loop
-            client = httpx.AsyncClient()
-            continue
-
-        # Any other error
-        try:
-            err = resp.json()
-        except Exception:
-            err = {"detail": resp.text}
-        raise HTTPException(status_code=502, detail=f"HF error: {err}")
-
-async def upload_video_bytes(mp4_bytes: bytes, client: httpx.AsyncClient) -> str:
-    """
-    Uploads the MP4 to your uploader service and returns the public URL.
-    """
+    client = InferenceClient(provider="fal-ai", api_key=HF_TOKEN)
+
+    def _sync_generate() -> Union[bytes, Dict[str, Any]]:
+        # mirrors your Python example:
+        # video = client.text_to_video("prompt", model="Wan-AI/Wan2.2-T2V-A14B")
+        return client.text_to_video(prompt, model=WAN_MODEL)
+
+    try:
+        result = await asyncio.wait_for(
+            asyncio.get_event_loop().run_in_executor(None, _sync_generate),
+            timeout=GEN_TIMEOUT_SEC,
+        )
+    except asyncio.TimeoutError:
+        raise HTTPException(status_code=504, detail="Video generation timed out.")
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=f"Video generation failed: {e}")
+
+    # fal-ai provider typically returns a dict with "video": bytes; sometimes raw bytes
+    if isinstance(result, (bytes, bytearray)):
+        return bytes(result)
+
+    if isinstance(result, dict):
+        # common keys: "video" (bytes), "seed", etc.
+        vid = result.get("video")
+        if isinstance(vid, (bytes, bytearray)):
+            return bytes(vid)
+
+    raise HTTPException(status_code=502, detail=f"Unexpected generation result: {type(result)}")
+
+
+async def upload_video_bytes(mp4_bytes: bytes) -> str:
+    """Uploads MP4 to Snapzion uploader and returns public URL."""
     if not UPLOAD_ACCESS_TOKEN:
         raise HTTPException(status_code=500, detail="UPLOAD_ACCESS_TOKEN is not set.")
+    headers = {"Authorization": f"Bearer {UPLOAD_ACCESS_TOKEN}"}
+    files = {"file": ("video.mp4", mp4_bytes, "video/mp4")}
+
+    async with httpx.AsyncClient(timeout=None) as client:
+        resp = await client.post(UPLOAD_URL, headers=headers, files=files)
 
-    files = {
-        "file": ("video.mp4", mp4_bytes, "video/mp4"),
-    }
-    headers = {
-        "Authorization": f"Bearer {UPLOAD_ACCESS_TOKEN}",
-    }
-    resp = await client.post(UPLOAD_URL, headers=headers, files=files, timeout=None)
     if resp.status_code >= 400:
         raise HTTPException(status_code=502, detail=f"Upload failed: {resp.text}")
 
     data = resp.json()
-    # Try common field names; adapt if your uploader returns a different shape
+    # Try common URL fields (adjust if your API returns a different shape)
     url = (
         data.get("url")
         or data.get("fileUrl")
@@ -151,38 +133,35 @@ async def upload_video_bytes(mp4_bytes: bytes, client: httpx.AsyncClient) -> str
         or data.get("data", {}).get("url")
     )
     if not url:
-        # last resort: return whole payload for debugging
        raise HTTPException(status_code=502, detail=f"Upload response missing URL: {data}")
-
     return url
 
 
-# ---------- FastAPI app ----------
-app = FastAPI(title="OpenAI-Compatible T2V Proxy")
+# ---------------- FastAPI app ----------------
+app = FastAPI(title="OpenAI-Compatible T2V Proxy (FAL via HF)")
+
+
+@app.get("/health")
+async def health():
+    return {"status": "ok", "model": WAN_MODEL}
+
 
 @app.post("/v1/chat/completions", response_model=ChatCompletionsResponse)
 async def chat_completions(req: ChatCompletionsRequest):
     """
     OpenAI-compatible endpoint:
-    - takes chat messages
-    - generates a video from the last user message
-    - uploads it
-    - returns the link in assistant message content
+    - reads last user message as the T2V prompt
+    - generates a video with Wan-AI/Wan2.2-T2V-A14B via provider='fal-ai'
+    - uploads to your uploader
+    - returns the public URL inside the assistant message
     """
     prompt = extract_prompt(req.messages)
-
-    async with httpx.AsyncClient() as client:
-        mp4 = await hf_text_to_video(prompt, client)
-        video_url = await upload_video_bytes(mp4, client)
+    mp4 = await generate_video_bytes(prompt)
+    video_url = await upload_video_bytes(mp4)
 
     now = int(time.time())
     completion_id = f"chatcmpl-{uuid.uuid4().hex}"
-
-    content = (
-        f"✅ Video generated & uploaded.\n"
-        f"**Prompt:** {prompt}\n"
-        f"**URL:** {video_url}"
-    )
+    content = f"✅ Video generated & uploaded.\n**Prompt:** {prompt}\n**URL:** {video_url}"
 
     return ChatCompletionsResponse(
         id=completion_id,
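
For orientation (not part of the commit): a minimal client-side sketch of calling the proxy after this change. The base URL, the "model" request field, and the OpenAI-style choices[0].message.content response path are assumptions for illustration, not taken from the diff.

# Hypothetical client call; assumes the proxy is running locally on port 8000 and
# that ChatCompletionsResponse mirrors the OpenAI chat-completions response shape.
import httpx

resp = httpx.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "Wan-AI/Wan2.2-T2V-A14B",
        "messages": [{"role": "user", "content": "A red fox running through fresh snow at dawn"}],
    },
    timeout=None,  # generation can take several minutes
)
resp.raise_for_status()
# The assistant message content should contain the uploaded video URL.
print(resp.json()["choices"][0]["message"]["content"])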