Spaces:

WolfeLeo2
/

Studai

Sleeping

App Files Community

WolfeLeo2 committed on Jun 14

Commit

11a16ac

1 Parent(s): 8dbb8ee

removed gradio and route mismatch fix

Browse files

Files changed (1) hide show

app.py +23 -23

app.py CHANGED Viewed

@@ -4,8 +4,8 @@ import torch
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-import gradio as gr
 from typing import Optional
 app = FastAPI()
@@ -28,8 +28,28 @@ class QuestionAnswerRequest(BaseModel):
     question: str
     context: str
 @app.post("/question-answer")
-def answer_question(request: QuestionAnswerRequest):
     try:
         input_text = f"question: {request.question} context: {request.context}"
         inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
@@ -82,27 +102,7 @@ async def summarize(request: SummarizationRequest):
     except Exception as e:
         logger.error(f"Summarization error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
-# ---------- Gradio Interface ----------
-def gradio_summarize(text, max_length=150, min_length=30):
-    return summarize_text(text, max_length, min_length)
-demo = gr.Interface(
-    fn=gradio_summarize,
-    inputs=[
-        gr.Textbox(lines=10, placeholder="Enter text to summarize..."),
-        gr.Slider(minimum=50, maximum=200, value=150, step=10, label="Maximum Length"),
-        gr.Slider(minimum=10, maximum=100, value=30, step=5, label="Minimum Length")
-    ],
-    outputs="text",
-    title="Text Summarization with FLAN-T5",
-    description="This app summarizes text using Google's FLAN-T5 model."
-)
-# Mount the Gradio app at the root path
-app = gr.mount_gradio_app(app, demo, path="/")
 # ---------- Entry Point ----------
 if __name__ == "__main__":

 from transformers import T5Tokenizer, T5ForConditionalGeneration
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import Optional
+from contextlib import asynccontextmanager
 app = FastAPI()
     question: str
     context: str
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup
+    global model, tokenizer
+    try:
+        logger.info(f"Loading model: {model_name}")
+        tokenizer = T5Tokenizer.from_pretrained(model_name)
+        model = T5ForConditionalGeneration.from_pretrained(model_name)
+        model.to(device)
+        logger.info(f"Model loaded on device: {device}")
+    except Exception as e:
+        logger.error(f"Failed to load model: {e}")
+        raise
+    yield
+    # Shutdown
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+app = FastAPI(lifespan=lifespan)
 @app.post("/question-answer")
+async def answer_question(request: QuestionAnswerRequest):
     try:
         input_text = f"question: {request.question} context: {request.context}"
         inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
     except Exception as e:
         logger.error(f"Summarization error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 # ---------- Entry Point ----------
 if __name__ == "__main__":