Spaces:
Running
Running
File size: 1,330 Bytes
5dbee9b 4f95499 5dbee9b 4f95499 fc8d8ec 4f95499 5dbee9b 20dbd9d fc8d8ec 4f95499 fc8d8ec 20dbd9d 5dbee9b 4f95499 fc8d8ec 4f95499 20dbd9d fc8d8ec 5dbee9b fc8d8ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
app = FastAPI()
# Faster and lighter summarization model
# (distilled BART fine-tuned on CNN/DailyMail; chosen over full BART for
# CPU-friendly latency — see the chunk/batch settings below).
model_name = "sshleifer/distilbart-cnn-12-6"
# NOTE: both the pipeline and the tokenizer are created at import time, so the
# model weights are downloaded/loaded when this module is first imported.
summarizer = pipeline("summarization", model=model_name)
# Separate tokenizer instance used only for token-accurate chunking of long
# inputs before they are handed to the pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_name)
class SummarizationRequest(BaseModel):
    """Request body for POST /summarize."""

    # Raw text to be summarized; may exceed the model's context window —
    # it is split into token-bounded chunks server-side.
    inputs: str
class SummarizationResponse(BaseModel):
    """Response body for POST /summarize."""

    # Concatenated summaries of all input chunks, joined with spaces.
    summary: str
def chunk_text(text, max_tokens=700):
    """Split *text* into pieces of at most *max_tokens* model tokens.

    Args:
        text: Arbitrary-length input string.
        max_tokens: Token budget per chunk; 700 leaves headroom below the
            model's 1024-token context window for the special tokens the
            pipeline adds when it re-tokenizes each chunk.

    Returns:
        List of decoded text chunks (empty list for empty input).
    """
    # Encode WITHOUT special tokens: the default (add_special_tokens=True)
    # would inject BOS/EOS into the token stream, skewing the per-chunk
    # budget and landing special tokens mid-chunk. The summarization
    # pipeline re-adds its own special tokens per chunk anyway.
    tokens = tokenizer.encode(text, truncation=False, add_special_tokens=False)
    chunks = []
    for start in range(0, len(tokens), max_tokens):
        window = tokens[start:start + max_tokens]
        chunks.append(tokenizer.decode(window, skip_special_tokens=True))
    return chunks
@app.post("/summarize", response_model=SummarizationResponse)
async def summarize_text(request: SummarizationRequest):
chunks = chunk_text(request.inputs)
summaries = summarizer(
chunks,
max_length=150,
min_length=30,
truncation=True,
do_sample=False,
batch_size=4 # Adjust batch size according to CPU capability
)
final_summary = " ".join([summary["summary_text"] for summary in summaries])
return {"summary": final_summary}
@app.get("/")
def greet_json():
return {"message": "DistilBART Summarizer API is running"}
|