# DistilBART Summarizer API — FastAPI app intended to run as a Hugging Face Space.
# NOTE(review): the original "Spaces: / Running / Running" lines were Space-page UI
# residue from a copy/paste, not code.
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer

app = FastAPI()

# Faster and lighter summarization model than the full bart-large-cnn.
model_name = "sshleifer/distilbart-cnn-12-6"
# Pipeline bundles tokenization + generation; loaded once at import time
# so every request reuses the same model instance.
summarizer = pipeline("summarization", model=model_name)
# Standalone tokenizer used to pre-chunk long inputs before summarization.
tokenizer = AutoTokenizer.from_pretrained(model_name)
class SummarizationRequest(BaseModel):
    """Request body for the summarization endpoint."""

    # Raw input text to summarize; may exceed the model's context window
    # (it is split into chunks before being fed to the model).
    inputs: str
class SummarizationResponse(BaseModel):
    """Response body carrying the concatenated summary text."""

    # Final summary: per-chunk summaries joined with single spaces.
    summary: str
def chunk_text(text: str, max_tokens: int = 700) -> list[str]:
    """Split *text* into pieces of at most *max_tokens* model tokens.

    The default of 700 keeps each chunk safely under DistilBART's
    1024-token encoder limit while leaving headroom for special tokens.

    Args:
        text: Arbitrary-length input text.
        max_tokens: Maximum number of tokens per chunk (must be > 0).

    Returns:
        Decoded text chunks, in original order.
    """
    # truncation=False so nothing is silently dropped for long inputs.
    token_ids = tokenizer.encode(text, truncation=False)
    return [
        tokenizer.decode(token_ids[start:start + max_tokens], skip_special_tokens=True)
        for start in range(0, len(token_ids), max_tokens)
    ]
@app.post("/summarize", response_model=SummarizationResponse)
async def summarize_text(request: SummarizationRequest):
    """Summarize arbitrarily long text.

    The input is split into token-bounded chunks, each chunk is
    summarized in a single batched pipeline call, and the partial
    summaries are concatenated.

    NOTE(review): the route decorator was missing in the pasted source —
    without it this handler is never registered; restored here (path name
    to be confirmed against the original deployment).
    NOTE(review): the transformers pipeline call is synchronous and
    CPU-bound, so it blocks the event loop for the duration of the
    request; consider offloading to a threadpool if the endpoint must
    stay responsive under load.

    Args:
        request: Body with the raw text in ``inputs``.

    Returns:
        Mapping with a single ``summary`` key (validated/serialized by
        FastAPI against ``SummarizationResponse``).
    """
    chunks = chunk_text(request.inputs)
    summaries = summarizer(
        chunks,
        max_length=150,
        min_length=30,
        truncation=True,   # guard against any chunk still over the limit
        do_sample=False,   # deterministic (greedy/beam) output
        batch_size=4,      # adjust batch size according to CPU capability
    )
    final_summary = " ".join(part["summary_text"] for part in summaries)
    return {"summary": final_summary}
@app.get("/")
def greet_json():
    """Root health-check endpoint confirming the API is up.

    NOTE(review): the route decorator was missing in the pasted source —
    without it this handler is never registered; restored here.
    """
    return {"message": "DistilBART Summarizer API is running"}