# Hugging Face Spaces status banner ("Spaces: Running") captured along with the
# source — not part of the application; kept as a comment so the module parses.
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer

app = FastAPI()

# Load the model once at import time so every request reuses the same pipeline
# instance (instantiating BART is expensive). The tokenizer is loaded from the
# same checkpoint so chunking in chunk_text counts the exact tokens the
# summarizer will see.
model_name = "facebook/bart-large-cnn"
summarizer = pipeline("summarization", model=model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
class SummarizationRequest(BaseModel):
    """Request body for the summarization endpoint."""

    # Raw text to summarize; may be arbitrarily long (chunked server-side).
    inputs: str
class SummarizationResponse(BaseModel):
    """Response body for the summarization endpoint."""

    # Concatenation of the per-chunk summaries, joined with single spaces.
    summary: str
def chunk_text(text: str, max_tokens: int = 900) -> list[str]:
    """Split *text* into pieces of at most *max_tokens* model tokens.

    The full text is tokenized once (``truncation=False`` so nothing is
    silently dropped), sliced into fixed-size token windows, and each window
    is decoded back to plain text. 900 presumably leaves headroom under
    BART's 1024-position encoder limit for special tokens — TODO confirm.

    Args:
        text: Input text of any length.
        max_tokens: Upper bound on tokens per chunk.

    Returns:
        Decoded text chunks with special tokens stripped; empty input yields
        however many chunks its special tokens alone produce.
    """
    tokens = tokenizer.encode(text, truncation=False)
    return [
        tokenizer.decode(tokens[start:start + max_tokens], skip_special_tokens=True)
        for start in range(0, len(tokens), max_tokens)
    ]
# NOTE(review): no route decorator was present, so this handler was never
# registered with the app; the path "/summarize" is assumed — confirm against
# clients before deploying.
@app.post("/summarize", response_model=SummarizationResponse)
async def summarize_text(request: SummarizationRequest):
    """Summarize ``request.inputs`` with BART, chunking long inputs.

    Each chunk is summarized independently with deterministic decoding
    (``do_sample=False``) and the partial summaries are concatenated in
    order, separated by single spaces.

    Args:
        request: Body carrying the text to summarize in ``inputs``.

    Returns:
        Dict with a single ``summary`` key (shape of SummarizationResponse).
    """
    chunks = chunk_text(request.inputs)
    summaries = []
    for chunk in chunks:
        # Scale the target summary length with the chunk's word count,
        # clamped to [100, 250] (max) and [50, 100] (min) so tiny chunks
        # still produce a usable summary; min_length <= 100 <= max_length
        # always holds, so the bounds never cross.
        input_length = len(chunk.split())
        max_length = min(250, max(100, int(input_length * 0.4)))
        min_length = min(100, max(50, int(input_length * 0.2)))
        summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=False)
        summaries.append(summary[0]["summary_text"])
    return {"summary": " ".join(summaries)}
# NOTE(review): no route decorator was present, so this health-check was never
# reachable; "/" follows the HF Spaces FastAPI template convention — confirm
# no other handler owns the root path.
@app.get("/")
def greet_json():
    """Health-check endpoint confirming the service is up."""
    return {"message": "BART Summarizer API is running"}