File size: 1,330 Bytes
5dbee9b
 
4f95499
5dbee9b
 
4f95499
fc8d8ec
 
4f95499
 
5dbee9b
 
 
 
20dbd9d
 
 
fc8d8ec
 
4f95499
 
 
 
 
 
 
 
 
fc8d8ec
20dbd9d
5dbee9b
4f95499
fc8d8ec
 
 
 
 
 
 
 
 
 
 
 
4f95499
20dbd9d
fc8d8ec
5dbee9b
 
fc8d8ec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer

# Application object; the route decorators further down register their
# handlers on it.
app = FastAPI()

# Checkpoint identifier shared by the pipeline and the tokenizer below.
# Faster and lighter summarization model
model_name = "sshleifer/distilbart-cnn-12-6"
# Summarization pipeline, built once at import time and called by the
# /summarize handler.
summarizer = pipeline("summarization", model=model_name)
# Tokenizer for the same checkpoint, loaded separately from the pipeline;
# chunk_text() uses it to split long inputs on token boundaries.
tokenizer = AutoTokenizer.from_pretrained(model_name)

class SummarizationRequest(BaseModel):
    """Request body accepted by ``POST /summarize``."""
    # Raw text to be summarized.
    inputs: str

class SummarizationResponse(BaseModel):
    """Response body returned by ``POST /summarize``."""
    # Per-chunk summaries joined with single spaces.
    summary: str


def chunk_text(text, max_tokens=700):
    """Split *text* into consecutive chunks of at most *max_tokens* token ids.

    The text is tokenized with the module-level ``tokenizer``, the token ids
    are sliced into back-to-back windows of ``max_tokens`` ids, and each
    window is decoded back into a string.

    Args:
        text: The text to split.
        max_tokens: Maximum number of token ids per window; must be positive.

    Returns:
        A list of decoded text chunks in their original order. Empty or
        whitespace-only input yields an empty list, and windows that decode
        to nothing but whitespace are dropped.

    Raises:
        ValueError: If *max_tokens* is zero or negative.
    """
    # Validate up front: a zero step makes range() fail with an opaque
    # message, and a negative step would silently produce no chunks at all.
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be positive, got {max_tokens}")
    if not text or text.isspace():
        return []

    # Leave out the BOS/EOS markers that encode() adds by default: they would
    # count against max_tokens, and a trailing EOS that lands in a window of
    # its own would decode to an empty chunk.
    token_ids = tokenizer.encode(
        text, truncation=False, add_special_tokens=False
    )

    pieces = (
        tokenizer.decode(
            token_ids[start:start + max_tokens], skip_special_tokens=True
        )
        for start in range(0, len(token_ids), max_tokens)
    )
    return [piece for piece in pieces if piece.strip()]


@app.post("/summarize", response_model=SummarizationResponse)
async def summarize_text(request: SummarizationRequest):
    """Summarize the submitted text chunk by chunk.

    The input is split with ``chunk_text``, every chunk is summarized by the
    module-level ``summarizer`` pipeline, and the per-chunk summaries are
    joined with single spaces.

    Args:
        request: Parsed request body; ``request.inputs`` holds the text.

    Returns:
        A dict with a single ``"summary"`` key. The summary is an empty
        string when the input contains no non-whitespace text.
    """
    # Drop blank chunks so that empty or whitespace-only input never reaches
    # the model, which min_length would otherwise force to generate text
    # from nothing.
    chunks = [chunk for chunk in chunk_text(request.inputs) if chunk.strip()]
    if not chunks:
        return {"summary": ""}

    # NOTE(review): this is a synchronous call inside an async handler, so it
    # presumably blocks the event loop while the model runs -- confirm that
    # serialising requests this way is intended.
    summaries = summarizer(
        chunks,
        max_length=150,
        min_length=30,
        truncation=True,
        do_sample=False,
        batch_size=4,  # Adjust batch size according to CPU capability
    )

    final_summary = " ".join(item["summary_text"] for item in summaries)

    return {"summary": final_summary}


@app.get("/")
def greet_json():
    """Return a static JSON message for requests to the root path."""
    payload = {"message": "DistilBART Summarizer API is running"}
    return payload