from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
import uvicorn

app = FastAPI()

# Load the summarization model and its tokenizer once at startup.
MODEL_NAME = "VincentMuriuki/legal-summarizer"
summarizer = pipeline("summarization", model=MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


class SummarizeInput(BaseModel):
    text: str


class ChunkInput(BaseModel):
    text: str
    max_tokens: int = 512


@app.post("/summarize")
def summarize_text(data: SummarizeInput):
    # Run the summarizer deterministically (no sampling).
    summary = summarizer(data.text, max_length=150, min_length=30, do_sample=False)
    return {"summary": summary[0]["summary_text"]}


@app.post("/chunk")
def chunk_text(data: ChunkInput):
    # Tokenize the full text without truncation, then split the token IDs
    # into fixed-size windows of at most max_tokens each and decode them
    # back to text.
    tokens = tokenizer.encode(data.text, truncation=False)
    chunks = []
    for i in range(0, len(tokens), data.max_tokens):
        chunk_tokens = tokens[i:i + data.max_tokens]
        decoded = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
        chunks.append(decoded.strip())
    return {"chunks": chunks}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
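
# Illustrative usage sketch (not part of the service itself): once the
# server is running on port 7860 as configured above, the two endpoints
# can be exercised with curl. The payload text here is a placeholder.
#
#   curl -X POST http://localhost:7860/summarize \
#        -H "Content-Type: application/json" \
#        -d '{"text": "This Agreement is entered into by and between ..."}'
#
#   curl -X POST http://localhost:7860/chunk \
#        -H "Content-Type: application/json" \
#        -d '{"text": "This Agreement is entered into by and between ...", "max_tokens": 512}'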