from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline, AutoTokenizer
import nltk
import os
import uvicorn
import time

from chunker import chunk_by_token_limit

# Set up the NLTK data directory
NLTK_DATA_DIR = "/app/nltk_data"
os.makedirs(NLTK_DATA_DIR, exist_ok=True)
nltk.data.path.append(NLTK_DATA_DIR)

print("πŸ“¦ Downloading NLTK 'punkt' tokenizer...")
nltk.download("punkt", download_dir=NLTK_DATA_DIR, quiet=True)
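# punkt supplies the sentence tokenizer that the chunking helper in chunker.py presumably relies on.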

app = FastAPI()

HF_AUTH_TOKEN = os.getenv("HF_TOKEN")
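# A token is only required for gated or private model repos; None works for public models.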

MODEL_NAME = "VincentMuriuki/legal-summarizer"
print(f"πŸš€ Loading summarization pipeline: {MODEL_NAME}")
start_model_load = time.time()
summarizer = pipeline("summarization", model=MODEL_NAME, token=HF_AUTH_TOKEN)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_AUTH_TOKEN)
print(f"βœ… Model loaded in {time.time() - start_model_load:.2f}s")

class SummarizeInput(BaseModel):
    text: str

class ChunkInput(BaseModel):
    text: str
    max_tokens: int = 1024

@app.post("/summarize")
def summarize_text(data: SummarizeInput):
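    # Single-pass abstractive summary of the posted text; long inputs can be split first via /chunk.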
    print("πŸ“₯ Received summarize request.")
    start = time.time()
    summary = summarizer(data.text, max_length=150, min_length=30, do_sample=False)
    duration = time.time() - start
    print(f"πŸ“ Summary generated in {duration:.2f}s.")
    return {"summary": summary[0]["summary_text"], "time_taken": f"{duration:.2f}s"}

@app.post("/chunk")
def chunk_text(data: ChunkInput):
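    # Split the posted text into chunks bounded by max_tokens, counted with the model's tokenizer.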
    print(f"πŸ“₯ Received chunking request with max_tokens={data.max_tokens}")
    start = time.time()
    chunks = chunk_by_token_limit(data.text, data.max_tokens, tokenizer)
    duration = time.time() - start
    print(f"πŸ”– Chunking completed in {duration:.2f}s. Total chunks: {len(chunks)}")
    return {"chunks": chunks, "chunk_count": len(chunks), "time_taken": f"{duration:.2f}s"}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
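
The chunker module imported above is not part of this file. Below is a minimal sketch of what chunk_by_token_limit could look like, assuming it splits the text into sentences with NLTK's punkt tokenizer (which would explain the download at startup) and greedily packs whole sentences under the token budget; the sentence-level strategy and the handling of over-long sentences are assumptions, not the actual implementation.

# chunker.py (hypothetical sketch; the real module is not shown in this file)
from nltk.tokenize import sent_tokenize


def chunk_by_token_limit(text: str, max_tokens: int, tokenizer) -> list[str]:
    """Greedily pack whole sentences into chunks of at most max_tokens tokens.

    A single sentence that exceeds max_tokens on its own still becomes one
    (over-budget) chunk; the real implementation may split it further.
    """
    chunks, current, current_len = [], [], 0
    for sentence in sent_tokenize(text):
        sent_len = len(tokenizer.encode(sentence, add_special_tokens=False))
        # Close the current chunk if adding this sentence would exceed the budget.
        if current and current_len + sent_len > max_tokens:
            chunks.append(" ".join(current))
            current, current_len = [], 0
        current.append(sentence)
        current_len += sent_len
    if current:
        chunks.append(" ".join(current))
    return chunks

With the service running on port 7860, /summarize expects a JSON body like {"text": "..."} and /chunk accepts an optional "max_tokens" field (default 1024); both endpoints return the elapsed time alongside their result.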