spacesedan committed on
Commit
0dedb70
·
1 Parent(s): 204ba37

bottleneck

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -2,9 +2,14 @@ from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from transformers import pipeline, AutoTokenizer
4
  from typing import List
 
5
 
6
  app = FastAPI()
7
 
 
 
 
 
8
  # Faster and lighter summarization model
9
  model_name = "sshleifer/distilbart-cnn-12-6"
10
  summarizer = pipeline("summarization", model=model_name)
@@ -48,6 +53,7 @@ async def summarize_batch(request: BatchSummarizationRequest):
48
 
49
  for item in request.inputs:
50
  chunks = chunk_text(item.text)
 
51
  all_chunks.extend(chunks)
52
  chunk_map.extend([item.content_id] * len(chunks))
53
 
 
2
  from pydantic import BaseModel
3
  from transformers import pipeline, AutoTokenizer
4
  from typing import List
5
+ import logging
6
 
7
  app = FastAPI()
8
 
9
+ # Configure logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger("summarizer")
12
+
13
  # Faster and lighter summarization model
14
  model_name = "sshleifer/distilbart-cnn-12-6"
15
  summarizer = pipeline("summarization", model=model_name)
 
53
 
54
  for item in request.inputs:
55
  chunks = chunk_text(item.text)
56
+ logger.info(f"[CHUNKING] content_id={item.content_id} original_len={len(item.text)} num_chunks={len(chunks)}")
57
  all_chunks.extend(chunks)
58
  chunk_map.extend([item.content_id] * len(chunks))
59