spacesedan committed on
Commit
7d6020a
·
1 Parent(s): 4f95499

more chunking

Browse files
Files changed (1) hide show
  1. app.py +9 -11
app.py CHANGED
@@ -14,8 +14,7 @@ class SummarizationRequest(BaseModel):
14
  class SummarizationResponse(BaseModel):
15
  summary: str
16
 
17
-
18
- def chunk_text(text, max_tokens=900):
19
  tokens = tokenizer.encode(text, truncation=False)
20
  chunks = []
21
 
@@ -25,26 +24,25 @@ def chunk_text(text, max_tokens=900):
25
 
26
  return chunks
27
 
28
-
29
  @app.post("/summarize", response_model=SummarizationResponse)
30
  async def summarize_text(request: SummarizationRequest):
31
  chunks = chunk_text(request.inputs)
32
-
33
  summaries = []
34
 
35
  for chunk in chunks:
36
- input_length = len(chunk.split())
37
- max_length = min(250, max(100, int(input_length * 0.4)))
38
- min_length = min(100, max(50, int(input_length * 0.2)))
39
-
40
- summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=False)
 
 
 
41
  summaries.append(summary[0]["summary_text"])
42
 
43
  final_summary = " ".join(summaries)
44
-
45
  return {"summary": final_summary}
46
 
47
-
48
  @app.get("/")
49
  def greet_json():
50
  return {"message": "BART Summarizer API is running"}
 
14
  class SummarizationResponse(BaseModel):
15
  summary: str
16
 
17
+ def chunk_text(text, max_tokens=800):
 
18
  tokens = tokenizer.encode(text, truncation=False)
19
  chunks = []
20
 
 
24
 
25
  return chunks
26
 
 
27
  @app.post("/summarize", response_model=SummarizationResponse)
28
  async def summarize_text(request: SummarizationRequest):
29
  chunks = chunk_text(request.inputs)
 
30
  summaries = []
31
 
32
  for chunk in chunks:
33
+ # Explicitly truncate inputs in pipeline
34
+ summary = summarizer(
35
+ chunk,
36
+ max_length=150, # safer summarization lengths
37
+ min_length=30,
38
+ truncation=True, # crucial addition!
39
+ do_sample=False
40
+ )
41
  summaries.append(summary[0]["summary_text"])
42
 
43
  final_summary = " ".join(summaries)
 
44
  return {"summary": final_summary}
45
 
 
46
  @app.get("/")
47
  def greet_json():
48
  return {"message": "BART Summarizer API is running"}