spacesedan committed
Commit ed4c020 · 1 Parent(s): a93595a
Files changed (1)
  1. app.py +6 -9
app.py CHANGED
@@ -3,7 +3,6 @@ from pydantic import BaseModel
 from transformers import pipeline, AutoTokenizer
 from typing import List
 import logging
-import torch

 app = FastAPI()

@@ -56,16 +55,14 @@ async def summarize_batch(request: BatchSummarizationRequest):
         all_chunks.extend(chunks)
         chunk_map.extend([item.content_id] * len(chunks))

-    # Enforce token limit using tensor shape
+    # Hard-truncate chunks during encoding and decode safely
     safe_chunks = []
     for chunk in all_chunks:
-        inputs = tokenizer(chunk, return_tensors="pt", truncation=False)
-        token_length = inputs["input_ids"].shape[1]
-        if token_length > MAX_MODEL_TOKENS:
-            logger.warning(f"[TRUNCATING] Chunk token length {token_length} > {MAX_MODEL_TOKENS}, truncating.")
-            inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=MAX_MODEL_TOKENS)
-            chunk = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
-        safe_chunks.append(chunk)
+        encoded = tokenizer.encode(chunk, truncation=True, max_length=MAX_MODEL_TOKENS)
+        if len(encoded) >= MAX_MODEL_TOKENS:
+            logger.warning(f"[TRUNCATING] Chunk encoded to {len(encoded)} tokens, trimming to {MAX_MODEL_TOKENS}.")
+        decoded = tokenizer.decode(encoded, skip_special_tokens=True)
+        safe_chunks.append(decoded)

     summaries = summarizer(
         safe_chunks,
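
Review note: the sketch below restates the new truncation strategy as a standalone script. It is a minimal illustration, not the full app.py: the checkpoint name "facebook/bart-large-cnn", the 1024-token limit, and the make_safe helper name are assumptions made for the example, not values taken from this commit. What it demonstrates is that a single tokenizer.encode(..., truncation=True, max_length=...) call already guarantees the token budget, so the old pattern of tokenizing once without truncation to measure the tensor shape and then a second time to trim (the only reason app.py needed import torch) collapses into one pass.

    # Standalone sketch of the committed truncation pattern.
    # Assumptions (not from the commit): checkpoint name, MAX_MODEL_TOKENS value, helper name.
    import logging

    from transformers import AutoTokenizer

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    MAX_MODEL_TOKENS = 1024  # assumed; should match the summarizer's max input length

    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")  # illustrative checkpoint


    def make_safe(chunks):
        """Hard-truncate each chunk to the token budget, then decode back to text."""
        safe_chunks = []
        for chunk in chunks:
            # encode() with truncation=True can never return more than max_length ids,
            # so no torch tensors and no second tokenization pass are needed.
            encoded = tokenizer.encode(chunk, truncation=True, max_length=MAX_MODEL_TOKENS)
            if len(encoded) >= MAX_MODEL_TOKENS:
                # Hitting the cap exactly is the tell that the input was trimmed;
                # the truncation itself already happened inside encode().
                logger.warning("[TRUNCATING] Chunk hit the %d-token limit.", MAX_MODEL_TOKENS)
            safe_chunks.append(tokenizer.decode(encoded, skip_special_tokens=True))
        return safe_chunks


    if __name__ == "__main__":
        oversized = "token soup " * 3000  # far beyond the limit
        safe = make_safe([oversized])
        # Re-encoding the decoded text stays at or under the budget
        # (BPE round-tripping may shift the count slightly, never far past it).
        print(len(tokenizer.encode(safe[0])))

Two side effects of this design worth noting: the >= check fires only when a chunk exactly fills the cap, so the log is informational rather than a trigger for further work, and decoding the truncated ids back to text lets the downstream summarizer pipeline re-tokenize the input without exceeding the model window.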