spacesedan committed
Commit a93595a · Parent(s): eb54abc

ai on ai crime

Files changed (1): app.py (+9 -6)
app.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from transformers import pipeline, AutoTokenizer
 from typing import List
 import logging
+import torch
 
 app = FastAPI()
 
@@ -55,14 +56,16 @@ async def summarize_batch(request: BatchSummarizationRequest):
         all_chunks.extend(chunks)
         chunk_map.extend([item.content_id] * len(chunks))
 
-    # Final safety pass to enforce 1024 token limit after decoding
+    # Enforce token limit using tensor shape
     safe_chunks = []
     for chunk in all_chunks:
-        encoded = tokenizer.encode(chunk, truncation=False)
-        if len(encoded) > MAX_MODEL_TOKENS:
-            logger.warning(f"[TRUNCATING] Chunk exceeded max tokens ({len(encoded)}), trimming to {MAX_MODEL_TOKENS} tokens")
-            encoded = encoded[:MAX_MODEL_TOKENS]
-        safe_chunks.append(tokenizer.decode(encoded, skip_special_tokens=True))
+        inputs = tokenizer(chunk, return_tensors="pt", truncation=False)
+        token_length = inputs["input_ids"].shape[1]
+        if token_length > MAX_MODEL_TOKENS:
+            logger.warning(f"[TRUNCATING] Chunk token length {token_length} > {MAX_MODEL_TOKENS}, truncating.")
+            inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=MAX_MODEL_TOKENS)
+            chunk = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
+        safe_chunks.append(chunk)
 
     summaries = summarizer(
         safe_chunks,
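
For reference, here is a minimal standalone sketch of the new safety pass, runnable outside the FastAPI app. MAX_MODEL_TOKENS = 1024 follows the old comment's "1024 token limit"; the checkpoint name (facebook/bart-large-cnn), the enforce_token_limit helper, and the logger setup are illustrative assumptions, not taken from app.py.

import logging

from transformers import AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Assumed values: the diff's old comment mentions a 1024-token limit;
# the checkpoint is a placeholder for whatever app.py actually loads.
MAX_MODEL_TOKENS = 1024
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

def enforce_token_limit(chunks):
    """Measure each chunk in tokens and hard-truncate any that exceed the limit."""
    safe_chunks = []
    for chunk in chunks:
        # First pass: tokenize without truncation, purely to measure length.
        # return_tensors="pt" requires the torch package, which is why the
        # commit adds `import torch` to app.py.
        inputs = tokenizer(chunk, return_tensors="pt", truncation=False)
        token_length = inputs["input_ids"].shape[1]
        if token_length > MAX_MODEL_TOKENS:
            logger.warning(
                f"[TRUNCATING] Chunk token length {token_length} > {MAX_MODEL_TOKENS}, truncating."
            )
            # Second pass: let the tokenizer truncate, then decode back to
            # text so the summarizer receives a string that re-encodes
            # within the model's window.
            inputs = tokenizer(
                chunk,
                return_tensors="pt",
                truncation=True,
                max_length=MAX_MODEL_TOKENS,
            )
            chunk = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
        safe_chunks.append(chunk)
    return safe_chunks

print(enforce_token_limit(["some very long article text ..."]))

Compared to the old slice (encoded[:MAX_MODEL_TOKENS]), re-tokenizing with truncation=True and max_length leaves special-token handling to the tokenizer instead of cutting raw token ids, at the cost of a second tokenizer call for oversized chunks.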