Assignment_1_Testing

Sleeping

App Files Files Community

LinkLinkWu committed on Apr 30

Commit

79a38ee

verified ·

1 Parent(s): 8ac7c32

Update func.py

Browse files

Files changed (1) hide show

func.py +25 -14

func.py CHANGED Viewed

@@ -32,9 +32,10 @@ def img2text(img: Union[Image.Image, str, Path]) -> str:
         img = Image.open(img)
     return _get_captioner()(img)[0]["generated_text"]
-# Step 2.  Caption  ➜  Children’s story   (DeepSeek-R1 1.5 B)
 # -------------------------------------------------------------------
-import torch
 from transformers import pipeline
 _GEN_MODEL   = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
@@ -52,15 +53,27 @@ def _get_generator():
             "text-generation",
             model=_GEN_MODEL,
             device=0 if torch.cuda.is_available() else -1,
-            # common decoding params – can still be overridden in the call
             max_new_tokens=150,
             do_sample=True,
             top_p=0.9,
             temperature=0.8,
         )
     return _generator
 def text2story(caption: str) -> str:
     """
     Generate a ≤100-word children’s story from the image caption.
@@ -69,18 +82,16 @@ def text2story(caption: str) -> str:
         caption: scene description string.
     Returns:
-        Story text (plain string, trimmed to ≤100 words).
     """
-    prompt   = _PROMPT_TMPL.format(caption=caption)
-    gen      = _get_generator()(
-        prompt,
-        return_full_text=False   # only the completion, not the prompt
-    )[0]["generated_text"]
-    # ensure last sentence is closed
-    story = gen.strip()
-    if "." in story:
-        story = story[: story.rfind(".") + 1]
     # hard cap at 100 words
     return " ".join(story.split()[:100])

         img = Image.open(img)
     return _get_captioner()(img)[0]["generated_text"]
 # -------------------------------------------------------------------
+# Step 2.  Caption ➜ Children’s story   (DeepSeek-R1-Distill-Qwen-1.5B)
+# -------------------------------------------------------------------
+import torch, re
 from transformers import pipeline
 _GEN_MODEL   = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
             "text-generation",
             model=_GEN_MODEL,
             device=0 if torch.cuda.is_available() else -1,
             max_new_tokens=150,
             do_sample=True,
             top_p=0.9,
             temperature=0.8,
+            no_repeat_ngram_size=4,    # ← block 4-gram repeats
+            repetition_penalty=1.15    # ← soften copy-loops
         )
     return _generator
+def _dedup_sentences(text: str) -> str:
+    """Remove exact duplicate sentences while preserving order."""
+    seen, cleaned = set(), []
+    for sent in re.split(r'(?<=[.!?])\s+', text.strip()):
+        s = sent.strip()
+        if s and s not in seen:
+            cleaned.append(s)
+            seen.add(s)
+    return " ".join(cleaned)
 def text2story(caption: str) -> str:
     """
     Generate a ≤100-word children’s story from the image caption.

     Args:
         caption: scene description string.

     Returns:
         Story text (plain string, ≤100 words, no exact duplicate sentences).
         A non-empty result always ends with ".", "!" or "?".
     """
     prompt = _PROMPT_TMPL.format(caption=caption)
     # return_full_text=False keeps only the completion, not the prompt.
     raw = _get_generator()(prompt, return_full_text=False)[0]["generated_text"]

     # Close a cut-off final sentence *before* de-duplicating: otherwise a
     # truncated repeat such as "The end. The end" slips past the exact-match
     # check and turns into a real duplicate once the "." is appended.
     story = _dedup_sentences(_close_sentence(raw.strip()))

     # Hard cap at 100 words, then close the sentence again: the cap can cut
     # the text mid-sentence, which previously left it without punctuation.
     capped = " ".join(story.split()[:100])
     return _close_sentence(capped)


 def _close_sentence(text: str) -> str:
     """Return *text* ending in ".", "!" or "?"; empty text stays empty."""
     # Drop dangling whitespace/commas so the result never ends in ",.".
     trimmed = text.rstrip(" \t\r\n,;:")
     if trimmed and trimmed[-1] not in ".!?":
         trimmed += "."
     return trimmed