peterweber committed
Commit 0a9384a · verified · 1 Parent(s): faae576

Update app.py

Files changed (1)
  1. app.py +51 -42
app.py CHANGED
@@ -1,25 +1,41 @@
-import os, re, difflib
-from typing import List
+import os, re, difflib, traceback
+from typing import List, Tuple
 import gradio as gr
+from huggingface_hub import hf_hub_download
 from ctransformers import AutoModelForCausalLM
 
-# ---------------- Model (GGUF on CPU) ----------------
-# These defaults work on HF free CPU Spaces.
-REPO_ID = os.getenv("LLAMA_GGUF_REPO", "bartowski/Llama-3.2-3B-Instruct-GGUF")
-FILENAME = os.getenv("LLAMA_GGUF_FILE", "Llama-3.2-3B-Instruct-Q5_0.gguf")  # if not found, use Q8_0
-MODEL_TYPE = "llama"
+# ---------------- Auto-pick a valid GGUF ----------------
+CANDIDATES: Tuple[Tuple[str, str], ...] = (
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q8_0.gguf"),
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q6_K_L.gguf"),
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q5_K_M.gguf"),
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_0.gguf"),
+)
+
+def resolve_model_file() -> str:
+    last_err = None
+    for repo, fname in CANDIDATES:
+        try:
+            path = hf_hub_download(repo_id=repo, filename=fname)
+            print(f"[Humanizer] Using {repo} :: {fname}")
+            return path
+        except Exception as e:
+            last_err = e
+            print(f"[Humanizer] Could not get {repo}/{fname}: {e}")
+    raise RuntimeError(f"Failed to download any GGUF. Last error: {last_err}")
 
-# lazy-load for fast startup
+MODEL_TYPE = "llama"
 _llm = None
+
 def load_model():
     global _llm
     if _llm is None:
+        file_path = resolve_model_file()
         _llm = AutoModelForCausalLM.from_pretrained(
-            REPO_ID,
-            model_file=FILENAME,
+            file_path,  # direct path to the .gguf we just downloaded
             model_type=MODEL_TYPE,
             gpu_layers=0,
-            context_length=8192,
+            context_length=4096,  # safer on free CPU
         )
     return _llm
 
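Note: the new fallback loader can be smoke-tested outside the Space. A minimal sketch, assuming huggingface_hub and ctransformers are installed; the repo and file names are copied from the CANDIDATES list above, while the prompt and token budget are arbitrary:

    # Download one known-good GGUF, load it on CPU, run a tiny generation.
    from huggingface_hub import hf_hub_download
    from ctransformers import AutoModelForCausalLM

    path = hf_hub_download(
        repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
        filename="Llama-3.2-3B-Instruct-Q4_0.gguf",  # smallest candidate in the list
    )
    llm = AutoModelForCausalLM.from_pretrained(path, model_type="llama", gpu_layers=0)
    print(llm("Say hello in five words.", max_new_tokens=16))
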
@@ -46,7 +62,7 @@ def restore(text: str, protected: List[str]):
     text = re.sub(rf"{SENTINEL_OPEN}(\d+){SENTINEL_CLOSE}", unwrap, text)
     return text.replace(SENTINEL_OPEN, "").replace(SENTINEL_CLOSE, "")
 
-# ---------------- Prompting (Llama 3.x chat template) ----------------
+# ---------------- Prompting ----------------
 SYSTEM = (
     "You are an expert editor. Humanize the user's text: improve flow, vary sentence length, "
     "split run-ons, replace stiff phrasing with natural alternatives, and preserve meaning. "
@@ -60,7 +76,6 @@ def build_prompt(text: str, tone: str, region: str, level: str, intensity: int)
         f"Humanization intensity: {intensity} (10 strongest).\n\n"
         f"Rewrite this text. Keep markers intact:\n\n{text}"
     )
-    # Llama 3.x chat format
     return (
         "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
         f"{SYSTEM}\n"
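Note: this hunk cuts off mid-template. For orientation, a sketch of how a Llama 3.x chat prompt built this way would typically end; the exact whitespace and the trailing assistant header are assumptions, inferred from the stop=["<|eot_id|>"] token used in generate_once() below:

    # Presumed final shape of the string build_prompt() returns; the empty
    # assistant header at the end cues the model to begin its reply.
    def llama3_prompt(system: str, user: str) -> str:
        return (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
            f"{system}<|eot_id|>"
            "<|start_header_id|>user<|end_header_id|>\n"
            f"{user}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n"
        )
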
@@ -72,38 +87,32 @@ def build_prompt(text: str, tone: str, region: str, level: str, intensity: int)
 def diff_ratio(a: str, b: str) -> float:
     return difflib.SequenceMatcher(None, a, b).ratio()
 
-def generate_once(prompt: str, temperature: float, max_new: int = 768) -> str:
+def generate_once(prompt: str, temperature: float, max_new: int = 384) -> str:
     llm = load_model()
-    out = llm(
-        prompt,
-        temperature=temperature,
-        top_p=0.95,
-        max_new_tokens=max_new,
-        stop=["<|eot_id|>"]
-    )
-    return out.strip()
+    return llm(prompt, temperature=temperature, top_p=0.95, max_new_tokens=max_new, stop=["<|eot_id|>"]).strip()
 
-# ---------------- Main humanizer ----------------
+# ---------------- Main ----------------
 def humanize_core(text: str, tone: str, region: str, level: str, intensity: int):
-    protected_text, bag = protect(text)
-    prompt = build_prompt(protected_text, tone, region, level, intensity)
-
-    # pass 1 (conservative), pass 2 (stronger) if too similar
-    draft = generate_once(prompt, temperature=0.35)
-    if diff_ratio(protected_text, draft) > 0.97:
-        draft = generate_once(prompt, temperature=0.9)
-
-    draft = draft.replace("—", "-")
-    final = restore(draft, bag)
-
-    # ensure all protected spans survived
-    for i, span in enumerate(bag):
-        marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
-        if marker in protected_text and span not in final:
-            final = final.replace(marker, span)
-    return final
-
-# ---------------- Gradio UI (and REST at /api/predict/) ----------------
+    try:
+        protected_text, bag = protect(text)
+        prompt = build_prompt(protected_text, tone, region, level, intensity)
+
+        draft = generate_once(prompt, temperature=0.35)
+        if diff_ratio(protected_text, draft) > 0.97:
+            draft = generate_once(prompt, temperature=0.9)
+
+        draft = draft.replace("—", "-")
+        final = restore(draft, bag)
+
+        for i, span in enumerate(bag):
+            marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
+            if marker in protected_text and span not in final:
+                final = final.replace(marker, span)
+        return final
+    except Exception:
+        return "ERROR:\n" + traceback.format_exc()
+
+# ---------------- Gradio UI (REST at /api/predict/) ----------------
 def ui_humanize(text, tone, region, level, intensity):
     return humanize_core(text, tone, region, level, int(intensity))
 
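Note: humanize_core() leans on protect() and the sentinel constants, which never appear in the changed hunks. A minimal counterpart consistent with the restore() tail shown in the second hunk; the sentinel characters and the choice of which spans to protect are assumptions, not the repo's actual values:

    # Hypothetical protect(): swap each protected span for an indexed
    # sentinel marker the model is told to keep intact; restore() swaps
    # them back after generation.
    import re
    from typing import List, Tuple

    SENTINEL_OPEN, SENTINEL_CLOSE = "\u27ea", "\u27eb"  # assumed markers
    PROTECT_RE = re.compile(r"`[^`]+`|https?://\S+")    # assumed span types

    def protect(text: str) -> Tuple[str, List[str]]:
        bag: List[str] = []
        def wrap(m: re.Match) -> str:
            bag.append(m.group(0))
            return f"{SENTINEL_OPEN}{len(bag) - 1}{SENTINEL_CLOSE}"
        return PROTECT_RE.sub(wrap, text), bag

    demo, bag = protect("Keep `model_file` and https://hf.co intact.")
    print(demo, bag)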
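Note: the final comment names Gradio's legacy REST route. A hedged example of calling it with requests; the Space URL is a placeholder, and the {"data": [...]} payload shape at /api/predict/ applies only to Gradio versions that still serve that route:

    # Inputs mirror ui_humanize(text, tone, region, level, intensity).
    import requests

    SPACE_URL = "https://<owner>-<space>.hf.space"  # placeholder, not the real URL
    payload = {"data": ["Text to humanize.", "Neutral", "US", "Medium", 5]}
    resp = requests.post(f"{SPACE_URL}/api/predict/", json=payload, timeout=120)
    print(resp.json()["data"][0])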