Update app.py

app.py CHANGED

@@ -1,24 +1,29 @@
 import os, re, difflib
 from typing import List
-import
-from
+import gradio as gr
+from ctransformers import AutoModelForCausalLM
 
-#
-
-
-
-
+# ---------------- Model (GGUF on CPU) ----------------
+# These defaults work on HF free CPU Spaces.
+REPO_ID = os.getenv("LLAMA_GGUF_REPO", "bartowski/Llama-3.2-3B-Instruct-GGUF")
+FILENAME = os.getenv("LLAMA_GGUF_FILE", "Llama-3.2-3B-Instruct-Q5_0.gguf")  # if not found, use Q8_0
+MODEL_TYPE = "llama"
 
+# lazy-load for fast startup
+_llm = None
 def load_model():
-    global
-    if
-
-
-
-
-
-
-
+    global _llm
+    if _llm is None:
+        _llm = AutoModelForCausalLM.from_pretrained(
+            REPO_ID,
+            model_file=FILENAME,
+            model_type=MODEL_TYPE,
+            gpu_layers=0,
+            context_length=8192,
+        )
+    return _llm
+
+# ---------------- Protect / restore ----------------
 SENTINEL_OPEN, SENTINEL_CLOSE = "§§KEEP_OPEN§§", "§§KEEP_CLOSE§§"
 URL_RE = re.compile(r'(https?://\S+)')
 CODE_RE = re.compile(r'`{1,3}[\s\S]*?`{1,3}')
@@ -41,59 +46,64 @@ def restore(text: str, protected: List[str]):
     text = re.sub(rf"{SENTINEL_OPEN}(\d+){SENTINEL_CLOSE}", unwrap, text)
     return text.replace(SENTINEL_OPEN, "").replace(SENTINEL_CLOSE, "")
 
-#
+# ---------------- Prompting (Llama 3.x chat template) ----------------
 SYSTEM = (
-    "You are an expert editor. Humanize the user's text: vary sentence length,
-    "
-    "Do
-    "Keep tone
+    "You are an expert editor. Humanize the user's text: improve flow, vary sentence length, "
+    "split run-ons, replace stiff phrasing with natural alternatives, and preserve meaning. "
+    "Do NOT alter anything wrapped by §§KEEP_OPEN§§<id>§§KEEP_CLOSE§§ (citations, URLs, numbers, code). "
+    "Keep the requested tone and region. No em dashes—use simple punctuation."
 )
 
-def
+def build_prompt(text: str, tone: str, region: str, level: str, intensity: int) -> str:
     user = (
         f"Tone: {tone}. Region: {region} English. Reading level: {level}. "
         f"Humanization intensity: {intensity} (10 strongest).\n\n"
-        f"Rewrite this text. Keep
+        f"Rewrite this text. Keep markers intact:\n\n{text}"
     )
-
-
-
-
-
+    # Llama 3.x chat format
+    return (
+        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
+        f"{SYSTEM}\n"
+        "<|eot_id|><|start_header_id|>user<|end_header_id|>\n"
+        f"{user}\n"
+        "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
    )
 
-def
-
-
-
-
-
+def diff_ratio(a: str, b: str) -> float:
+    return difflib.SequenceMatcher(None, a, b).ratio()
+
+def generate_once(prompt: str, temperature: float, max_new: int = 768) -> str:
+    llm = load_model()
+    out = llm(
+        prompt,
+        temperature=temperature,
+        top_p=0.95,
+        max_new_tokens=max_new,
+        stop=["<|eot_id|>"]
     )
-    return
-
-def diff_ratio(a, b): return difflib.SequenceMatcher(None, a, b).ratio()
+    return out.strip()
 
-
+# ---------------- Main humanizer ----------------
+def humanize_core(text: str, tone: str, region: str, level: str, intensity: int):
     protected_text, bag = protect(text)
-
-    prompt = apply_chat_template(load_model()[0], msgs)
+    prompt = build_prompt(protected_text, tone, region, level, intensity)
 
-    # pass 1 (conservative), pass 2 (stronger) if
+    # pass 1 (conservative), pass 2 (stronger) if too similar
     draft = generate_once(prompt, temperature=0.35)
     if diff_ratio(protected_text, draft) > 0.97:
         draft = generate_once(prompt, temperature=0.9)
 
-    draft = draft.replace("—","-")
+    draft = draft.replace("—", "-")
     final = restore(draft, bag)
 
-    # ensure protected spans survived
+    # ensure all protected spans survived
     for i, span in enumerate(bag):
         marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
         if marker in protected_text and span not in final:
             final = final.replace(marker, span)
     return final
 
-#
+# ---------------- Gradio UI (and REST at /api/predict/) ----------------
 def ui_humanize(text, tone, region, level, intensity):
     return humanize_core(text, tone, region, level, int(intensity))
 
@@ -107,9 +117,9 @@ demo = gr.Interface(
         gr.Slider(1, 10, value=6, step=1, label="Humanization intensity"),
     ],
     outputs=gr.Textbox(label="Humanized"),
-    title="NoteCraft Humanizer (
+    title="NoteCraft Humanizer (Llama-3.2-3B-Instruct)",
     description="REST: POST /api/predict/ with { data: [text,tone,region,level,intensity] }",
 ).queue()
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
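The hunks above never show protect() and restore() themselves; they sit in the unchanged region of app.py (new lines 30-45). As a reading aid, here is a minimal sketch of how such a sentinel round-trip can be written. Only the constants, the two regexes, the function names, and restore()'s final two lines appear in the diff; the other bodies below are illustrative assumptions, not the file's actual code.

import re
from typing import List, Tuple

SENTINEL_OPEN, SENTINEL_CLOSE = "§§KEEP_OPEN§§", "§§KEEP_CLOSE§§"
URL_RE = re.compile(r'(https?://\S+)')
CODE_RE = re.compile(r'`{1,3}[\s\S]*?`{1,3}')

def protect(text: str) -> Tuple[str, List[str]]:
    # Hypothetical body: stash each code span and URL, leave a numbered marker.
    bag: List[str] = []

    def stash(m: re.Match) -> str:
        bag.append(m.group(0))
        return f"{SENTINEL_OPEN}{len(bag) - 1}{SENTINEL_CLOSE}"

    text = CODE_RE.sub(stash, text)  # code spans first, so URLs inside backticks stay whole
    text = URL_RE.sub(stash, text)
    return text, bag

def restore(text: str, protected: List[str]) -> str:
    def unwrap(m: re.Match) -> str:
        return protected[int(m.group(1))]

    # These two lines are the ones the diff does show (new lines 46-47).
    text = re.sub(rf"{SENTINEL_OPEN}(\d+){SENTINEL_CLOSE}", unwrap, text)
    return text.replace(SENTINEL_OPEN, "").replace(SENTINEL_CLOSE, "")

Under this sketch, protect() hands the model only opaque numbered markers in place of URLs and code, restore() swaps the originals back, and the marker-survival loop at the end of humanize_core() is the safety net for any marker the model mangles.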
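The description string doubles as the Space's API documentation: POST /api/predict/ with a JSON body of { data: [text, tone, region, level, intensity] }. Below is a minimal client sketch against that contract, assuming the classic Gradio response shape that wraps outputs in a "data" list; the URL is a placeholder, and the tone/region/level values are guesses, since the dropdown choices live in lines the diff does not show.

import requests

API_URL = "https://YOUR-SPACE.hf.space/api/predict/"  # placeholder, not the real Space URL

payload = {
    "data": [
        "The utilization of the feature was performed by the user.",  # text
        "friendly",  # tone (guessed choice)
        "US",        # region (guessed choice)
        "general",   # reading level (guessed choice)
        6,           # humanization intensity, 1-10
    ]
}

resp = requests.post(API_URL, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["data"][0])  # the humanized text is the first output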