peterweber committed
Commit 0a9384a · verified · 1 Parent(s): faae576

Update app.py

Files changed (1)
  1. app.py +51 -42
app.py CHANGED
@@ -1,25 +1,41 @@
-import os, re, difflib
-from typing import List
+import os, re, difflib, traceback
+from typing import List, Tuple
 import gradio as gr
+from huggingface_hub import hf_hub_download
 from ctransformers import AutoModelForCausalLM
 
-# ---------------- Model (GGUF on CPU) ----------------
-# These defaults work on HF free CPU Spaces.
-REPO_ID = os.getenv("LLAMA_GGUF_REPO", "bartowski/Llama-3.2-3B-Instruct-GGUF")
-FILENAME = os.getenv("LLAMA_GGUF_FILE", "Llama-3.2-3B-Instruct-Q5_0.gguf")  # if not found, use Q8_0
-MODEL_TYPE = "llama"
+# ---------------- Auto-pick a valid GGUF ----------------
+CANDIDATES: Tuple[Tuple[str, str], ...] = (
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q8_0.gguf"),
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q6_K_L.gguf"),
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q5_K_M.gguf"),
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_0.gguf"),
+)
+
+def resolve_model_file() -> str:
+    last_err = None
+    for repo, fname in CANDIDATES:
+        try:
+            path = hf_hub_download(repo_id=repo, filename=fname)
+            print(f"[Humanizer] Using {repo} :: {fname}")
+            return path
+        except Exception as e:
+            last_err = e
+            print(f"[Humanizer] Could not get {repo}/{fname}: {e}")
+    raise RuntimeError(f"Failed to download any GGUF. Last error: {last_err}")
 
-# lazy-load for fast startup
+MODEL_TYPE = "llama"
 _llm = None
+
 def load_model():
     global _llm
     if _llm is None:
+        file_path = resolve_model_file()
         _llm = AutoModelForCausalLM.from_pretrained(
-            REPO_ID,
-            model_file=FILENAME,
+            file_path,  # direct path to the .gguf we just downloaded
             model_type=MODEL_TYPE,
             gpu_layers=0,
-            context_length=8192,
+            context_length=4096,  # safer on free CPU
         )
     return _llm
 
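Note: the new fallback loader can be smoke-tested outside the Space. A minimal sketch, assuming huggingface_hub and ctransformers are installed; the repo and file names are copied from the CANDIDATES list above, while the prompt and token budget are arbitrary:

    # Download one known-good GGUF, load it on CPU, run a tiny generation.
    from huggingface_hub import hf_hub_download
    from ctransformers import AutoModelForCausalLM

    path = hf_hub_download(
        repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
        filename="Llama-3.2-3B-Instruct-Q4_0.gguf",  # smallest candidate in the list
    )
    llm = AutoModelForCausalLM.from_pretrained(path, model_type="llama", gpu_layers=0)
    print(llm("Say hello in five words.", max_new_tokens=16))
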
@@ -46,7 +62,7 @@ def restore(text: str, protected: List[str]):
     text = re.sub(rf"{SENTINEL_OPEN}(\d+){SENTINEL_CLOSE}", unwrap, text)
     return text.replace(SENTINEL_OPEN, "").replace(SENTINEL_CLOSE, "")
 
-# ---------------- Prompting (Llama 3.x chat template) ----------------
+# ---------------- Prompting ----------------
 SYSTEM = (
     "You are an expert editor. Humanize the user's text: improve flow, vary sentence length, "
     "split run-ons, replace stiff phrasing with natural alternatives, and preserve meaning. "
@@ -60,7 +76,6 @@ def build_prompt(text: str, tone: str, region: str, level: str, intensity: int)
         f"Humanization intensity: {intensity} (10 strongest).\n\n"
         f"Rewrite this text. Keep markers intact:\n\n{text}"
     )
-    # Llama 3.x chat format
     return (
         "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
         f"{SYSTEM}\n"
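Note: this hunk cuts off mid-template. For orientation, a sketch of how a Llama 3.x chat prompt built this way would typically end; the exact whitespace and the trailing assistant header are assumptions, inferred from the stop=["<|eot_id|>"] token used in generate_once() below:

    # Presumed final shape of the string build_prompt() returns; the empty
    # assistant header at the end cues the model to begin its reply.
    def llama3_prompt(system: str, user: str) -> str:
        return (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
            f"{system}<|eot_id|>"
            "<|start_header_id|>user<|end_header_id|>\n"
            f"{user}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n"
        )
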
@@ -72,38 +87,32 @@ def build_prompt(text: str, tone: str, region: str, level: str, intensity: int)
 def diff_ratio(a: str, b: str) -> float:
     return difflib.SequenceMatcher(None, a, b).ratio()
 
-def generate_once(prompt: str, temperature: float, max_new: int = 768) -> str:
+def generate_once(prompt: str, temperature: float, max_new: int = 384) -> str:
     llm = load_model()
-    out = llm(
-        prompt,
-        temperature=temperature,
-        top_p=0.95,
-        max_new_tokens=max_new,
-        stop=["<|eot_id|>"]
-    )
-    return out.strip()
+    return llm(prompt, temperature=temperature, top_p=0.95, max_new_tokens=max_new, stop=["<|eot_id|>"]).strip()
 
-# ---------------- Main humanizer ----------------
+# ---------------- Main ----------------
 def humanize_core(text: str, tone: str, region: str, level: str, intensity: int):
-    protected_text, bag = protect(text)
-    prompt = build_prompt(protected_text, tone, region, level, intensity)
-
-    # pass 1 (conservative), pass 2 (stronger) if too similar
-    draft = generate_once(prompt, temperature=0.35)
-    if diff_ratio(protected_text, draft) > 0.97:
-        draft = generate_once(prompt, temperature=0.9)
-
-    draft = draft.replace("—", "-")
-    final = restore(draft, bag)
-
-    # ensure all protected spans survived
-    for i, span in enumerate(bag):
-        marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
-        if marker in protected_text and span not in final:
-            final = final.replace(marker, span)
-    return final
-
-# ---------------- Gradio UI (and REST at /api/predict/) ----------------
+    try:
+        protected_text, bag = protect(text)
+        prompt = build_prompt(protected_text, tone, region, level, intensity)
+
+        draft = generate_once(prompt, temperature=0.35)
+        if diff_ratio(protected_text, draft) > 0.97:
+            draft = generate_once(prompt, temperature=0.9)
+
+        draft = draft.replace("—", "-")
+        final = restore(draft, bag)
+
+        for i, span in enumerate(bag):
+            marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
+            if marker in protected_text and span not in final:
+                final = final.replace(marker, span)
+        return final
+    except Exception:
+        return "ERROR:\n" + traceback.format_exc()
+
+# ---------------- Gradio UI (REST at /api/predict/) ----------------
 def ui_humanize(text, tone, region, level, intensity):
     return humanize_core(text, tone, region, level, int(intensity))
 
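Note: humanize_core() leans on protect() and the sentinel constants, which never appear in the changed hunks. A minimal counterpart consistent with the restore() tail shown in the second hunk; the sentinel characters and the choice of which spans to protect are assumptions, not the repo's actual values:

    # Hypothetical protect(): swap each protected span for an indexed
    # sentinel marker the model is told to keep intact; restore() swaps
    # them back after generation.
    import re
    from typing import List, Tuple

    SENTINEL_OPEN, SENTINEL_CLOSE = "\u27ea", "\u27eb"  # assumed markers
    PROTECT_RE = re.compile(r"`[^`]+`|https?://\S+")    # assumed span types

    def protect(text: str) -> Tuple[str, List[str]]:
        bag: List[str] = []
        def wrap(m: re.Match) -> str:
            bag.append(m.group(0))
            return f"{SENTINEL_OPEN}{len(bag) - 1}{SENTINEL_CLOSE}"
        return PROTECT_RE.sub(wrap, text), bag

    demo, bag = protect("Keep `model_file` and https://hf.co intact.")
    print(demo, bag)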
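Note: the final comment names Gradio's legacy REST route. A hedged example of calling it with requests; the Space URL is a placeholder, and the {"data": [...]} payload shape at /api/predict/ applies only to Gradio versions that still serve that route:

    # Inputs mirror ui_humanize(text, tone, region, level, intensity).
    import requests

    SPACE_URL = "https://<owner>-<space>.hf.space"  # placeholder, not the real URL
    payload = {"data": ["Text to humanize.", "Neutral", "US", "Medium", 5]}
    resp = requests.post(f"{SPACE_URL}/api/predict/", json=payload, timeout=120)
    print(resp.json()["data"][0])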