peterweber committed on
Commit
5cc8ffc
·
verified ·
1 Parent(s): 8a9efca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -13
app.py CHANGED
@@ -1,17 +1,17 @@
1
  import os, re
2
- from typing import List, Optional
3
- from pydantic import BaseModel
4
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
  import torch
6
  import gradio as gr
 
7
 
8
- # -------- Model (CPU) --------
9
- MODEL_ID = os.getenv("MODEL_ID", "danibor/flan-t5-base-humanizer")
 
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
12
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID).to(device).eval()
13
 
14
- # -------- Protect / restore --------
15
  SENTINEL_OPEN = "§§KEEP_OPEN§§"
16
  SENTINEL_CLOSE = "§§KEEP_CLOSE§§"
17
  URL_RE = re.compile(r'(https?://\S+)')
@@ -57,7 +57,7 @@ def build_input(text: str, tone: str, region: str, level: str, intensity: int) -
57
 
58
  def chunk_text(s: str, max_chars: int = 1100):
59
  parts, buf, cur = [], [], 0
60
- for block in re.split(r"(\n{2,})", s):
61
  if cur + len(block) > max_chars and buf:
62
  parts.append("".join(buf)); buf, cur = [block], len(block)
63
  else:
@@ -85,13 +85,14 @@ def humanize_core(text: str, tone: str, region: str, reading_level: str, intensi
85
  outs = [generate_one(ch, max_new=320) for ch in chunks]
86
  draft = "".join(outs).replace("—", "-").strip()
87
  final_text = restore(draft, bag)
 
88
  for i, span in enumerate(bag):
89
  marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
90
  if marker in protected_text and span not in final_text:
91
  final_text = final_text.replace(marker, span)
92
  return final_text
93
 
94
- # -------- Gradio UI (also enables REST API) --------
95
  def ui_humanize(text, tone, region, reading_level, intensity):
96
  return humanize_core(text, tone, region, reading_level, int(intensity))
97
 
@@ -105,9 +106,6 @@ demo = gr.Interface(
105
  gr.Slider(1, 10, value=6, step=1, label="Humanization intensity"),
106
  ],
107
  outputs=gr.Textbox(label="Humanized"),
108
- title="NoteCraft Humanizer (FLAN-T5)",
109
- description="Model: danibor/flan-t5-base-humanizer REST: POST /api/predict/ with { data: [text,tone,region,level,intensity] }",
110
  ).queue()
111
-
112
- if __name__ == "__main__":
113
- demo.launch()
 
1
  import os, re
2
+ from typing import List
 
 
3
  import torch
4
  import gradio as gr
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
 
7
+ # ---------------- Model (CPU) ----------------
8
+ # Humaneyes is Pegasus-based (seq2seq). Loads fine on CPU Spaces.
9
+ MODEL_ID = os.getenv("MODEL_ID", "Eemansleepdeprived/Humaneyes")
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
12
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID).to(device).eval()
13
 
14
+ # ---------------- Protect / restore ----------------
15
  SENTINEL_OPEN = "§§KEEP_OPEN§§"
16
  SENTINEL_CLOSE = "§§KEEP_CLOSE§§"
17
  URL_RE = re.compile(r'(https?://\S+)')
 
57
 
58
  def chunk_text(s: str, max_chars: int = 1100):
59
  parts, buf, cur = [], [], 0
60
+ for block in re.split(r"(\n{2,})", s): # keep paragraph gaps
61
  if cur + len(block) > max_chars and buf:
62
  parts.append("".join(buf)); buf, cur = [block], len(block)
63
  else:
 
85
  outs = [generate_one(ch, max_new=320) for ch in chunks]
86
  draft = "".join(outs).replace("—", "-").strip()
87
  final_text = restore(draft, bag)
88
+ # ensure protected spans survived (paranoid check)
89
  for i, span in enumerate(bag):
90
  marker = f"{SENTINEL_OPEN}{i}{SENTINEL_CLOSE}"
91
  if marker in protected_text and span not in final_text:
92
  final_text = final_text.replace(marker, span)
93
  return final_text
94
 
95
+ # ---------------- Gradio UI (also exposes REST at /api/predict/) ----------------
96
  def ui_humanize(text, tone, region, reading_level, intensity):
97
  return humanize_core(text, tone, region, reading_level, int(intensity))
98
 
 
106
  gr.Slider(1, 10, value=6, step=1, label="Humanization intensity"),
107
  ],
108
  outputs=gr.Textbox(label="Humanized"),
109
+ title="NoteCraft Humanizer (Humaneyes)",
110
+ description="Model: Eemansleepdeprived/Humaneyes (Pegasus). REST: POST /api/predict/ with { data: [text,tone,region,level,intensity] }",
111
  ).queue()